Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 688
1 file changed, 451 insertions(+), 237 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 97d46058d71d..29321a6167a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -27,6 +27,7 @@
 #include <linux/prefetch.h>
 #include <net/busy_poll.h>
 #include "i40e.h"
+#include "i40e_trace.h"
 #include "i40e_prototype.h"
 
 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
@@ -71,6 +72,9 @@ static void i40e_fdir(struct i40e_ring *tx_ring,
     flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
               (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
 
+    flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
+              (fdata->flex_offset << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
     /* Use LAN VSI Id if not programmed by user */
     flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
               ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
@@ -203,7 +207,6 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
     struct i40e_pf *pf = vsi->back;
     struct udphdr *udp;
     struct iphdr *ip;
-    bool err = false;
     u8 *raw_packet;
     int ret;
     static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
@@ -219,18 +222,28 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
     udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
           + sizeof(struct iphdr));
 
-    ip->daddr = fd_data->dst_ip[0];
+    ip->daddr = fd_data->dst_ip;
     udp->dest = fd_data->dst_port;
-    ip->saddr = fd_data->src_ip[0];
+    ip->saddr = fd_data->src_ip;
     udp->source = fd_data->src_port;
 
+    if (fd_data->flex_filter) {
+        u8 *payload = raw_packet + I40E_UDPIP_DUMMY_PACKET_LEN;
+        __be16 pattern = fd_data->flex_word;
+        u16 off = fd_data->flex_offset;
+
+        *((__force __be16 *)(payload + off)) = pattern;
+    }
+
     fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
     ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
     if (ret) {
         dev_info(&pf->pdev->dev,
              "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
              fd_data->pctype, fd_data->fd_id, ret);
-        err = true;
+        /* Free the packet buffer since it wasn't added to the ring */
+        kfree(raw_packet);
+        return -EOPNOTSUPP;
     } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
         if (add)
             dev_info(&pf->pdev->dev,
@@ -241,10 +254,13 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
                  "Filter deleted for PCTYPE %d loc = %d\n",
                  fd_data->pctype, fd_data->fd_id);
     }
-    if (err)
-        kfree(raw_packet);
 
-    return err ? -EOPNOTSUPP : 0;
+    if (add)
+        pf->fd_udp4_filter_cnt++;
+    else
+        pf->fd_udp4_filter_cnt--;
+
+    return 0;
 }
 
 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
@@ -263,7 +279,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
     struct i40e_pf *pf = vsi->back;
     struct tcphdr *tcp;
     struct iphdr *ip;
-    bool err = false;
     u8 *raw_packet;
     int ret;
     /* Dummy packet */
@@ -281,39 +296,110 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
     tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
           + sizeof(struct iphdr));
 
-    ip->daddr = fd_data->dst_ip[0];
+    ip->daddr = fd_data->dst_ip;
     tcp->dest = fd_data->dst_port;
-    ip->saddr = fd_data->src_ip[0];
+    ip->saddr = fd_data->src_ip;
     tcp->source = fd_data->src_port;
 
+    if (fd_data->flex_filter) {
+        u8 *payload = raw_packet + I40E_TCPIP_DUMMY_PACKET_LEN;
+        __be16 pattern = fd_data->flex_word;
+        u16 off = fd_data->flex_offset;
+
+        *((__force __be16 *)(payload + off)) = pattern;
+    }
+
+    fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+    ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+    if (ret) {
+        dev_info(&pf->pdev->dev,
+             "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
+             fd_data->pctype, fd_data->fd_id, ret);
+        /* Free the packet buffer since it wasn't added to the ring */
+        kfree(raw_packet);
+        return -EOPNOTSUPP;
+    } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
+        if (add)
+            dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
+                 fd_data->pctype, fd_data->fd_id);
+        else
+            dev_info(&pf->pdev->dev,
+                 "Filter deleted for PCTYPE %d loc = %d\n",
+                 fd_data->pctype, fd_data->fd_id);
+    }
+
     if (add) {
-        pf->fd_tcp_rule++;
+        pf->fd_tcp4_filter_cnt++;
         if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
             I40E_DEBUG_FD & pf->hw.debug_mask)
             dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-        pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+        pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
     } else {
-        pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
-                  (pf->fd_tcp_rule - 1) : 0;
-        if (pf->fd_tcp_rule == 0) {
-            if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                I40E_DEBUG_FD & pf->hw.debug_mask)
-                dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
-            pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
-        }
+        pf->fd_tcp4_filter_cnt--;
     }
 
-    fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
-    ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+    return 0;
+}
+
+#define I40E_SCTPIP_DUMMY_PACKET_LEN 46
+/**
+ * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+                    struct i40e_fdir_filter *fd_data,
+                    bool add)
+{
+    struct i40e_pf *pf = vsi->back;
+    struct sctphdr *sctp;
+    struct iphdr *ip;
+    u8 *raw_packet;
+    int ret;
+    /* Dummy packet */
+    static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+        0x45, 0, 0, 0x20, 0, 0, 0x40, 0, 0x40, 0x84, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+    if (!raw_packet)
+        return -ENOMEM;
+    memcpy(raw_packet, packet, I40E_SCTPIP_DUMMY_PACKET_LEN);
+
+    ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+    sctp = (struct sctphdr *)(raw_packet + IP_HEADER_OFFSET
+          + sizeof(struct iphdr));
+
+    ip->daddr = fd_data->dst_ip;
+    sctp->dest = fd_data->dst_port;
+    ip->saddr = fd_data->src_ip;
+    sctp->source = fd_data->src_port;
+
+    if (fd_data->flex_filter) {
+        u8 *payload = raw_packet + I40E_SCTPIP_DUMMY_PACKET_LEN;
+        __be16 pattern = fd_data->flex_word;
+        u16 off = fd_data->flex_offset;
+
+        *((__force __be16 *)(payload + off)) = pattern;
+    }
+
+    fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+    ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
     if (ret) {
         dev_info(&pf->pdev->dev,
              "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
              fd_data->pctype, fd_data->fd_id, ret);
-        err = true;
+        /* Free the packet buffer since it wasn't added to the ring */
+        kfree(raw_packet);
+        return -EOPNOTSUPP;
     } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
         if (add)
-            dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
+            dev_info(&pf->pdev->dev,
+                 "Filter OK for PCTYPE %d loc = %d\n",
                  fd_data->pctype, fd_data->fd_id);
         else
             dev_info(&pf->pdev->dev,
@@ -321,10 +407,12 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
                  "Filter deleted for PCTYPE %d loc = %d\n",
                  fd_data->pctype, fd_data->fd_id);
     }
 
-    if (err)
-        kfree(raw_packet);
+    if (add)
+        pf->fd_sctp4_filter_cnt++;
+    else
+        pf->fd_sctp4_filter_cnt--;
 
-    return err ? -EOPNOTSUPP : 0;
+    return 0;
 }
 
 #define I40E_IP_DUMMY_PACKET_LEN 34
@@ -343,7 +431,6 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
 {
     struct i40e_pf *pf = vsi->back;
     struct iphdr *ip;
-    bool err = false;
     u8 *raw_packet;
     int ret;
     int i;
@@ -359,18 +446,29 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
         memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
 
-        ip->saddr = fd_data->src_ip[0];
-        ip->daddr = fd_data->dst_ip[0];
+        ip->saddr = fd_data->src_ip;
+        ip->daddr = fd_data->dst_ip;
         ip->protocol = 0;
 
+        if (fd_data->flex_filter) {
+            u8 *payload = raw_packet + I40E_IP_DUMMY_PACKET_LEN;
+            __be16 pattern = fd_data->flex_word;
+            u16 off = fd_data->flex_offset;
+
+            *((__force __be16 *)(payload + off)) = pattern;
+        }
+
         fd_data->pctype = i;
         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
-
         if (ret) {
             dev_info(&pf->pdev->dev,
                  "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
                  fd_data->pctype, fd_data->fd_id, ret);
-            err = true;
+            /* The packet buffer wasn't added to the ring so we
+             * need to free it now.
+             */
+            kfree(raw_packet);
+            return -EOPNOTSUPP;
         } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
             if (add)
                 dev_info(&pf->pdev->dev,
@@ -383,10 +481,12 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
         }
     }
 
-    if (err)
-        kfree(raw_packet);
+    if (add)
+        pf->fd_ip4_filter_cnt++;
+    else
+        pf->fd_ip4_filter_cnt--;
 
-    return err ? -EOPNOTSUPP : 0;
+    return 0;
 }
 
 /**
@@ -409,6 +509,9 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
     case UDP_V4_FLOW:
         ret = i40e_add_del_fdir_udpv4(vsi, input, add);
         break;
+    case SCTP_V4_FLOW:
+        ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+        break;
     case IP_USER_FLOW:
         switch (input->ip4_proto) {
         case IPPROTO_TCP:
@@ -417,19 +520,23 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
         case IPPROTO_UDP:
             ret = i40e_add_del_fdir_udpv4(vsi, input, add);
            break;
+        case IPPROTO_SCTP:
+            ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
+            break;
         case IPPROTO_IP:
             ret = i40e_add_del_fdir_ipv4(vsi, input, add);
             break;
         default:
             /* We cannot support masking based on protocol */
-            goto unsupported_flow;
+            dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
+                 input->ip4_proto);
+            return -EINVAL;
         }
         break;
     default:
-unsupported_flow:
-        dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
+        dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
             input->flow_type);
-        ret = -EINVAL;
+        return -EINVAL;
     }
 
     /* The buffer allocated here will be normally be freed by
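The flex-filter branches added above all follow the same pattern: the user-supplied 2-byte word is written, in network byte order, at flex_offset bytes past the fixed dummy headers of the raw packet that programs the filter. A minimal user-space sketch of that payload write; the lengths and pattern value here are made up purely for illustration and are not the driver's constants.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

#define DUMMY_PACKET_LEN 42   /* illustrative: Ethernet + IPv4 + UDP headers */
#define RAW_PACKET_SIZE 512   /* illustrative stand-in for the raw packet buffer */

int main(void)
{
    uint8_t raw_packet[RAW_PACKET_SIZE] = { 0 };
    uint16_t flex_word = htons(0xBEEF);  /* pattern to match, network order */
    uint16_t flex_offset = 6;            /* byte offset into the payload */

    /* the pattern lands after the fixed dummy headers, at the user offset */
    uint8_t *payload = raw_packet + DUMMY_PACKET_LEN;
    memcpy(payload + flex_offset, &flex_word, sizeof(flex_word));

    /* dump the start of the payload to show where the word was placed */
    for (int i = 0; i < 10; i++)
        printf("%02x ", payload[i]);
    printf("\n");
    return 0;
}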
@@ -476,7 +583,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
      * progress do nothing, once flush is complete the state will
      * be cleared.
      */
-    if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
+    if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
         return;
 
     pf->fd_add_err++;
@@ -484,9 +591,9 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
     pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
     if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
-        (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
-        pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
-        set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
+        pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
+        pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+        set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
     }
 
     /* filter programming failed most likely due to table full */
@@ -498,12 +605,10 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
      */
     if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
         if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-            !(pf->auto_disable_flags &
-              I40E_FLAG_FD_SB_ENABLED)) {
+            !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) {
+            pf->flags |= I40E_FLAG_FD_SB_AUTO_DISABLED;
             if (I40E_DEBUG_FD & pf->hw.debug_mask)
                 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
-            pf->auto_disable_flags |=
-                I40E_FLAG_FD_SB_ENABLED;
         }
     }
 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
@@ -599,19 +704,15 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
- * @in_sw: is tx_pending being checked in SW or HW
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
 **/
-u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
+u32 i40e_get_tx_pending(struct i40e_ring *ring)
 {
     u32 head, tail;
 
-    if (!in_sw)
-        head = i40e_get_head(ring);
-    else
-        head = ring->next_to_clean;
+    head = i40e_get_head(ring);
     tail = readl(ring->tail);
 
     if (head != tail)
@@ -657,6 +758,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
         /* prevent any other reads prior to eop_desc */
         read_barrier_depends();
 
+        i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
         /* we have caught up to head, no work left to do */
         if (tx_head == tx_desc)
             break;
@@ -683,6 +785,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 
         /* unmap remaining buffers */
         while (tx_desc != eop_desc) {
+            i40e_trace(clean_tx_irq_unmap,
+                   tx_ring, tx_desc, tx_buf);
 
             tx_buf++;
             tx_desc++;
@@ -734,11 +838,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
          * them to be written back in case we stay in NAPI.
          * In this mode on X722 we do not enable Interrupt.
          */
-        unsigned int j = i40e_get_tx_pending(tx_ring, false);
+        unsigned int j = i40e_get_tx_pending(tx_ring);
 
         if (budget &&
             ((j / WB_STRIDE) == 0) && (j > 0) &&
-            !test_bit(__I40E_DOWN, &vsi->state) &&
+            !test_bit(__I40E_VSI_DOWN, vsi->state) &&
             (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
             tx_ring->arm_wb = true;
     }
@@ -756,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
         smp_mb();
         if (__netif_subqueue_stopped(tx_ring->netdev,
                          tx_ring->queue_index) &&
-            !test_bit(__I40E_DOWN, &vsi->state)) {
+            !test_bit(__I40E_VSI_DOWN, vsi->state)) {
             netif_wake_subqueue(tx_ring->netdev,
                         tx_ring->queue_index);
             ++tx_ring->tx_stats.restart_queue;
@@ -930,9 +1034,29 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 }
 
 /**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qw: qword representing status_error_len in CPU ordering
+ *
+ * The value of in the descriptor length field indicate if this
+ * is a programming status descriptor for flow director or FCoE
+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qw)
+{
+    /* The Rx filter programming status and SPH bit occupy the same
+     * spot in the descriptor. Since we don't support packet split we
+     * can just reuse the bit as an indication that this is a
+     * programming status descriptor.
+     */
+    return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
+}
+
+/**
  * i40e_clean_programming_status - clean the programming status descriptor
  * @rx_ring: the rx ring that has this descriptor
  * @rx_desc: the rx descriptor written back by HW
+ * @qw: qword representing status_error_len in CPU ordering
  *
  * Flow director should handle FD_FILTER_STATUS to check its filter programming
  * status being successful or not and take actions accordingly. FCoE should
@@ -940,22 +1064,23 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
  *
  **/
 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
-                      union i40e_rx_desc *rx_desc)
+                      union i40e_rx_desc *rx_desc,
+                      u64 qw)
 {
-    u64 qw;
+    u32 ntc = rx_ring->next_to_clean + 1;
     u8 id;
 
-    qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+    /* fetch, update, and store next to clean */
+    ntc = (ntc < rx_ring->count) ? ntc : 0;
+    rx_ring->next_to_clean = ntc;
+
+    prefetch(I40E_RX_DESC(rx_ring, ntc));
+
     id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
           I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
 
     if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
         i40e_fd_handle_status(rx_ring, rx_desc, id);
-#ifdef I40E_FCOE
-    else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
-         (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
-        i40e_fcoe_handle_status(rx_ring, rx_desc, id);
-#endif
 }
 
 /**
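The comment in i40e_rx_is_programming_status() is the key to the later Rx-loop rework: because packet split is not used, the SPH bit of qword1 can double as a "this is a programming status descriptor" marker, and the same qword also carries the packet buffer length. A small stand-alone sketch of that decoding; the shift and mask values mirror the descriptor layout used by the driver but are reproduced here only for illustration.

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* illustrative copies of the qword1 field layout */
#define RXD_QW1_LENGTH_PBUF_SHIFT 38
#define RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << RXD_QW1_LENGTH_PBUF_SHIFT)
#define RXD_QW1_LENGTH_SPH_MASK (1ULL << 63)

static int rx_is_programming_status(uint64_t qw)
{
    /* SPH doubles as the programming-status marker when split is unused */
    return (qw & RXD_QW1_LENGTH_SPH_MASK) != 0;
}

int main(void)
{
    uint64_t qw = (uint64_t)1500 << RXD_QW1_LENGTH_PBUF_SHIFT;

    if (rx_is_programming_status(qw))
        printf("programming status descriptor\n");
    else
        printf("packet descriptor, length = %" PRIu64 "\n",
               (qw & RXD_QW1_LENGTH_PBUF_MASK) >> RXD_QW1_LENGTH_PBUF_SHIFT);
    return 0;
}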
@@ -1010,7 +1135,6 @@ err:
  **/
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 {
-    struct device *dev = rx_ring->dev;
     unsigned long bi_size;
     u16 i;
 
@@ -1030,8 +1154,22 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
         if (!rx_bi->page)
             continue;
 
-        dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
-        __free_pages(rx_bi->page, 0);
+        /* Invalidate cache lines that may have been written to by
+         * device so that we avoid corrupting memory.
+         */
+        dma_sync_single_range_for_cpu(rx_ring->dev,
+                          rx_bi->dma,
+                          rx_bi->page_offset,
+                          rx_ring->rx_buf_len,
+                          DMA_FROM_DEVICE);
+
+        /* free resources associated with mapping */
+        dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
+                     i40e_rx_pg_size(rx_ring),
+                     DMA_FROM_DEVICE,
+                     I40E_RX_DMA_ATTR);
+
+        __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
         rx_bi->page = NULL;
         rx_bi->page_offset = 0;
@@ -1132,6 +1270,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 }
 
 /**
+ * i40e_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
+{
+    return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+}
+
+/**
  * i40e_alloc_mapped_page - recycle or make a new page
  * @rx_ring: ring to use
  * @bi: rx_buffer struct to modify
@@ -1152,27 +1301,33 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
     }
 
     /* alloc new page for storage */
-    page = dev_alloc_page();
+    page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
     if (unlikely(!page)) {
         rx_ring->rx_stats.alloc_page_failed++;
         return false;
     }
 
     /* map page for use */
-    dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+    dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+                 i40e_rx_pg_size(rx_ring),
+                 DMA_FROM_DEVICE,
+                 I40E_RX_DMA_ATTR);
 
     /* if mapping failed free memory back to system since
      * there isn't much point in holding memory we can't use
      */
     if (dma_mapping_error(rx_ring->dev, dma)) {
-        __free_pages(page, 0);
+        __free_pages(page, i40e_rx_pg_order(rx_ring));
         rx_ring->rx_stats.alloc_page_failed++;
         return false;
     }
 
     bi->dma = dma;
     bi->page = page;
-    bi->page_offset = 0;
+    bi->page_offset = i40e_rx_offset(rx_ring);
+
+    /* initialize pagecnt_bias to 1 representing we fully own page */
+    bi->pagecnt_bias = 1;
 
     return true;
 }
@@ -1219,6 +1374,12 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
         if (!i40e_alloc_mapped_page(rx_ring, bi))
             goto no_buffers;
 
+        /* sync the buffer for use by the device */
+        dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+                         bi->page_offset,
+                         rx_ring->rx_buf_len,
+                         DMA_FROM_DEVICE);
+
         /* Refresh the desc even if buffer_addrs didn't change
          * because each write-back erases this info.
         */
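Taken together, i40e_rx_offset() and the pagecnt_bias initialisation describe the new buffer layout: each half of the (possibly higher-order) page holds optional headroom, the receive data, and, in the build_skb case, room for the shared info at the tail. A rough arithmetic sketch of that layout for a 4K page; all sizes below are assumptions chosen only to show the accounting, not the driver's actual constants.

#include <stdio.h>

int main(void)
{
    const unsigned int page_size = 4096;          /* assumed PAGE_SIZE */
    const unsigned int truesize = page_size / 2;  /* two buffers per page */
    const unsigned int headroom = 192;            /* stand-in for the skb pad */
    const unsigned int shared_info = 320;         /* stand-in for skb_shared_info */

    unsigned int usable = truesize - headroom - shared_info;

    printf("half-page buffer: %u bytes total\n", truesize);
    printf("  headroom        : %u\n", headroom);
    printf("  packet data     : up to %u\n", usable);
    printf("  skb_shared_info : %u (only needed when the skb is built in place)\n",
           shared_info);

    /* buffers start at the headroom offset, which is what i40e_rx_offset()
     * returns when the ring builds skbs around the buffer */
    printf("initial page_offset = %u\n", headroom);
    return 0;
}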
@@ -1259,8 +1420,6 @@ no_buffers:
  * @vsi: the VSI we care about
  * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
- *
- * skb->protocol must be set before this function is called
  **/
 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
                     struct sk_buff *skb,
@@ -1422,12 +1581,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 
     i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
-    /* modifies the skb - consumes the enet header */
-    skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
     i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
 
     skb_record_rx_queue(skb, rx_ring->queue_index);
+
+    /* modifies the skb - consumes the enet header */
+    skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
 /**
@@ -1472,7 +1631,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
     rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
     /* transfer page from old buffer to new buffer */
-    *new_buff = *old_buff;
+    new_buff->dma = old_buff->dma;
+    new_buff->page = old_buff->page;
+    new_buff->page_offset = old_buff->page_offset;
+    new_buff->pagecnt_bias = old_buff->pagecnt_bias;
 }
 
 /**
@@ -1493,8 +1655,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
  * the adapter for another receive
  *
  * @rx_buffer: buffer containing the page
- * @page: page address from rx_buffer
- * @truesize: actual size of the buffer in this page
  *
  * If page is reusable, rx_buffer->page_offset is adjusted to point to
  * an unused region in the page.
@@ -1517,13 +1677,10 @@ static inline bool i40e_page_is_reusable(struct page *page)
  *
  * In either case, if the page is reusable its refcount is increased.
  **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
-                   struct page *page,
-                   const unsigned int truesize)
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
 {
-#if (PAGE_SIZE >= 8192)
-    unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
-#endif
+    unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+    struct page *page = rx_buffer->page;
 
     /* Is any reuse possible? */
     if (unlikely(!i40e_page_is_reusable(page)))
@@ -1531,21 +1688,23 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
     /* if we are only owner of page we can reuse it */
-    if (unlikely(page_count(page) != 1))
+    if (unlikely((page_count(page) - pagecnt_bias) > 1))
         return false;
-
-    /* flip page offset to other buffer */
-    rx_buffer->page_offset ^= truesize;
 #else
-    /* move offset up to the next cache line */
-    rx_buffer->page_offset += truesize;
-
-    if (rx_buffer->page_offset > last_offset)
+#define I40E_LAST_OFFSET \
+    (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
+    if (rx_buffer->page_offset > I40E_LAST_OFFSET)
         return false;
 #endif
 
-    /* Inc ref count on page before passing it up to the stack */
-    get_page(page);
+    /* If we have drained the page fragment pool we need to update
+     * the pagecnt_bias and page count so that we fully restock the
+     * number of references the driver holds.
+     */
+    if (unlikely(!pagecnt_bias)) {
+        page_ref_add(page, USHRT_MAX);
+        rx_buffer->pagecnt_bias = USHRT_MAX;
+    }
 
     return true;
 }
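The pagecnt_bias scheme replaces the old per-use get_page(): the driver keeps a local bias counting the references it still considers its own, hands one reference to the stack with each fragment, and only touches the atomic page refcount when the bias has drained. A small user-space model of that bookkeeping, with plain integers standing in for page_count() and page_ref_add(); the sequence and numbers are illustrative only.

#include <stdio.h>
#include <limits.h>

/* toy model: page_refcount is the "atomic" count, pagecnt_bias the driver's share */
static unsigned long page_refcount = 1;  /* freshly allocated page */
static unsigned int pagecnt_bias = 1;    /* driver fully owns that reference */

static int can_reuse_page(void)
{
    /* someone other than the driver still holds the page: don't recycle */
    if ((page_refcount - pagecnt_bias) > 1)
        return 0;

    /* restock references once the local pool of them is drained */
    if (pagecnt_bias == 0) {
        page_refcount += USHRT_MAX;   /* models page_ref_add(page, USHRT_MAX) */
        pagecnt_bias = USHRT_MAX;
    }
    return 1;
}

int main(void)
{
    for (int pkt = 0; pkt < 4; pkt++) {
        pagecnt_bias--;          /* one reference handed to the stack with the frag */
        if (can_reuse_page())
            printf("packet %d: page recycled (refcount=%lu bias=%u)\n",
                   pkt, page_refcount, pagecnt_bias);
        else
            printf("packet %d: page returned to the allocator\n", pkt);
        page_refcount--;         /* pretend the stack freed its skb right away */
    }
    return 0;
}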
@@ -1554,145 +1713,201 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
  * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @size: packet length from rx_desc
  * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
+ * It will just attach the page as a frag to the skb.
  *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
+ * The function will then update the page offset.
  **/
-static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
+static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
                  struct i40e_rx_buffer *rx_buffer,
-                 unsigned int size,
-                 struct sk_buff *skb)
+                 struct sk_buff *skb,
+                 unsigned int size)
 {
-    struct page *page = rx_buffer->page;
-    unsigned char *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-    unsigned int truesize = I40E_RXBUFFER_2048;
+    unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
-    unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+    unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
 #endif
-    unsigned int pull_len;
-
-    if (unlikely(skb_is_nonlinear(skb)))
-        goto add_tail_frag;
-
-    /* will the data fit in the skb we allocated? if so, just
-     * copy it as it is pretty small anyway
-     */
-    if (size <= I40E_RX_HDR_SIZE) {
-        memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-        /* page is reusable, we can reuse buffer as-is */
-        if (likely(i40e_page_is_reusable(page)))
-            return true;
+    skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+            rx_buffer->page_offset, size, truesize);
 
-        /* this page cannot be reused so discard it */
-        __free_pages(page, 0);
-        return false;
-    }
+    /* page is being used so we must update the page offset */
+#if (PAGE_SIZE < 8192)
+    rx_buffer->page_offset ^= truesize;
+#else
+    rx_buffer->page_offset += truesize;
+#endif
+}
 
-    /* we need the header to contain the greater of either
-     * ETH_HLEN or 60 bytes if the skb->len is less than
-     * 60 for skb_pad.
-     */
-    pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+/**
+ * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
+                         const unsigned int size)
+{
+    struct i40e_rx_buffer *rx_buffer;
 
-    /* align pull length to size of long to optimize
-     * memcpy performance
-     */
-    memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+    rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+    prefetchw(rx_buffer->page);
 
-    /* update all of the pointers */
-    va += pull_len;
-    size -= pull_len;
+    /* we are reusing so sync this buffer for CPU use */
+    dma_sync_single_range_for_cpu(rx_ring->dev,
+                      rx_buffer->dma,
+                      rx_buffer->page_offset,
+                      size,
+                      DMA_FROM_DEVICE);
 
-add_tail_frag:
-    skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-            (unsigned long)va & ~PAGE_MASK, size, truesize);
+    /* We have pulled a buffer for use, so decrement pagecnt_bias */
+    rx_buffer->pagecnt_bias--;
 
-    return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
+    return rx_buffer;
 }
 
 /**
- * i40e_fetch_rx_buffer - Allocate skb and populate it
+ * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
- * @rx_desc: descriptor containing info written by hardware
+ * @rx_buffer: rx buffer to pull data from
+ * @size: size of buffer to add to skb
  *
- * This function allocates an skb on the fly, and populates it with the page
- * data from the current receive descriptor, taking care to set up the skb
- * correctly, as well as handling calling the page recycle function if
- * necessary.
+ * This function allocates an skb.  It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
  */
-static inline
-struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
-                     union i40e_rx_desc *rx_desc,
-                     struct sk_buff *skb)
+static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
+                      struct i40e_rx_buffer *rx_buffer,
+                      unsigned int size)
 {
-    u64 local_status_error_len =
-        le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-    unsigned int size =
-        (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-        I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-    struct i40e_rx_buffer *rx_buffer;
-    struct page *page;
+    void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+    unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+#else
+    unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+    unsigned int headlen;
+    struct sk_buff *skb;
 
-    rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
-    page = rx_buffer->page;
-    prefetchw(page);
+    /* prefetch first cache line of first page */
+    prefetch(va);
+#if L1_CACHE_BYTES < 128
+    prefetch(va + L1_CACHE_BYTES);
+#endif
+
+    /* allocate a skb to store the frags */
+    skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+                   I40E_RX_HDR_SIZE,
+                   GFP_ATOMIC | __GFP_NOWARN);
+    if (unlikely(!skb))
+        return NULL;
+
+    /* Determine available headroom for copy */
+    headlen = size;
+    if (headlen > I40E_RX_HDR_SIZE)
+        headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+
+    /* align pull length to size of long to optimize memcpy performance */
+    memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+    /* update all of the pointers */
+    size -= headlen;
+    if (size) {
+        skb_add_rx_frag(skb, 0, rx_buffer->page,
+                rx_buffer->page_offset + headlen,
+                size, truesize);
 
-    if (likely(!skb)) {
-        void *page_addr = page_address(page) + rx_buffer->page_offset;
+        /* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+        rx_buffer->page_offset ^= truesize;
+#else
+        rx_buffer->page_offset += truesize;
+#endif
+    } else {
+        /* buffer is unused, reset bias back to rx_buffer */
+        rx_buffer->pagecnt_bias++;
+    }
 
-        /* prefetch first cache line of first page */
-        prefetch(page_addr);
 
     return skb;
 }
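i40e_construct_skb() copies only the packet headers into the freshly allocated skb's linear area (at most I40E_RX_HDR_SIZE bytes, trimmed further by eth_get_headlen()) and leaves the rest of the data in the page, attached as a fragment. A user-space sketch of that split, with memcpy standing in for the header pull and a byte count standing in for the page fragment; sizes are illustrative assumptions.

#include <stdio.h>
#include <string.h>

#define RX_HDR_SIZE 256   /* illustrative cap on the copied headers */

int main(void)
{
    unsigned char page_buf[2048];          /* half-page receive buffer */
    unsigned char linear[RX_HDR_SIZE];     /* skb linear area */
    unsigned int size = 1400;              /* bytes the NIC wrote */
    unsigned int headlen;

    memset(page_buf, 0xab, sizeof(page_buf));

    /* copy at most RX_HDR_SIZE bytes of headers into the linear area;
     * the driver uses eth_get_headlen() to stop right after the headers
     */
    headlen = size;
    if (headlen > RX_HDR_SIZE)
        headlen = 128;                     /* pretend the headers end here */
    memcpy(linear, page_buf, headlen);

    /* whatever is left stays in the page and is attached as a fragment */
    printf("linear part: %u bytes, page fragment: %u bytes\n",
           headlen, size - headlen);
    return 0;
}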
 
 /**
+ * i40e_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buffer: Rx buffer to pull data from
+ * @size: size of buffer to add to skb
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
+                      struct i40e_rx_buffer *rx_buffer,
+                      unsigned int size)
+{
+    void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+    unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+#else
+    unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+    struct sk_buff *skb;
+
+    /* prefetch first cache line of first page */
+    prefetch(va);
 #if L1_CACHE_BYTES < 128
-        prefetch(page_addr + L1_CACHE_BYTES);
+    prefetch(va + L1_CACHE_BYTES);
 #endif
+    /* build an skb around the page buffer */
+    skb = build_skb(va - I40E_SKB_PAD, truesize);
+    if (unlikely(!skb))
+        return NULL;
 
-        /* allocate a skb to store the frags */
-        skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-                       I40E_RX_HDR_SIZE,
-                       GFP_ATOMIC | __GFP_NOWARN);
-        if (unlikely(!skb)) {
-            rx_ring->rx_stats.alloc_buff_failed++;
-            return NULL;
-        }
+    /* update pointers within the skb to store the data */
+    skb_reserve(skb, I40E_SKB_PAD);
+    __skb_put(skb, size);
 
-        /* we will be copying header into skb->data in
-         * pskb_may_pull so it is in our interest to prefetch
-         * it now to avoid a possible cache miss
-         */
-        prefetchw(skb->data);
-    }
+    /* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+    rx_buffer->page_offset ^= truesize;
+#else
+    rx_buffer->page_offset += truesize;
+#endif
 
-    /* we are reusing so sync this buffer for CPU use */
-    dma_sync_single_range_for_cpu(rx_ring->dev,
-                      rx_buffer->dma,
-                      rx_buffer->page_offset,
-                      size,
-                      DMA_FROM_DEVICE);
+    return skb;
+}
 
-    /* pull page into skb */
-    if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) {
+/**
+ * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer.  It will
+ * either recycle the bufer or unmap it and free the associated resources.
+ */
+static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
+                   struct i40e_rx_buffer *rx_buffer)
+{
+    if (i40e_can_reuse_rx_page(rx_buffer)) {
         /* hand second half of page back to the ring */
         i40e_reuse_rx_page(rx_ring, rx_buffer);
         rx_ring->rx_stats.page_reuse_count++;
     } else {
         /* we are not reusing the buffer so unmap it */
-        dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
-                   DMA_FROM_DEVICE);
+        dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+                     i40e_rx_pg_size(rx_ring),
+                     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+        __page_frag_cache_drain(rx_buffer->page,
+                    rx_buffer->pagecnt_bias);
     }
 
     /* clear contents of buffer_info */
     rx_buffer->page = NULL;
-
-    return skb;
 }
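With 4K pages each receive buffer is one half of the page, so "recycling" in i40e_put_rx_buffer() amounts to flipping page_offset to the other half with an XOR of the truesize; on architectures with larger pages the offset advances instead. A tiny demonstration of the flip, assuming a 4K page split into two 2K halves.

#include <stdio.h>

int main(void)
{
    const unsigned int truesize = 2048;  /* half of an assumed 4K page */
    unsigned int page_offset = 0;

    for (int use = 0; use < 4; use++) {
        printf("buffer %d uses bytes %u-%u of the page\n",
               use, page_offset, page_offset + truesize - 1);
        /* hand this half to the stack, reuse the other half next time */
        page_offset ^= truesize;
    }
    return 0;
}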
 
/**
@@ -1718,11 +1933,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
 
     prefetch(I40E_RX_DESC(rx_ring, ntc));
 
-#define staterrlen rx_desc->wb.qword1.status_error_len
-    if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) {
-        i40e_clean_programming_status(rx_ring, rx_desc);
-        return true;
-    }
     /* if we are the last buffer then there is nothing else to do */
 #define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
     if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
@@ -1753,7 +1963,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
     bool failure = false;
 
     while (likely(total_rx_packets < budget)) {
+        struct i40e_rx_buffer *rx_buffer;
         union i40e_rx_desc *rx_desc;
+        unsigned int size;
         u16 vlan_tag;
         u8 rx_ptype;
         u64 qword;
@@ -1770,22 +1982,44 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
         /* status_error_len will always be zero for unused descriptors
          * because it's cleared in cleanup, and overlaps with hdr_addr
          * which is always zero because packet split isn't used, if the
-         * hardware wrote DD then it will be non-zero
+         * hardware wrote DD then the length will be non-zero
          */
-        if (!i40e_test_staterr(rx_desc,
-                       BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
-            break;
+        qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 
         /* This memory barrier is needed to keep us from reading
-         * any other fields out of the rx_desc until we know the
-         * DD bit is set.
+         * any other fields out of the rx_desc until we have
+         * verified the descriptor has been written back.
          */
         dma_rmb();
 
-        skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb);
-        if (!skb)
+        if (unlikely(i40e_rx_is_programming_status(qword))) {
+            i40e_clean_programming_status(rx_ring, rx_desc, qword);
+            continue;
+        }
+        size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+               I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+        if (!size)
             break;
 
+        i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
+        rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+        /* retrieve a buffer from the ring */
+        if (skb)
+            i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+        else if (ring_uses_build_skb(rx_ring))
+            skb = i40e_build_skb(rx_ring, rx_buffer, size);
+        else
+            skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+
+        /* exit if we failed to retrieve a buffer */
+        if (!skb) {
+            rx_ring->rx_stats.alloc_buff_failed++;
+            rx_buffer->pagecnt_bias++;
+            break;
+        }
+
+        i40e_put_rx_buffer(rx_ring, rx_buffer);
         cleaned_count++;
 
         if (i40e_is_non_eop(rx_ring, rx_desc, skb))
@@ -1798,6 +2032,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
          */
         if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
             dev_kfree_skb_any(skb);
+            skb = NULL;
             continue;
         }
 
@@ -1816,18 +2051,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
         /* populate checksum, VLAN, and protocol */
         i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
-#ifdef I40E_FCOE
-        if (unlikely(
-            i40e_rx_is_fcoe(rx_ptype) &&
-            !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
-            dev_kfree_skb_any(skb);
-            continue;
-        }
-#endif
-
         vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
                le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
 
+        i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
         i40e_receive_skb(rx_ring, skb, vlan_tag);
         skb = NULL;
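The reworked loop also makes the multi-buffer case explicit: the skb built from the first buffer is carried across iterations, every non-EOP descriptor only adds its buffer as another fragment, and the packet is handed up once the EOP descriptor arrives. A compact model of that control flow, with an array of (size, eop) pairs standing in for the descriptor ring; the sizes are invented for the example.

#include <stdio.h>

struct desc { unsigned int size; int eop; };

int main(void)
{
    /* one 3000-byte packet spread over two buffers, then a small packet */
    struct desc ring[] = { {2048, 0}, {952, 1}, {60, 1} };
    unsigned int skb_len = 0;   /* stands in for the skb under construction */
    int have_skb = 0;

    for (unsigned int i = 0; i < sizeof(ring) / sizeof(ring[0]); i++) {
        if (!have_skb) {        /* first buffer: "construct" the skb */
            skb_len = ring[i].size;
            have_skb = 1;
        } else {                /* continuation: add the buffer as a frag */
            skb_len += ring[i].size;
        }

        if (!ring[i].eop)       /* more buffers follow, keep accumulating */
            continue;

        printf("received packet of %u bytes\n", skb_len);
        have_skb = 0;           /* next descriptor starts a new packet */
    }
    return 0;
}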
@@ -1944,7 +2171,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
     }
 
 enable_int:
-    if (!test_bit(__I40E_DOWN, &vsi->state))
+    if (!test_bit(__I40E_VSI_DOWN, vsi->state))
         wr32(hw, INTREG(vector - 1), txval);
 
     if (q_vector->itr_countdown)
@@ -1973,13 +2200,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
     int budget_per_ring;
     int work_done = 0;
 
-    if (test_bit(__I40E_DOWN, &vsi->state)) {
+    if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
         napi_complete(napi);
         return 0;
     }
 
-    /* Clear hung_detected bit */
-    clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
     /* Since the actual Tx work is minimal, we can give the Tx a larger
      * budget and be more aggressive about cleaning up the Tx descriptors.
      */
@@ -2079,7 +2304,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
     if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
         return;
 
-    if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+    if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED)
         return;
 
     /* if sampling is disabled do nothing */
@@ -2113,10 +2338,9 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
     th = (struct tcphdr *)(hdr.network + hlen);
 
     /* Due to lack of space, no more new filters can be programmed */
-    if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+    if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
         return;
-    if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
-        (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
+    if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
         /* HW ATR eviction will take care of removing filters on FIN
          * and RST packets.
          */
@@ -2178,8 +2402,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
             I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
             I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 
-    if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
-        (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
+    if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
         dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
 
     fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
@@ -2200,15 +2423,9 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * Returns error code indicate the frame should be dropped upon error and the
  * otherwise returns 0 to indicate the flags has been set properly.
  **/
-#ifdef I40E_FCOE
-inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
-                      struct i40e_ring *tx_ring,
-                      u32 *flags)
-#else
 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
                           struct i40e_ring *tx_ring,
                           u32 *flags)
-#endif
 {
     __be16 protocol = skb->protocol;
     u32 tx_flags = 0;
@@ -2409,7 +2626,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
         return 0;
 
     if (pf->ptp_tx &&
-        !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
+        !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, pf->state)) {
         skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
         pf->ptp_tx_skb = skb_get(skb);
     } else {
@@ -2716,15 +2933,9 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
  * @td_cmd:   the command field in the descriptor
  * @td_offset: offset for checksum or crc
  **/
-#ifdef I40E_FCOE
-inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
-            struct i40e_tx_buffer *first, u32 tx_flags,
-            const u8 hdr_len, u32 td_cmd, u32 td_offset)
-#else
 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                    struct i40e_tx_buffer *first, u32 tx_flags,
                    const u8 hdr_len, u32 td_cmd, u32 td_offset)
-#endif
 {
     unsigned int data_len = skb->data_len;
     unsigned int size = skb_headlen(skb);
@@ -2925,6 +3136,8 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
     /* prefetch the data, we'll need it later */
     prefetch(skb->data);
 
+    i40e_trace(xmit_frame_ring, skb, tx_ring);
+
     count = i40e_xmit_descriptor_count(skb);
     if (i40e_chk_linearize(skb, count)) {
         if (__skb_linearize(skb)) {
@@ -3003,6 +3216,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
     return NETDEV_TX_OK;
 
 out_drop:
+    i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring);
     dev_kfree_skb_any(first->skb);
     first->skb = NULL;
     return NETDEV_TX_OK;