diff options
author | David S. Miller <davem@davemloft.net> | 2017-08-24 06:42:10 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-24 06:42:10 +0300 |
commit | ae99e188928e01dfa0fa45d2e1431ad33a0eaad9 (patch) | |
tree | 1b27593712dd7a12cfe32fa24400a5005f9afb8e | |
parent | 5f9ae3d9e7e4ad6db0491abc7c4ae5452dbeadd8 (diff) | |
parent | 186cd4d4e4144803652212eb0b7413141469feee (diff) | |
download | linux-ae99e188928e01dfa0fa45d2e1431ad33a0eaad9.tar.xz |
Merge branch 'mvpp2-software-TSO-support'
Antoine Tenart says:
====================
net: mvpp2: software TSO support
This series adds the s/w TSO support in the PPv2 driver, in addition to
two cosmetic commits. As stated in patch 3/3:
Using iperf and 10G ports, using TSO shows a significant performance
improvement by a factor 2 to reach around 9.5Gbps in TX; as well as a
significant CPU usage drop (from 25% to 15%).
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/freescale/fec_main.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/mv643xx_eth.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/mvneta.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/mvpp2.c | 182 | ||||
-rw-r--r-- | include/net/tso.h | 2 |
6 files changed, 164 insertions, 27 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 57858522c33c..67d1a3230773 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -277,7 +277,6 @@ struct snd_queue { u16 xdp_free_cnt; bool is_xdp; -#define TSO_HEADER_SIZE 128 /* For TSO segment's header */ char *tso_hdrs; dma_addr_t tso_hdrs_phys; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index df09b254553d..56f56d6ada9c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -226,7 +226,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define COPYBREAK_DEFAULT 256 -#define TSO_HEADER_SIZE 128 /* Max number of allowed TCP segments for software TSO */ #define FEC_MAX_TSO_SEGS 100 #define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 9c94ea9b2b80..fb2d533ae4ef 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -183,8 +183,6 @@ static char mv643xx_eth_driver_version[] = "1.4"; #define DEFAULT_TX_QUEUE_SIZE 512 #define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES) -#define TSO_HEADER_SIZE 128 - /* Max number of allowed TCP segments for software TSO */ #define MV643XX_MAX_TSO_SEGS 100 #define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 0aab74c2a209..35ff1ecfcff0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -281,9 +281,6 @@ */ #define MVNETA_RSS_LU_TABLE_SIZE 1 -/* TSO header size */ -#define TSO_HEADER_SIZE 128 - /* Max number of Rx descriptors */ #define MVNETA_MAX_RXD 128 diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 02c62cbbfe51..7fa251bf91ae 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -35,6 +35,7 @@ #include <uapi/linux/ppp_defs.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/tso.h> /* RX Fifo Registers */ #define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port)) @@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu { /* Index of the TX DMA descriptor to be cleaned up */ int txq_get_index; + + /* DMA buffer for TSO headers */ + char *tso_headers; + dma_addr_t tso_headers_dma; }; struct mvpp2_tx_queue { @@ -5284,15 +5289,14 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause, /* Allocate and initialize descriptors for aggr TXQ */ static int mvpp2_aggr_txq_init(struct platform_device *pdev, - struct mvpp2_tx_queue *aggr_txq, - int desc_num, int cpu, + struct mvpp2_tx_queue *aggr_txq, int cpu, struct mvpp2 *priv) { u32 txq_dma; /* Allocate memory for TX descriptors */ aggr_txq->descs = dma_alloc_coherent(&pdev->dev, - desc_num * MVPP2_DESC_ALIGNED_SIZE, + MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, &aggr_txq->descs_dma, GFP_KERNEL); if (!aggr_txq->descs) return -ENOMEM; @@ -5313,7 +5317,8 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, MVPP22_AGGR_TXQ_DESC_ADDR_OFFS; mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma); - mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num); + mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), + MVPP2_AGGR_TXQ_SIZE); return 0; } @@ -5494,6 +5499,14 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->reserved_num = 0; txq_pcpu->txq_put_index = 0; txq_pcpu->txq_get_index = 0; + + txq_pcpu->tso_headers = + dma_alloc_coherent(port->dev->dev.parent, + MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE, + &txq_pcpu->tso_headers_dma, + GFP_KERNEL); + if (!txq_pcpu->tso_headers) + goto cleanup; } return 0; @@ -5501,6 +5514,11 @@ cleanup: for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); kfree(txq_pcpu->buffs); + + dma_free_coherent(port->dev->dev.parent, + MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->tso_headers, + txq_pcpu->tso_headers_dma); } dma_free_coherent(port->dev->dev.parent, @@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); kfree(txq_pcpu->buffs); + + dma_free_coherent(port->dev->dev.parent, + MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->tso_headers, + txq_pcpu->tso_headers_dma); } if (txq->descs) @@ -6049,6 +6072,123 @@ cleanup: return -ENOMEM; } +static inline void mvpp2_tso_put_hdr(struct sk_buff *skb, + struct net_device *dev, + struct mvpp2_tx_queue *txq, + struct mvpp2_tx_queue *aggr_txq, + struct mvpp2_txq_pcpu *txq_pcpu, + int hdr_sz) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + dma_addr_t addr; + + mvpp2_txdesc_txq_set(port, tx_desc, txq->id); + mvpp2_txdesc_size_set(port, tx_desc, hdr_sz); + + addr = txq_pcpu->tso_headers_dma + + txq_pcpu->txq_put_index * TSO_HEADER_SIZE; + mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN); + + mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) | + MVPP2_TXD_F_DESC | + MVPP2_TXD_PADDING_DISABLE); + mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc); +} + +static inline int mvpp2_tso_put_data(struct sk_buff *skb, + struct net_device *dev, struct tso_t *tso, + struct mvpp2_tx_queue *txq, + struct mvpp2_tx_queue *aggr_txq, + struct mvpp2_txq_pcpu *txq_pcpu, + int sz, bool left, bool last) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq); + dma_addr_t buf_dma_addr; + + mvpp2_txdesc_txq_set(port, tx_desc, txq->id); + mvpp2_txdesc_size_set(port, tx_desc, sz); + + buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) { + mvpp2_txq_desc_put(txq); + return -ENOMEM; + } + + mvpp2_txdesc_offset_set(port, tx_desc, + buf_dma_addr & MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, + buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + + if (!left) { + mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC); + if (last) { + mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc); + return 0; + } + } else { + mvpp2_txdesc_cmd_set(port, tx_desc, 0); + } + + mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc); + return 0; +} + +static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev, + struct mvpp2_tx_queue *txq, + struct mvpp2_tx_queue *aggr_txq, + struct mvpp2_txq_pcpu *txq_pcpu) +{ + struct mvpp2_port *port = netdev_priv(dev); + struct tso_t tso; + int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb); + int i, len, descs = 0; + + /* Check number of available descriptors */ + if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq, + tso_count_descs(skb)) || + mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu, + tso_count_descs(skb))) + return 0; + + tso_start(skb, &tso); + len = skb->len - hdr_sz; + while (len > 0) { + int left = min_t(int, skb_shinfo(skb)->gso_size, len); + char *hdr = txq_pcpu->tso_headers + + txq_pcpu->txq_put_index * TSO_HEADER_SIZE; + + len -= left; + descs++; + + tso_build_hdr(skb, hdr, &tso, left, len == 0); + mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz); + + while (left > 0) { + int sz = min_t(int, tso.size, left); + left -= sz; + descs++; + + if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq, + txq_pcpu, sz, left, len == 0)) + goto release; + tso_build_data(skb, &tso, sz); + } + } + + return descs; + +release: + for (i = descs - 1; i >= 0; i--) { + struct mvpp2_tx_desc *tx_desc = txq->descs + i; + tx_desc_unmap_put(port, txq, tx_desc); + } + return 0; +} + /* Main tx processing */ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) { @@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) txq_pcpu = this_cpu_ptr(txq->pcpu); aggr_txq = &port->priv->aggr_txqs[smp_processor_id()]; + if (skb_is_gso(skb)) { + frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu); + goto out; + } frags = skb_shinfo(skb)->nr_frags + 1; /* Check number of available descriptors */ @@ -6115,22 +6259,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) } } - txq_pcpu->reserved_num -= frags; - txq_pcpu->count += frags; - aggr_txq->count += frags; - - /* Enable transmit */ - wmb(); - mvpp2_aggr_txq_pend_desc_add(port, frags); - - if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) { - struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id); - - netif_tx_stop_queue(nq); - } out: if (frags > 0) { struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats); + struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id); + + txq_pcpu->reserved_num -= frags; + txq_pcpu->count += frags; + aggr_txq->count += frags; + + /* Enable transmit */ + wmb(); + mvpp2_aggr_txq_pend_desc_add(port, frags); + + if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) + netif_tx_stop_queue(nq); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; @@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, } } - features = NETIF_F_SG | NETIF_F_IP_CSUM; + features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->features = features | NETIF_F_RXCSUM; dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; dev->vlan_features |= features; @@ -7445,8 +7588,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) for_each_present_cpu(i) { priv->aggr_txqs[i].id = i; priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE; - err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], - MVPP2_AGGR_TXQ_SIZE, i, priv); + err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv); if (err < 0) return err; } diff --git a/include/net/tso.h b/include/net/tso.h index b7be852bfe9d..9a56c39e6d0a 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,8 @@ #include <net/ip.h> +#define TSO_HEADER_SIZE 128 + struct tso_t { int next_frag_idx; void *data; |