author    David S. Miller <davem@davemloft.net>  2017-08-24 06:42:10 +0300
committer David S. Miller <davem@davemloft.net>  2017-08-24 06:42:10 +0300
commit    ae99e188928e01dfa0fa45d2e1431ad33a0eaad9
tree      1b27593712dd7a12cfe32fa24400a5005f9afb8e
parent    5f9ae3d9e7e4ad6db0491abc7c4ae5452dbeadd8
parent    186cd4d4e4144803652212eb0b7413141469feee
Merge branch 'mvpp2-software-TSO-support'
Antoine Tenart says:

====================
net: mvpp2: software TSO support

This series adds s/w TSO support to the PPv2 driver, in addition to two
cosmetic commits.

As stated in patch 3/3: using iperf and 10G ports, TSO shows a
significant performance improvement, by a factor of 2, reaching around
9.5Gbps in TX, as well as a significant CPU usage drop (from 25% to
15%).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h |   1 -
 drivers/net/ethernet/freescale/fec_main.c          |   1 -
 drivers/net/ethernet/marvell/mv643xx_eth.c         |   2 --
 drivers/net/ethernet/marvell/mvneta.c              |   3 ---
 drivers/net/ethernet/marvell/mvpp2.c               | 182 ++++++++++++++++----
 include/net/tso.h                                  |   2 ++
 6 files changed, 164 insertions(+), 27 deletions(-)
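For context before the per-file changes: the series drives the generic
software-TSO helpers from <net/tso.h> rather than adding driver-private
segmentation. Below is a minimal sketch of the loop those helpers imply,
mirroring mvpp2_tx_tso() later in this diff. It assumes the 2017-era
tso_start() signature (returns void); xmit_hdr() and xmit_data() are
hypothetical stand-ins for the driver's descriptor-filling code, and the
on-stack header buffer is for illustration only — a real driver writes
into a DMA-able area such as the per-CPU tso_headers buffer added here.

	#include <linux/skbuff.h>
	#include <net/tcp.h>
	#include <net/tso.h>

	/* Hypothetical descriptor-fill hooks, not part of the patch. */
	static inline void xmit_hdr(char *hdr, int hdr_sz) { }
	static inline void xmit_data(void *data, int sz) { }

	static void sw_tso_sketch(struct sk_buff *skb)
	{
		/* Per-segment L2+L3+L4 header length, as mvpp2_tx_tso() computes it */
		int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
		int len = skb->len - hdr_sz;
		struct tso_t tso;

		tso_start(skb, &tso);

		while (len > 0) {
			/* One TCP segment per iteration, at most gso_size payload bytes */
			int seg = min_t(int, skb_shinfo(skb)->gso_size, len);
			char hdr[TSO_HEADER_SIZE];

			len -= seg;

			/* Rebuild headers; the last flag fixes up FIN/PSH and lengths */
			tso_build_hdr(skb, hdr, &tso, seg, len == 0);
			xmit_hdr(hdr, hdr_sz);

			/* Payload may span skb frags: one chunk per descriptor */
			while (seg > 0) {
				int sz = min_t(int, tso.size, seg);

				xmit_data(tso.data, sz);
				seg -= sz;
				tso_build_data(skb, &tso, sz);	/* advance data/size */
			}
		}
	}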
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 57858522c33c..67d1a3230773 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -277,7 +277,6 @@ struct snd_queue {
u16 xdp_free_cnt;
bool is_xdp;
-#define TSO_HEADER_SIZE 128
/* For TSO segment's header */
char *tso_hdrs;
dma_addr_t tso_hdrs_phys;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index df09b254553d..56f56d6ada9c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -226,7 +226,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define COPYBREAK_DEFAULT 256
-#define TSO_HEADER_SIZE 128
/* Max number of allowed TCP segments for software TSO */
#define FEC_MAX_TSO_SEGS 100
#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 9c94ea9b2b80..fb2d533ae4ef 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -183,8 +183,6 @@ static char mv643xx_eth_driver_version[] = "1.4";
#define DEFAULT_TX_QUEUE_SIZE 512
#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
-#define TSO_HEADER_SIZE 128
-
/* Max number of allowed TCP segments for software TSO */
#define MV643XX_MAX_TSO_SEGS 100
#define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 0aab74c2a209..35ff1ecfcff0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -281,9 +281,6 @@
*/
#define MVNETA_RSS_LU_TABLE_SIZE 1
-/* TSO header size */
-#define TSO_HEADER_SIZE 128
-
/* Max number of Rx descriptors */
#define MVNETA_MAX_RXD 128
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 02c62cbbfe51..7fa251bf91ae 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -35,6 +35,7 @@
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/tso.h>
/* RX Fifo Registers */
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port))
@@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu {
/* Index of the TX DMA descriptor to be cleaned up */
int txq_get_index;
+
+ /* DMA buffer for TSO headers */
+ char *tso_headers;
+ dma_addr_t tso_headers_dma;
};
struct mvpp2_tx_queue {
@@ -5284,15 +5289,14 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
/* Allocate and initialize descriptors for aggr TXQ */
static int mvpp2_aggr_txq_init(struct platform_device *pdev,
- struct mvpp2_tx_queue *aggr_txq,
- int desc_num, int cpu,
+ struct mvpp2_tx_queue *aggr_txq, int cpu,
struct mvpp2 *priv)
{
u32 txq_dma;
/* Allocate memory for TX descriptors */
aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
- desc_num * MVPP2_DESC_ALIGNED_SIZE,
+ MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
&aggr_txq->descs_dma, GFP_KERNEL);
if (!aggr_txq->descs)
return -ENOMEM;
@@ -5313,7 +5317,8 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
- mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
+ mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu),
+ MVPP2_AGGR_TXQ_SIZE);
return 0;
}
@@ -5494,6 +5499,14 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
txq_pcpu->reserved_num = 0;
txq_pcpu->txq_put_index = 0;
txq_pcpu->txq_get_index = 0;
+
+ txq_pcpu->tso_headers =
+ dma_alloc_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ &txq_pcpu->tso_headers_dma,
+ GFP_KERNEL);
+ if (!txq_pcpu->tso_headers)
+ goto cleanup;
}
return 0;
@@ -5501,6 +5514,11 @@ cleanup:
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);
+
+ dma_free_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ txq_pcpu->tso_headers,
+ txq_pcpu->tso_headers_dma);
}
dma_free_coherent(port->dev->dev.parent,
@@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);
+
+ dma_free_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ txq_pcpu->tso_headers,
+ txq_pcpu->tso_headers_dma);
}
if (txq->descs)
@@ -6049,6 +6072,123 @@ cleanup:
return -ENOMEM;
}
+static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
+ struct net_device *dev,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu,
+ int hdr_sz)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+ dma_addr_t addr;
+
+ mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+ mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);
+
+ addr = txq_pcpu->tso_headers_dma +
+ txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+ mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
+ mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
+
+ mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
+ MVPP2_TXD_F_DESC |
+ MVPP2_TXD_PADDING_DISABLE);
+ mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+}
+
+static inline int mvpp2_tso_put_data(struct sk_buff *skb,
+ struct net_device *dev, struct tso_t *tso,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu,
+ int sz, bool left, bool last)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+ dma_addr_t buf_dma_addr;
+
+ mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+ mvpp2_txdesc_size_set(port, tx_desc, sz);
+
+ buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
+ mvpp2_txq_desc_put(txq);
+ return -ENOMEM;
+ }
+
+ mvpp2_txdesc_offset_set(port, tx_desc,
+ buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+ mvpp2_txdesc_dma_addr_set(port, tx_desc,
+ buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+
+ if (!left) {
+ mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
+ if (last) {
+ mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
+ return 0;
+ }
+ } else {
+ mvpp2_txdesc_cmd_set(port, tx_desc, 0);
+ }
+
+ mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+ return 0;
+}
+
+static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct tso_t tso;
+ int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int i, len, descs = 0;
+
+ /* Check number of available descriptors */
+ if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
+ tso_count_descs(skb)) ||
+ mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
+ tso_count_descs(skb)))
+ return 0;
+
+ tso_start(skb, &tso);
+ len = skb->len - hdr_sz;
+ while (len > 0) {
+ int left = min_t(int, skb_shinfo(skb)->gso_size, len);
+ char *hdr = txq_pcpu->tso_headers +
+ txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+
+ len -= left;
+ descs++;
+
+ tso_build_hdr(skb, hdr, &tso, left, len == 0);
+ mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);
+
+ while (left > 0) {
+ int sz = min_t(int, tso.size, left);
+ left -= sz;
+ descs++;
+
+ if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
+ txq_pcpu, sz, left, len == 0))
+ goto release;
+ tso_build_data(skb, &tso, sz);
+ }
+ }
+
+ return descs;
+
+release:
+ for (i = descs - 1; i >= 0; i--) {
+ struct mvpp2_tx_desc *tx_desc = txq->descs + i;
+ tx_desc_unmap_put(port, txq, tx_desc);
+ }
+ return 0;
+}
+
/* Main tx processing */
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
{
@@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
txq_pcpu = this_cpu_ptr(txq->pcpu);
aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
+ if (skb_is_gso(skb)) {
+ frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
+ goto out;
+ }
frags = skb_shinfo(skb)->nr_frags + 1;
/* Check number of available descriptors */
@@ -6115,22 +6259,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
}
}
- txq_pcpu->reserved_num -= frags;
- txq_pcpu->count += frags;
- aggr_txq->count += frags;
-
- /* Enable transmit */
- wmb();
- mvpp2_aggr_txq_pend_desc_add(port, frags);
-
- if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
- struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
-
- netif_tx_stop_queue(nq);
- }
out:
if (frags > 0) {
struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+ struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+ txq_pcpu->reserved_num -= frags;
+ txq_pcpu->count += frags;
+ aggr_txq->count += frags;
+
+ /* Enable transmit */
+ wmb();
+ mvpp2_aggr_txq_pend_desc_add(port, frags);
+
+ if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
+ netif_tx_stop_queue(nq);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
@@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
}
- features = NETIF_F_SG | NETIF_F_IP_CSUM;
+ features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
dev->vlan_features |= features;
@@ -7445,8 +7588,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
for_each_present_cpu(i) {
priv->aggr_txqs[i].id = i;
priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
- err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i],
- MVPP2_AGGR_TXQ_SIZE, i, priv);
+ err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
if (err < 0)
return err;
}
diff --git a/include/net/tso.h b/include/net/tso.h
index b7be852bfe9d..9a56c39e6d0a 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -3,6 +3,8 @@
#include <net/ip.h>
+#define TSO_HEADER_SIZE 128
+
struct tso_t {
int next_frag_idx;
void *data;
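
With TSO_HEADER_SIZE now defined once in <net/tso.h>, drivers can size
their per-queue header areas from the shared constant instead of a
private copy. A minimal, hypothetical helper in the style of the
mvpp2_txq_init() change above (the function name and parameters are
illustrative, not part of the patch):

	#include <linux/dma-mapping.h>
	#include <net/tso.h>	/* single home of TSO_HEADER_SIZE (128) */

	/* One TSO_HEADER_SIZE slot per TX descriptor slot in the queue. */
	static char *alloc_tso_hdrs(struct device *dev, int txq_size,
				    dma_addr_t *dma)
	{
		return dma_alloc_coherent(dev, txq_size * TSO_HEADER_SIZE,
					  dma, GFP_KERNEL);
	}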