summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwilly tarreau <w@1wt.eu>2014-01-16 11:20:17 +0400
committerDavid S. Miller <davem@davemloft.net>2014-01-17 03:15:43 +0400
commitf19fadfce8717d1da5e38c12d0dbe473bc3a7626 (patch)
tree66855a00be2e09b2d81b8b52a20aafa0c5761fe3
parent8ec2cd48fe0d08f4c5370e925f083a68e4593ec8 (diff)
downloadlinux-f19fadfce8717d1da5e38c12d0dbe473bc3a7626.tar.xz
net: mvneta: implement rx_copybreak
calling dma_map_single()/dma_unmap_single() is quite expensive compared to copying a small packet. So let's copy short frames and keep the buffers mapped. We set the limit to 256 bytes which seems to give good results both on the XP-GP board and on the AX3/4. The Rx small packet rate increased by 16.4% doing this, from 486kpps to 573kpps. It is worth noting that even the call to the function dma_sync_single_range_for_cpu() is expensive (300 ns) although less than dma_unmap_single(). Without it, the packet rate raises to 711kpps (+24% more). Thus on systems where coherency from device to CPU is guaranteed by a snoop control unit, this patch should provide even more gains, and probably rx_copybreak could be increased. Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Gregory CLEMENT <gregory.clement@free-electrons.com> Tested-by: Arnaud Ebalard <arno@natisbad.org> Signed-off-by: Willy Tarreau <w@1wt.eu> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c44
1 files changed, 38 insertions, 6 deletions
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 726a8d292a08..f5fc7a249880 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -444,6 +444,8 @@ static int txq_number = 8;
static int rxq_def;
+static int rx_copybreak __read_mostly = 256;
+
#define MVNETA_DRIVER_NAME "mvneta"
#define MVNETA_DRIVER_VERSION "1.0"
@@ -1463,22 +1465,51 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
rx_done++;
rx_filled++;
rx_status = rx_desc->status;
+ rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
data = (unsigned char *)rx_desc->buf_cookie;
if (!mvneta_rxq_desc_is_first_last(rx_status) ||
- (rx_status & MVNETA_RXD_ERR_SUMMARY) ||
- !(skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size))) {
+ (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+ err_drop_frame:
dev->stats.rx_errors++;
mvneta_rx_error(pp, rx_desc);
/* leave the descriptor untouched */
continue;
}
- dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
+ if (rx_bytes <= rx_copybreak) {
+ /* better copy a small frame and not unmap the DMA region */
+ skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
+ if (unlikely(!skb))
+ goto err_drop_frame;
+
+ dma_sync_single_range_for_cpu(dev->dev.parent,
+ rx_desc->buf_phys_addr,
+ MVNETA_MH_SIZE + NET_SKB_PAD,
+ rx_bytes,
+ DMA_FROM_DEVICE);
+ memcpy(skb_put(skb, rx_bytes),
+ data + MVNETA_MH_SIZE + NET_SKB_PAD,
+ rx_bytes);
+
+ skb->protocol = eth_type_trans(skb, dev);
+ mvneta_rx_csum(pp, rx_status, skb);
+ napi_gro_receive(&pp->napi, skb);
+
+ rcvd_pkts++;
+ rcvd_bytes += rx_bytes;
+
+ /* leave the descriptor and buffer untouched */
+ continue;
+ }
+
+ skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
+ if (!skb)
+ goto err_drop_frame;
+
+ dma_unmap_single(dev->dev.parent, rx_desc->buf_phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
- rx_bytes = rx_desc->data_size -
- (ETH_FCS_LEN + MVNETA_MH_SIZE);
rcvd_pkts++;
rcvd_bytes += rx_bytes;
@@ -1495,7 +1526,7 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
/* Refill processing */
err = mvneta_rx_refill(pp, rx_desc);
if (err) {
- netdev_err(pp->dev, "Linux processing - Can't refill\n");
+ netdev_err(dev, "Linux processing - Can't refill\n");
rxq->missed++;
rx_filled--;
}
@@ -2945,3 +2976,4 @@ module_param(rxq_number, int, S_IRUGO);
module_param(txq_number, int, S_IRUGO);
module_param(rxq_def, int, S_IRUGO);
+module_param(rx_copybreak, int, S_IRUGO | S_IWUSR);