diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-01-06 04:18:07 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-01-06 04:18:07 +0300 |
commit | 4e023b44d5cec470df1366a93112293cceddc3e8 (patch) | |
tree | 0610d06bbc028ddd06da2a12e08bd2c54f095c23 | |
parent | 87eee9c5589e1e546537a38a72e7ea3f0a7cac60 (diff) | |
parent | e015593573b3e3f74bd8a63c05fa92902194a354 (diff) | |
download | linux-4e023b44d5cec470df1366a93112293cceddc3e8.tar.xz |
Merge branch 'net-lantiq_xrx200-improve-ethernet-performance'
Aleksander Jan Bajkowski says:
====================
net: lantiq_xrx200: improve ethernet performance
This patchset improves Ethernet performance by 15%.
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):
          Down        Up
Before    539 Mbps    599 Mbps
After     624 Mbps    695 Mbps
====================
Link: https://lore.kernel.org/r/20220104151144.181736-1-olek2@wp.pl
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/lantiq_xrx200.c | 62 |
2 files changed, 41 insertions, 23 deletions
diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h index 31ca9151b539..3dee15c61c8a 100644 --- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h +++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h @@ -8,7 +8,7 @@ #define LTQ_DMA_H__ #define LTQ_DESC_SIZE 0x08 /* each descriptor is 64bit */ -#define LTQ_DESC_NUM 0x40 /* 64 descriptors / channel */ +#define LTQ_DESC_NUM 0xC0 /* 192 descriptors / channel */ #define LTQ_DMA_OWN BIT(31) /* owner bit */ #define LTQ_DMA_C BIT(30) /* complete bit */ diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 503fb99c5b90..41d11137cde0 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -63,7 +63,11 @@ struct xrx200_chan { struct napi_struct napi; struct ltq_dma_channel dma; - struct sk_buff *skb[LTQ_DESC_NUM]; + + union { + struct sk_buff *skb[LTQ_DESC_NUM]; + void *rx_buff[LTQ_DESC_NUM]; + }; struct sk_buff *skb_head; struct sk_buff *skb_tail; @@ -78,6 +82,7 @@ struct xrx200_priv { struct xrx200_chan chan_rx; u16 rx_buf_size; + u16 rx_skb_size; struct net_device *net_dev; struct device *dev; @@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu) return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN); } +static int xrx200_skb_size(u16 buf_size) +{ + return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +} + /* drop all the packets from the DMA ring */ static void xrx200_flush_dma(struct xrx200_chan *ch) { @@ -173,30 +184,29 @@ static int xrx200_close(struct net_device *net_dev) return 0; } -static int xrx200_alloc_skb(struct xrx200_chan *ch) +static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size)) { - struct sk_buff *skb = ch->skb[ch->dma.desc]; + void *buf = ch->rx_buff[ch->dma.desc]; struct xrx200_priv *priv = ch->priv; dma_addr_t mapping; int ret = 0; - 
ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev, - priv->rx_buf_size); - if (!ch->skb[ch->dma.desc]) { + ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size); + if (!ch->rx_buff[ch->dma.desc]) { ret = -ENOMEM; goto skip; } - mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data, + mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc], priv->rx_buf_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(priv->dev, mapping))) { - dev_kfree_skb_any(ch->skb[ch->dma.desc]); - ch->skb[ch->dma.desc] = skb; + skb_free_frag(ch->rx_buff[ch->dma.desc]); + ch->rx_buff[ch->dma.desc] = buf; ret = -ENOMEM; goto skip; } - ch->dma.desc_base[ch->dma.desc].addr = mapping; + ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN; /* Make sure the address is written before we give it to HW */ wmb(); skip: @@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) { struct xrx200_priv *priv = ch->priv; struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc]; - struct sk_buff *skb = ch->skb[ch->dma.desc]; + void *buf = ch->rx_buff[ch->dma.desc]; u32 ctl = desc->ctl; int len = (ctl & LTQ_DMA_SIZE_MASK); struct net_device *net_dev = priv->net_dev; + struct sk_buff *skb; int ret; - ret = xrx200_alloc_skb(ch); + ret = xrx200_alloc_buf(ch, napi_alloc_frag); ch->dma.desc++; ch->dma.desc %= LTQ_DESC_NUM; @@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) return ret; } + skb = build_skb(buf, priv->rx_skb_size); + skb_reserve(skb, NET_SKB_PAD); skb_put(skb, len); /* add buffers to skb via skb->frag_list */ if (ctl & LTQ_DMA_SOP) { ch->skb_head = skb; ch->skb_tail = skb; + skb_reserve(skb, NET_IP_ALIGN); } else if (ch->skb_head) { if (ch->skb_head == ch->skb_tail) skb_shinfo(ch->skb_tail)->frag_list = skb; else ch->skb_tail->next = skb; ch->skb_tail = skb; - skb_reserve(ch->skb_tail, -NET_IP_ALIGN); ch->skb_head->len += skb->len; ch->skb_head->data_len += skb->len; ch->skb_head->truesize += 
skb->truesize; @@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) struct xrx200_chan *ch_rx = &priv->chan_rx; int old_mtu = net_dev->mtu; bool running = false; - struct sk_buff *skb; + void *buff; int curr_desc; int ret = 0; net_dev->mtu = new_mtu; priv->rx_buf_size = xrx200_buffer_size(new_mtu); + priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); if (new_mtu <= old_mtu) return ret; @@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; ch_rx->dma.desc++) { - skb = ch_rx->skb[ch_rx->dma.desc]; - ret = xrx200_alloc_skb(ch_rx); + buff = ch_rx->rx_buff[ch_rx->dma.desc]; + ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag); if (ret) { net_dev->mtu = old_mtu; priv->rx_buf_size = xrx200_buffer_size(old_mtu); + priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); break; } - dev_kfree_skb_any(skb); + skb_free_frag(buff); } ch_rx->dma.desc = curr_desc; @@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv) ltq_dma_alloc_rx(&ch_rx->dma); for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; ch_rx->dma.desc++) { - ret = xrx200_alloc_skb(ch_rx); + ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag); if (ret) goto rx_free; } @@ -511,7 +526,7 @@ rx_ring_free: /* free the allocated RX ring */ for (i = 0; i < LTQ_DESC_NUM; i++) { if (priv->chan_rx.skb[i]) - dev_kfree_skb_any(priv->chan_rx.skb[i]); + skb_free_frag(priv->chan_rx.rx_buff[i]); } rx_free: @@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx200_priv *priv) /* free the allocated RX ring */ for (i = 0; i < LTQ_DESC_NUM; i++) - dev_kfree_skb_any(priv->chan_rx.skb[i]); + skb_free_frag(priv->chan_rx.rx_buff[i]); } static int xrx200_probe(struct platform_device *pdev) @@ -553,6 +568,7 @@ static int xrx200_probe(struct platform_device *pdev) net_dev->min_mtu = ETH_ZLEN; net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0); priv->rx_buf_size = 
xrx200_buffer_size(ETH_DATA_LEN); + priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size); /* load the memory ranges */ priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); @@ -597,8 +613,10 @@ static int xrx200_probe(struct platform_device *pdev) PMAC_HD_CTL); /* setup NAPI */ - netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); - netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); + netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, + NAPI_POLL_WEIGHT); + netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, + NAPI_POLL_WEIGHT); platform_set_drvdata(pdev, priv); |