diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-06-03 00:12:36 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-06-03 00:12:36 +0300 |
| commit | b736e34b963d7b05ee78fd3f0e7eb53a2edef7b9 (patch) | |
| tree | d3cd0efecbefb62fa3aab54f2fff98cf2fcad548 | |
| parent | cbadf6015e6b7920065b9cc53e8d0088a66b3a34 (diff) | |
| parent | 399f030cd6123f1b3539d1557a6e956eb1cd7da7 (diff) | |
| download | linux-b736e34b963d7b05ee78fd3f0e7eb53a2edef7b9.tar.xz | |
Merge branch 'net-mlx5-avoid-payload-in-skb-s-linear-part-for-better-gro-processing'
Tariq Toukan says:
====================
net/mlx5: Avoid payload in skb's linear part for better GRO-processing
This is V7 of a series originally submitted by Christoph.
When LRO is enabled on the MLX, mlx5e_skb_from_cqe_mpwrq_nonlinear
copies parts of the payload to the linear part of the skb.
This triggers suboptimal processing in GRO, causing slow throughput.
This patch series addresses this by using eth_get_headlen to compute the
size of the protocol headers and only copy those bits. This results in a
significant throughput improvement (detailed results in the specific
patch).
====================
Link: https://patch.msgid.link/20260601061522.398044-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 33 |
1 files changed, 22 insertions, 11 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 5b60aa47c75b..6fbc0441c4b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1912,7 +1912,6 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u32 page_idx) { struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; - u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; struct mlx5e_frag_page *linear_page = NULL; struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; @@ -1923,11 +1922,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w unsigned int truesize = 0; u32 pg_consumed_bytes; struct bpf_prog *prog; + void *va, *head_addr; struct sk_buff *skb; u32 linear_frame_sz; u16 linear_data_len; u16 linear_hr; - void *va; + u16 headlen; if (unlikely(cqe_bcnt > rq->hw_mtu)) { u8 lro_num_seg = get_cqe_lro_num_seg(cqe); @@ -1940,9 +1940,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w prog = rcu_dereference(rq->xdp_prog); + head_addr = netmem_address(head_page->netmem) + head_offset; + if (prog) { /* area for bpf_xdp_[store|load]_bytes */ - net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); + net_prefetchw(head_addr); va = mlx5e_mpwqe_get_linear_page_frag(rq); if (!va) { @@ -1956,6 +1958,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); linear_page = &rq->mpwqe.linear_info->frag_page; } else { + dma_addr_t addr; + skb = napi_alloc_skb(rq->cq.napi, ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); if (unlikely(!skb)) { @@ -1967,6 +1971,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w net_prefetchw(va); /* xdp_frame data area */ net_prefetchw(skb->data); + headlen = min(MLX5E_RX_MAX_HEAD, cqe_bcnt); + addr = page_pool_get_dma_addr_netmem(head_page->netmem); + dma_sync_single_for_cpu(rq->pdev, addr + head_offset, + ALIGN(headlen, sizeof(long)), + rq->buff.map_dir); + + headlen = eth_get_headlen(rq->netdev, head_addr, headlen); + frag_offset += headlen; byte_cnt -= headlen; linear_hr = skb_headroom(skb); @@ -2051,13 +2063,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w pagep->frags++; while (++pagep < frag_page); - headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len, - skb->data_len); - __pskb_pull_tail(skb, headlen); + if (len < ETH_HLEN) + __pskb_pull_tail(skb, min(ETH_HLEN - len, + skb->data_len)); } } else { - dma_addr_t addr; - if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; @@ -2071,10 +2081,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w pagep->frags++; while (++pagep < frag_page); } + /* copy header */ - addr = page_pool_get_dma_addr_netmem(head_page->netmem); - mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, - head_offset, head_offset, headlen); + skb_copy_to_linear_data(skb, head_addr, + ALIGN(headlen, sizeof(long))); + /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; |
