diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-05-06 05:27:54 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-05-06 05:27:55 +0300 |
| commit | 3e8ec3440b3731576f0e71a01121445e66c26bfd (patch) | |
| tree | e5a45083eb2049055b5ae4a675fac831b49b42a5 | |
| parent | 22675f07260ca26423851a42b553b0ea669228d1 (diff) | |
| parent | 203cee647f551abc87b992045cd920b117ff990a (diff) | |
| download | linux-3e8ec3440b3731576f0e71a01121445e66c26bfd.tar.xz | |
Merge branch 'xsk-fix-bugs-around-xsk-skb-allocation'
Jason Xing says:
====================
xsk: fix bugs around xsk skb allocation
There are rare issues around xsk_build_skb(). Some of them
were found by Sashiko[1][2].
[1]: https://lore.kernel.org/all/20260415082654.21026-1-kerneljasonxing@gmail.com/
[2]: https://lore.kernel.org/all/20260418045644.28612-1-kerneljasonxing@gmail.com/
====================
Link: https://patch.msgid.link/20260502200722.53960-1-kerneljasonxing@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | net/xdp/xsk.c | 115 | ||||
| -rw-r--r-- | net/xdp/xsk_buff_pool.c | 3 |
2 files changed, 77 insertions, 41 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 887abed25466..5e5786cd9af5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) { - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + struct xsk_addrs *xsk_addr; + + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (unlikely(!xsk_addr)) + return NULL; + + xsk_addr->addrs[0] = addr; + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + return xsk_addr; +} + +static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); + if (likely(xsk_addr)) + xsk_addr->num_descs = 1; + return xsk_addr; +} + +static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + return 0; + } + + if (unlikely(!__xsk_addrs_alloc(skb, addr))) + return -ENOMEM; + return 0; } static void xsk_inc_num_desc(struct sk_buff *skb) @@ -685,7 +718,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; for (i = 0; i < num_descs; i++) { @@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, - u64 addr) +static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock 
*xs, + u64 addr) { + int err; + + err = xsk_skb_destructor_set_addr(skb, addr); + if (unlikely(err)) + return err; + skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; - xsk_skb_destructor_set_addr(skb, addr); + return 0; } static void xsk_consume_skb(struct sk_buff *skb) @@ -740,7 +779,7 @@ static void xsk_consume_skb(struct sk_buff *skb) u32 num_descs = xsk_get_num_desc(skb); struct xsk_addrs *xsk_addr; - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } @@ -819,28 +858,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); - - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return ERR_PTR(err); + } } } else { struct xsk_addrs *xsk_addr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - } + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb @@ -856,8 +886,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, addr = buffer - pool->addrs; for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { - if (unlikely(i >= MAX_SKB_FRAGS)) + if (unlikely(i >= MAX_SKB_FRAGS)) { + if (!xs->skb) + kfree_skb(skb); return ERR_PTR(-EOVERFLOW); + } page = 
pool->umem->pgs[addr >> PAGE_SHIFT]; get_page(page); @@ -914,7 +947,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (unlikely(err)) goto free_err; - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, xs->pool, hr); @@ -927,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct page *page; u8 *vaddr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) { - err = -ENOMEM; - goto free_err; - } - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; } if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { @@ -964,18 +987,28 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } + if (!xs->skb) { + err = xsk_skb_init_misc(skb, xs, desc->addr); + if (unlikely(err)) + goto free_err; + } xsk_inc_num_desc(skb); return skb; free_err: - if (skb && !skb_shinfo(skb)->nr_frags) + if (skb && !xs->skb) kfree_skb(skb); if (err == -EOVERFLOW) { - /* Drop the packet */ - xsk_inc_num_desc(xs->skb); - xsk_drop_skb(xs->skb); + if (xs->skb) { + /* Drop the packet */ + xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + } else { + xsk_cq_cancel_locked(xs->pool, 1); + xs->tx->invalid_descs++; + } xskq_cons_release(xs->tx); } else { /* Let application retry */ diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index cd7bc50872f6..d981cfdd8535 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (force_zc && force_copy) return -EINVAL; + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) + return -EOPNOTSUPP; + if 
(xsk_get_pool_from_qid(netdev, queue_id)) return -EBUSY; |
