diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 05:29:27 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 05:29:28 +0300 |
commit | ef4ba01181710c589c1e923a2e760090281d8750 (patch) | |
tree | 1cf42254477023e515e9ab8287988b3c0deba39f | |
parent | 94426ed2137a948d212302df571445d4245772b9 (diff) | |
parent | a580ea994fd37f4105028f5a85c38ff6508a2b25 (diff) | |
download | linux-ef4ba01181710c589c1e923a2e760090281d8750.tar.xz |
Merge branch 'minor-cleanups-to-skb-frag-ref-unref'
Mina Almasry says:
====================
Minor cleanups to skb frag ref/unref
This series is largely motivated by a recent discussion where there was
some confusion on how to properly ref/unref pp pages vs non pp pages:
https://lore.kernel.org/netdev/CAHS8izOoO-EovwMwAm9tLYetwikNPxC0FKyVGu1TPJWSz4bGoA@mail.gmail.com/T/#t
There is some subtely there because pp uses page->pp_ref_count for
refcounting, while non-pp uses get_page()/put_page() for ref counting.
Getting the refcounting pairs wrong can lead to kernel crash.
Additionally currently it may not be obvious to skb users unaware of
page pool internals how to properly acquire a ref on a pp frag. It
requires checking of skb->pp_recycle & is_pp_page() to make the correct
calls and may require some handling at the call site aware of arguable pp
internals.
This series is a minor refactor with a couple of goals:
1. skb users should be able to ref/unref a frag using
[__]skb_frag_[un]ref() functions without needing to understand pp
concepts and pp_ref_count vs get/put_page() differences.
2. reference counting functions should have a mirror opposite. I.e. there
should be a foo_unref() to every foo_ref() with a mirror opposite
implementation (as much as possible).
This is RFC to collect feedback if this change is desirable, but also so
that I don't race with the fix for the issue Dragos is seeing for his
crash.
https://lore.kernel.org/lkml/CAHS8izN436pn3SndrzsCyhmqvJHLyxgCeDpWXA4r1ANt3RCDLQ@mail.gmail.com/T/
====================
Link: https://lore.kernel.org/r/20240410190505.1225848-1-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/marvell/sky2.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/sun/cassini.c | 5 | ||||
-rw-r--r-- | drivers/net/veth.c | 3 | ||||
-rw-r--r-- | drivers/net/xen-netback/netback.c | 1 | ||||
-rw-r--r-- | include/linux/skbuff.h | 63 | ||||
-rw-r--r-- | include/linux/skbuff_ref.h | 106 | ||||
-rw-r--r-- | net/core/gro.c | 1 | ||||
-rw-r--r-- | net/core/skbuff.c | 47 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 1 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 1 | ||||
-rw-r--r-- | net/tls/tls_device.c | 1 | ||||
-rw-r--r-- | net/tls/tls_device_fallback.c | 3 | ||||
-rw-r--r-- | net/tls/tls_strp.c | 1 |
16 files changed, 129 insertions, 110 deletions
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 6482728794dd..3832c2e8ea5a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -10,6 +10,7 @@ #include <net/ipv6.h> #include <linux/netdevice.h> #include <crypto/aes.h> +#include <linux/skbuff_ref.h> #include "chcr_ktls.h" static LIST_HEAD(uld_ctx_list); @@ -1658,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, for (i = 0; i < record->num_frags; i++) { skb_shinfo(nskb)->frags[i] = record->frags[i]; /* increase the frag ref count */ - __skb_frag_ref(&skb_shinfo(nskb)->frags[i]); + __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle); } skb_shinfo(nskb)->nr_frags = record->num_frags; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 07720841a8d7..f3f7f4cc27b3 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -34,6 +34,7 @@ #include <linux/mii.h> #include <linux/of_net.h> #include <linux/dmi.h> +#include <linux/skbuff_ref.h> #include <asm/irq.h> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index eac49657bd07..8328df8645d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,6 +42,7 @@ #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/irq.h> +#include <linux/skbuff_ref.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index bfb903506367..f058e154a3bc 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -73,6 +73,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> +#include <linux/skbuff_ref.h> #include <linux/ethtool.h> #include <linux/crc32.h> #include <linux/random.h> @@ -1999,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen - swivel; skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); - __skb_frag_ref(frag); + __skb_frag_ref(frag, skb->pp_recycle); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2023,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, frag++; skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); - __skb_frag_ref(frag); + __skb_frag_ref(frag, skb->pp_recycle); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index bcdfbf61eb66..0b0293629329 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -26,6 +26,7 @@ #include <linux/ptr_ring.h> #include <linux/bpf_trace.h> #include <linux/net_tstamp.h> +#include <linux/skbuff_ref.h> #include <net/page_pool/helpers.h> #define DRV_NAME "veth" @@ -716,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp) return; for (i = 0; i < sinfo->nr_frags; i++) - __skb_frag_ref(&sinfo->frags[i]); + __skb_frag_ref(&sinfo->frags[i], false); } static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index ef76850d9bcd..48254fc07d64 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -38,6 +38,7 @@ #include <linux/if_vlan.h> #include <linux/udp.h> #include <linux/highmem.h> +#include <linux/skbuff_ref.h> #include <net/tcp.h> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7135a3e94afd..4072a7ee3859 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3492,73 +3492,10 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag) return netmem_to_page(frag->netmem); } -/** - * __skb_frag_ref - take an addition reference on a paged fragment. - * @frag: the paged fragment - * - * Takes an additional reference on the paged fragment @frag. - */ -static inline void __skb_frag_ref(skb_frag_t *frag) -{ - get_page(skb_frag_page(frag)); -} - -/** - * skb_frag_ref - take an addition reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset. - * - * Takes an additional reference on the @f'th paged fragment of @skb. - */ -static inline void skb_frag_ref(struct sk_buff *skb, int f) -{ - __skb_frag_ref(&skb_shinfo(skb)->frags[f]); -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); -bool napi_pp_put_page(struct page *page); - -static inline void -skb_page_unref(struct page *page, bool recycle) -{ -#ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page)) - return; -#endif - put_page(page); -} - -/** - * __skb_frag_unref - release a reference on a paged fragment. - * @frag: the paged fragment - * @recycle: recycle the page if allocated via page_pool - * - * Releases a reference on the paged fragment @frag - * or recycles the page via the page_pool API. - */ -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) -{ - skb_page_unref(skb_frag_page(frag), recycle); -} - -/** - * skb_frag_unref - release a reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset - * - * Releases a reference on the @f'th paged fragment of @skb. - */ -static inline void skb_frag_unref(struct sk_buff *skb, int f) -{ - struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (!skb_zcopy_managed(skb)) - __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); -} - /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h new file mode 100644 index 000000000000..4dcdbe9fbc5f --- /dev/null +++ b/include/linux/skbuff_ref.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Skb ref helpers. + * + */ + +#ifndef _LINUX_SKBUFF_REF_H +#define _LINUX_SKBUFF_REF_H + +#include <linux/skbuff.h> +#include <net/page_pool/helpers.h> + +#ifdef CONFIG_PAGE_POOL +static inline bool is_pp_page(struct page *page) +{ + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; +} + +static inline bool napi_pp_get_page(struct page *page) +{ + page = compound_head(page); + + if (!is_pp_page(page)) + return false; + + page_pool_ref_page(page); + return true; +} +#endif + +static inline void skb_page_ref(struct page *page, bool recycle) +{ +#ifdef CONFIG_PAGE_POOL + if (recycle && napi_pp_get_page(page)) + return; +#endif + get_page(page); +} + +/** + * __skb_frag_ref - take an addition reference on a paged fragment. + * @frag: the paged fragment + * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb. + * + * Takes an additional reference on the paged fragment @frag. Obtains the + * correct reference count depending on whether skb->pp_recycle is set and + * whether the frag is a page pool frag. + */ +static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle) +{ + skb_page_ref(skb_frag_page(frag), recycle); +} + +/** + * skb_frag_ref - take an addition reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset. + * + * Takes an additional reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_ref(struct sk_buff *skb, int f) +{ + __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle); +} + +bool napi_pp_put_page(struct page *page); + +static inline void +skb_page_unref(struct page *page, bool recycle) +{ +#ifdef CONFIG_PAGE_POOL + if (recycle && napi_pp_put_page(page)) + return; +#endif + put_page(page); +} + +/** + * __skb_frag_unref - release a reference on a paged fragment. + * @frag: the paged fragment + * @recycle: recycle the page if allocated via page_pool + * + * Releases a reference on the paged fragment @frag + * or recycles the page via the page_pool API. + */ +static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) +{ + skb_page_unref(skb_frag_page(frag), recycle); +} + +/** + * skb_frag_unref - release a reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset + * + * Releases a reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_unref(struct sk_buff *skb, int f) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (!skb_zcopy_managed(skb)) + __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); +} + +#endif /* _LINUX_SKBUFF_REF_H */ diff --git a/net/core/gro.c b/net/core/gro.c index 83f35d99a682..2459ab697f7f 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -3,6 +3,7 @@ #include <net/dst_metadata.h> #include <net/busy_poll.h> #include <trace/events/net.h> +#include <linux/skbuff_ref.h> #define MAX_GRO_SKBS 8 diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ab970ded8a7b..ea052fa710d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -51,6 +51,7 @@ #endif #include <linux/string.h> #include <linux/skbuff.h> +#include <linux/skbuff_ref.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> @@ -906,11 +907,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_page(struct page *page) -{ - return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -1032,37 +1028,6 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) return napi_pp_put_page(virt_to_page(data)); } -/** - * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb - * @skb: page pool aware skb - * - * Increase the fragment reference count (pp_ref_count) of a skb. This is - * intended to gain fragment references only for page pool aware skbs, - * i.e. when skb->pp_recycle is true, and not for fragments in a - * non-pp-recycling skb. It has a fallback to increase references on normal - * pages, as page pool aware skbs may also have normal page fragments. - */ -static int skb_pp_frag_ref(struct sk_buff *skb) -{ - struct skb_shared_info *shinfo; - struct page *head_page; - int i; - - if (!skb->pp_recycle) - return -EINVAL; - - shinfo = skb_shinfo(skb); - - for (i = 0; i < shinfo->nr_frags; i++) { - head_page = compound_head(skb_frag_page(&shinfo->frags[i])); - if (likely(is_pp_page(head_page))) - page_pool_ref_page(head_page); - else - page_ref_inc(head_page); - } - return 0; -} - static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) @@ -4175,7 +4140,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - __skb_frag_ref(fragfrom); + __skb_frag_ref(fragfrom, skb->pp_recycle); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); @@ -4825,7 +4790,7 @@ normal: } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; - __skb_frag_ref(nskb_frag); + __skb_frag_ref(nskb_frag, nskb->pp_recycle); size = skb_frag_size(nskb_frag); if (pos < offset) { @@ -5956,10 +5921,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - if (skb_pp_frag_ref(from)) { - for (i = 0; i < from_shinfo->nr_frags; i++) - __skb_frag_ref(&from_shinfo->frags[i]); - } + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle); to->truesize += delta; to->len += len; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 40330253f076..dff04580318f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -20,6 +20,7 @@ #include <net/udp.h> #include <net/tcp.h> #include <net/espintcp.h> +#include <linux/skbuff_ref.h> #include <linux/highmem.h> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9282fafc0e61..61119d42b0fd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -44,6 +44,7 @@ #include <linux/gfp.h> #include <linux/module.h> #include <linux/static_key.h> +#include <linux/skbuff_ref.h> #include <trace/events/tcp.h> diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fb431d0a3475..6bc0a84c8d05 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include <net/tcp.h> #include <net/espintcp.h> #include <net/inet6_hashtables.h> +#include <linux/skbuff_ref.h> #include <linux/highmem.h> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bf8ed36b1ad6..ab6e694f7bc2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -37,6 +37,7 @@ #include <net/inet_connection_sock.h> #include <net/tcp.h> #include <net/tls.h> +#include <linux/skbuff_ref.h> #include "tls.h" #include "trace.h" diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 4e7228f275fa..9237dded4467 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -33,6 +33,7 @@ #include <crypto/aead.h> #include <crypto/scatterwalk.h> #include <net/ip6_checksum.h> +#include <linux/skbuff_ref.h> #include "tls.h" @@ -277,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in, for (i = 0; remaining > 0; i++) { skb_frag_t *frag = &record->frags[i]; - __skb_frag_ref(frag); + __skb_frag_ref(frag, false); sg_set_page(sg_in + i, skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb..58c4b06f4f0c 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ #include <linux/skbuff.h> +#include <linux/skbuff_ref.h> #include <linux/workqueue.h> #include <net/strparser.h> #include <net/tcp.h> |