diff options
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 460 |
1 files changed, 286 insertions, 174 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5f11f9873341..39f1d16f3628 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #endif #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/netmem.h> /** * DOC: skb checksums @@ -352,14 +353,16 @@ struct sk_buff; #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS -extern int sysctl_max_skb_frags; - /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. */ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -367,7 +370,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -377,7 +380,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -387,7 +390,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -397,7 +400,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -417,7 +420,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -522,6 +525,13 @@ enum { #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) +struct ubuf_info_ops { + void (*complete)(struct sk_buff *, struct ubuf_info *, + bool zerocopy_success); + /* has to be compatible with skb_zcopy_set() */ + int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg); +}; + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. @@ -531,8 +541,7 @@ enum { * The desc field is used to track userspace buffer index. */ struct ubuf_info { - void (*callback)(struct sk_buff *, struct ubuf_info *, - bool zerocopy_success); + const struct ubuf_info_ops *ops; refcount_t refcnt; u8 flags; }; @@ -566,6 +575,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -578,7 +596,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; @@ -685,6 +706,13 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, +}; + /** * DOC: Basic sk_buff geometry * @@ -740,7 +768,6 @@ typedef unsigned char *sk_buff_data_t; * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) - * @sp: the security path, used for xfrm * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header @@ -774,7 +801,6 @@ typedef unsigned char *sk_buff_data_t; * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash @@ -801,13 +827,13 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @unreadable: indicates that at least 1 of the fragments in this skb is + * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -935,7 +961,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -977,14 +1003,14 @@ struct sk_buff { #ifdef CONFIG_NETFILTER_SKIP_EGRESS __u8 nf_skip_egress:1; #endif -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED __u8 decrypted:1; #endif __u8 slow_gro:1; #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif - + __u8 unreadable:1; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1052,7 +1078,7 @@ struct sk_buff { refcount_t users; #ifdef CONFIG_SKB_EXTENSIONS - /* only useable after checking ->active_extensions != 0 */ + /* only usable after checking ->active_extensions != 0 */ struct skb_ext *extensions; #endif }; @@ -1065,15 +1091,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -1159,15 +1186,6 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb) return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } -/** - * skb_rtable - Returns the skb &rtable - * @skb: buffer - */ -static inline struct rtable *skb_rtable(const struct sk_buff *skb) -{ - return (struct rtable *)skb_dst(skb); -} - /* For mangling skb->pkt_type from user space side from applications * such as nft, tc, etc, we only allow a conservative subset of * possible pkt_types to be set. @@ -1209,7 +1227,7 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(refcount_read(&skb->users) == 1)) + if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; @@ -1217,8 +1235,32 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } -void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + +void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason); + +static inline void +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +{ + sk_skb_reason_drop(NULL, skb, reason); +} /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason @@ -1251,7 +1293,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, @@ -1394,6 +1435,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); @@ -1465,8 +1507,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } -void __skb_get_hash(struct sk_buff *skb); -u32 __skb_get_hash_symmetric(const struct sk_buff *skb); +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); + +static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +{ + return __skb_get_hash_symmetric_net(NULL, skb); +} + +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); @@ -1545,10 +1593,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_net(net, skb); + + return skb->hash; +} + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) - __skb_get_hash(skb); + __skb_get_hash_net(NULL, skb); return skb->hash; } @@ -1583,17 +1639,26 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline int skb_cmp_decrypted(const struct sk_buff *skb1, const struct sk_buff *skb2) { -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED return skb2->decrypted - skb1->decrypted; #else return 0; #endif } +static inline bool skb_is_decrypted(const struct sk_buff *skb) +{ +#ifdef CONFIG_SKB_DECRYPTED + return skb->decrypted; +#else + return false; +#endif +} + static inline void skb_copy_decrypted(struct sk_buff *to, const struct sk_buff *from) { -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED to->decrypted = from->decrypted; #endif } @@ -1630,18 +1695,20 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; + struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length); + static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { @@ -1725,13 +1792,13 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) - uarg->callback(NULL, uarg, true); + uarg->ops->complete(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { - if (uarg->callback == msg_zerocopy_callback) + if (uarg->ops == &msg_zerocopy_ubuf_ops) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); @@ -1745,7 +1812,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (uarg) { if (!skb_zcopy_is_nouarg(skb)) - uarg->callback(skb, uarg, zerocopy_success); + uarg->ops->complete(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } @@ -1759,6 +1826,12 @@ static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) __skb_zcopy_downgrade_managed(skb); } +/* Return true if frags in this skb are readable by the host. */ +static inline bool skb_frags_readable(const struct sk_buff *skb) +{ + return !skb->unreadable; +} + static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; @@ -2414,22 +2487,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2445,10 +2533,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2457,10 +2545,19 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page; + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); + + if (netmem_is_net_iov(netmem)) { + skb->unreadable = true; + return; + } + + page = netmem_to_page(netmem); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2468,7 +2565,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2488,8 +2598,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2513,8 +2622,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -2994,6 +3111,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } @@ -3295,13 +3413,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz, return __napi_alloc_frag_align(fragsz, -align); } -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask); -static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, - unsigned int length) -{ - return __napi_alloc_skb(napi, length, GFP_ATOMIC); -} +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); @@ -3316,11 +3428,11 @@ void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, +static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. - * 1. This is for device Rx, therefor a cold page is preferred. + * 1. This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page @@ -3329,13 +3441,15 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; - return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); + return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order); } +#define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_pages(unsigned int order) -{ - return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order); -} +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ +#define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** * __dev_alloc_page - allocate a page for network Rx @@ -3345,15 +3459,17 @@ static inline struct page *dev_alloc_pages(unsigned int order) * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_page(gfp_t gfp_mask) +static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) { - return __dev_alloc_pages(gfp_mask, 0); + return __dev_alloc_pages_noprof(gfp_mask, 0); } +#define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_page(void) -{ - return dev_alloc_pages(0); -} +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ +#define dev_alloc_page() dev_alloc_pages(0) /** * dev_page_is_reusable - check whether a page can be reused for network Rx @@ -3389,7 +3505,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3399,7 +3515,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3409,7 +3525,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3420,95 +3536,61 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; -} - -/** - * skb_frag_page - retrieve the page referred to by a paged fragment - * @frag: the paged fragment - * - * Returns the &struct page associated with @frag. - */ -static inline struct page *skb_frag_page(const skb_frag_t *frag) -{ - return frag->bv_page; + fragto->offset = fragfrom->offset; } -/** - * __skb_frag_ref - take an addition reference on a paged fragment. - * @frag: the paged fragment - * - * Takes an additional reference on the paged fragment @frag. - */ -static inline void __skb_frag_ref(skb_frag_t *frag) +/* Return: true if the skb_frag contains a net_iov. */ +static inline bool skb_frag_is_net_iov(const skb_frag_t *frag) { - get_page(skb_frag_page(frag)); + return netmem_is_net_iov(frag->netmem); } /** - * skb_frag_ref - take an addition reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset. + * skb_frag_net_iov - retrieve the net_iov referred to by fragment + * @frag: the fragment * - * Takes an additional reference on the @f'th paged fragment of @skb. + * Return: the &struct net_iov associated with @frag. Returns NULL if this + * frag has no associated net_iov. */ -static inline void skb_frag_ref(struct sk_buff *skb, int f) -{ - __skb_frag_ref(&skb_shinfo(skb)->frags[f]); -} - -bool napi_pp_put_page(struct page *page, bool napi_safe); - -static inline void -skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe) -{ -#ifdef CONFIG_PAGE_POOL - if (skb->pp_recycle && napi_pp_put_page(page, napi_safe)) - return; -#endif - put_page(page); -} - -static inline void -napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) +static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) { - struct page *page = skb_frag_page(frag); + if (!skb_frag_is_net_iov(frag)) + return NULL; -#ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page, napi_safe)) - return; -#endif - put_page(page); + return netmem_to_net_iov(frag->netmem); } /** - * __skb_frag_unref - release a reference on a paged fragment. + * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment - * @recycle: recycle the page if allocated via page_pool * - * Releases a reference on the paged fragment @frag - * or recycles the page via the page_pool API. + * Return: the &struct page associated with @frag. Returns NULL if this frag + * has no associated page. */ -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) +static inline struct page *skb_frag_page(const skb_frag_t *frag) { - napi_frag_unref(frag, recycle, false); + if (skb_frag_is_net_iov(frag)) + return NULL; + + return netmem_to_page(frag->netmem); } /** - * skb_frag_unref - release a reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset + * skb_frag_netmem - retrieve the netmem referred to by a fragment + * @frag: the fragment * - * Releases a reference on the @f'th paged fragment of @skb. + * Return: the &netmem_ref associated with @frag. */ -static inline void skb_frag_unref(struct sk_buff *skb, int f) +static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) { - struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (!skb_zcopy_managed(skb)) - __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); + return frag->netmem; } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); + /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer @@ -3518,6 +3600,9 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f) */ static inline void *skb_frag_address(const skb_frag_t *frag) { + if (!skb_frag_page(frag)) + return NULL; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } @@ -3545,7 +3630,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); @@ -3710,6 +3795,9 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { @@ -3996,12 +4084,6 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); -static inline void skb_free_datagram_locked(struct sock *sk, - struct sk_buff *skb) -{ - __skb_free_datagram_locked(sk, skb, 0); -} int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); @@ -4025,6 +4107,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); @@ -4189,7 +4272,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4198,10 +4281,36 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4211,8 +4320,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4222,7 +4331,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4230,7 +4339,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4238,7 +4347,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) @@ -4263,10 +4372,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient varaiant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). */ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4286,11 +4398,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, |