diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-11-11 03:43:51 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-11-11 03:43:51 +0300 |
| commit | 7fc2bf8d30beebceac5841ad2227e0bd41abdf27 (patch) | |
| tree | 2c15b4330839599ba6867a144102f9c0164f6cf4 /include | |
| parent | 38f073a71e85c726b09f935e7886de72bc57b15b (diff) | |
| parent | 67f4cfb530150387dedc13bac7e2ab7f1a525d7f (diff) | |
| download | linux-7fc2bf8d30beebceac5841ad2227e0bd41abdf27.tar.xz | |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2025-11-10
We've added 19 non-merge commits during the last 3 day(s) which contain
a total of 22 files changed, 1345 insertions(+), 197 deletions(-).
The main changes are:
1) Preserve skb metadata after a TC BPF program has changed the skb,
from Jakub Sitnicki.
This allows a TC program at the end of a TC filter chain to still see
the skb metadata, even if another TC program at the front of the chain
has changed the skb using BPF helpers.
2) Initial af_smc bpf_struct_ops support to control the smc specific
syn/synack options, from D. Wythe.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
bpf/selftests: Add selftest for bpf_smc_hs_ctrl
net/smc: bpf: Introduce generic hook for handshake flow
bpf: Export necessary symbols for modules with struct_ops
selftests/bpf: Cover skb metadata access after bpf_skb_change_proto
selftests/bpf: Cover skb metadata access after change_head/tail helper
selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room
selftests/bpf: Cover skb metadata access after vlan push/pop helper
selftests/bpf: Expect unclone to preserve skb metadata
selftests/bpf: Dump skb metadata on verification failure
selftests/bpf: Verify skb metadata in BPF instead of userspace
bpf: Make bpf_skb_change_head helper metadata-safe
bpf: Make bpf_skb_change_proto helper metadata-safe
bpf: Make bpf_skb_adjust_room metadata-safe
bpf: Make bpf_skb_vlan_push helper metadata-safe
bpf: Make bpf_skb_vlan_pop helper metadata-safe
vlan: Make vlan_remove_tag return nothing
bpf: Unclone skb head on bpf_dynptr_write to skb metadata
net: Preserve metadata on pskb_expand_head
net: Helper to move packet data and metadata after skb_push/pull
====================
Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/filter.h | 9 | ||||
| -rw-r--r-- | include/linux/if_vlan.h | 13 | ||||
| -rw-r--r-- | include/linux/skbuff.h | 75 | ||||
| -rw-r--r-- | include/net/netns/smc.h | 3 | ||||
| -rw-r--r-- | include/net/smc.h | 53 |
5 files changed, 146 insertions, 7 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h index e116de7edc58..a104b3994230 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1781,6 +1781,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags); void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, @@ -1817,6 +1819,13 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi { } +static inline int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, + u64 flags) +{ + return -EOPNOTSUPP; +} + static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) { return ERR_PTR(-EOPNOTSUPP); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 15e01935d3fa..f7f34eb15e06 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -355,16 +355,17 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, unsigned int mac_len) { + const u8 meta_len = mac_len > ETH_TLEN ? skb_metadata_len(skb) : 0; struct vlan_ethhdr *veth; - if (skb_cow_head(skb, VLAN_HLEN) < 0) + if (skb_cow_head(skb, meta_len + VLAN_HLEN) < 0) return -ENOMEM; skb_push(skb, VLAN_HLEN); /* Move the mac header sans proto to the beginning of the new header. */ if (likely(mac_len > ETH_TLEN)) - memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); + skb_postpush_data_move(skb, VLAN_HLEN, mac_len - ETH_TLEN); if (skb_mac_header_was_set(skb)) skb->mac_header -= VLAN_HLEN; @@ -731,18 +732,16 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. - * - * Returns: a new pointer to skb->data, or NULL on failure to pull. */ -static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) +static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *vlan_tci = ntohs(vhdr->h_vlan_TCI); - memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); vlan_set_encap_proto(skb, vhdr); - return __skb_pull(skb, VLAN_HLEN); + __skb_pull(skb, VLAN_HLEN); + skb_postpull_data_move(skb, VLAN_HLEN, 2 * ETH_ALEN); } /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a7cc3d1f4fd1..ff90281ddf90 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4564,6 +4564,81 @@ static inline void skb_metadata_clear(struct sk_buff *skb) skb_metadata_set(skb, 0); } +/** + * skb_data_move - Move packet data and metadata after skb_push() or skb_pull(). + * @skb: packet to operate on + * @len: number of bytes pushed or pulled from &sk_buff->data + * @n: number of bytes to memmove() from pre-push/pull &sk_buff->data + * + * Moves @n bytes of packet data, can be zero, and all bytes of skb metadata. + * + * Assumes metadata is located immediately before &sk_buff->data prior to the + * push/pull, and that sufficient headroom exists to hold it after an + * skb_push(). Otherwise, metadata is cleared and a one-time warning is issued. + * + * Prefer skb_postpull_data_move() or skb_postpush_data_move() to calling this + * helper directly. + */ +static inline void skb_data_move(struct sk_buff *skb, const int len, + const unsigned int n) +{ + const u8 meta_len = skb_metadata_len(skb); + u8 *meta, *meta_end; + + if (!len || (!n && !meta_len)) + return; + + if (!meta_len) + goto no_metadata; + + meta_end = skb_metadata_end(skb); + meta = meta_end - meta_len; + + if (WARN_ON_ONCE(meta_end + len != skb->data || + meta_len > skb_headroom(skb))) { + skb_metadata_clear(skb); + goto no_metadata; + } + + memmove(meta + len, meta, meta_len + n); + return; + +no_metadata: + memmove(skb->data, skb->data - len, n); +} + +/** + * skb_postpull_data_move - Move packet data and metadata after skb_pull(). + * @skb: packet to operate on + * @len: number of bytes pulled from &sk_buff->data + * @n: number of bytes to memmove() from pre-pull &sk_buff->data + * + * See skb_data_move() for details. + */ +static inline void skb_postpull_data_move(struct sk_buff *skb, + const unsigned int len, + const unsigned int n) +{ + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_data_move(skb, len, n); +} + +/** + * skb_postpush_data_move - Move packet data and metadata after skb_push(). + * @skb: packet to operate on + * @len: number of bytes pushed onto &sk_buff->data + * @n: number of bytes to memmove() from pre-push &sk_buff->data + * + * See skb_data_move() for details. + */ +static inline void skb_postpush_data_move(struct sk_buff *skb, + const unsigned int len, + const unsigned int n) +{ + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_data_move(skb, -len, n); +} + struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 6ceb12baec24..ed24c9f638ee 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -17,6 +17,9 @@ struct netns_smc { #ifdef CONFIG_SYSCTL struct ctl_table_header *smc_hdr; #endif +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) + struct smc_hs_ctrl __rcu *hs_ctrl; +#endif /* CONFIG_SMC_HS_CTRL_BPF */ unsigned int sysctl_autocorking_size; unsigned int sysctl_smcr_buf_type; int sysctl_smcr_testlink_time; diff --git a/include/net/smc.h b/include/net/smc.h index 08bee529ed8d..bfdc4c41f019 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -17,6 +17,8 @@ #include <linux/wait.h> #include <linux/dibs.h> +struct tcp_sock; +struct inet_request_sock; struct sock; #define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ @@ -50,4 +52,55 @@ struct smcd_dev { u8 going_away : 1; }; +#define SMC_HS_CTRL_NAME_MAX 16 + +enum { + /* ops can be inherit from init_net */ + SMC_HS_CTRL_FLAG_INHERITABLE = 0x1, + + SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE, +}; + +struct smc_hs_ctrl { + /* private */ + + struct list_head list; + struct module *owner; + + /* public */ + + /* unique name */ + char name[SMC_HS_CTRL_NAME_MAX]; + int flags; + + /* Invoked before computing SMC option for SYN packets. + * We can control whether to set SMC options by returning various value. + * Return 0 to disable SMC, or return any other value to enable it. + */ + int (*syn_option)(struct tcp_sock *tp); + + /* Invoked before Set up SMC options for SYN-ACK packets + * We can control whether to respond SMC options by returning various + * value. Return 0 to disable SMC, or return any other value to enable + * it. + */ + int (*synack_option)(const struct tcp_sock *tp, + struct inet_request_sock *ireq); +}; + +#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) +#define smc_call_hsbpf(init_val, tp, func, ...) ({ \ + typeof(init_val) __ret = (init_val); \ + struct smc_hs_ctrl *ctrl; \ + rcu_read_lock(); \ + ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \ + if (ctrl && ctrl->func) \ + __ret = ctrl->func(tp, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + __ret; \ +}) +#else +#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); }) +#endif /* CONFIG_SMC_HS_CTRL_BPF */ + #endif /* _SMC_H */ |
