summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-11-11 03:43:51 +0300
committerJakub Kicinski <kuba@kernel.org>2025-11-11 03:43:51 +0300
commit7fc2bf8d30beebceac5841ad2227e0bd41abdf27 (patch)
tree2c15b4330839599ba6867a144102f9c0164f6cf4 /include
parent38f073a71e85c726b09f935e7886de72bc57b15b (diff)
parent67f4cfb530150387dedc13bac7e2ab7f1a525d7f (diff)
downloadlinux-7fc2bf8d30beebceac5841ad2227e0bd41abdf27.tar.xz
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says: ==================== pull-request: bpf-next 2025-11-10 We've added 19 non-merge commits during the last 3 day(s) which contain a total of 22 files changed, 1345 insertions(+), 197 deletions(-). The main changes are: 1) Preserve skb metadata after a TC BPF program has changed the skb, from Jakub Sitnicki. This allows a TC program at the end of a TC filter chain to still see the skb metadata, even if another TC program at the front of the chain has changed the skb using BPF helpers. 2) Initial af_smc bpf_struct_ops support to control the smc specific syn/synack options, from D. Wythe. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: bpf/selftests: Add selftest for bpf_smc_hs_ctrl net/smc: bpf: Introduce generic hook for handshake flow bpf: Export necessary symbols for modules with struct_ops selftests/bpf: Cover skb metadata access after bpf_skb_change_proto selftests/bpf: Cover skb metadata access after change_head/tail helper selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room selftests/bpf: Cover skb metadata access after vlan push/pop helper selftests/bpf: Expect unclone to preserve skb metadata selftests/bpf: Dump skb metadata on verification failure selftests/bpf: Verify skb metadata in BPF instead of userspace bpf: Make bpf_skb_change_head helper metadata-safe bpf: Make bpf_skb_change_proto helper metadata-safe bpf: Make bpf_skb_adjust_room metadata-safe bpf: Make bpf_skb_vlan_push helper metadata-safe bpf: Make bpf_skb_vlan_pop helper metadata-safe vlan: Make vlan_remove_tag return nothing bpf: Unclone skb head on bpf_dynptr_write to skb metadata net: Preserve metadata on pskb_expand_head net: Helper to move packet data and metadata after skb_push/pull ==================== Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/filter.h9
-rw-r--r--include/linux/if_vlan.h13
-rw-r--r--include/linux/skbuff.h75
-rw-r--r--include/net/netns/smc.h3
-rw-r--r--include/net/smc.h53
5 files changed, 146 insertions, 7 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index e116de7edc58..a104b3994230 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1781,6 +1781,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush);
+int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags);
void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
@@ -1817,6 +1819,13 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
{
}
+static inline int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len,
+ u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
{
return ERR_PTR(-EOPNOTSUPP);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 15e01935d3fa..f7f34eb15e06 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -355,16 +355,17 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
__be16 vlan_proto, u16 vlan_tci,
unsigned int mac_len)
{
+ const u8 meta_len = mac_len > ETH_TLEN ? skb_metadata_len(skb) : 0;
struct vlan_ethhdr *veth;
- if (skb_cow_head(skb, VLAN_HLEN) < 0)
+ if (skb_cow_head(skb, meta_len + VLAN_HLEN) < 0)
return -ENOMEM;
skb_push(skb, VLAN_HLEN);
/* Move the mac header sans proto to the beginning of the new header. */
if (likely(mac_len > ETH_TLEN))
- memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
+ skb_postpush_data_move(skb, VLAN_HLEN, mac_len - ETH_TLEN);
if (skb_mac_header_was_set(skb))
skb->mac_header -= VLAN_HLEN;
@@ -731,18 +732,16 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
*
* Expects the skb to contain a VLAN tag in the payload, and to have skb->data
* pointing at the MAC header.
- *
- * Returns: a new pointer to skb->data, or NULL on failure to pull.
*/
-static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
+static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
{
struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
*vlan_tci = ntohs(vhdr->h_vlan_TCI);
- memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
vlan_set_encap_proto(skb, vhdr);
- return __skb_pull(skb, VLAN_HLEN);
+ __skb_pull(skb, VLAN_HLEN);
+ skb_postpull_data_move(skb, VLAN_HLEN, 2 * ETH_ALEN);
}
/**
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a7cc3d1f4fd1..ff90281ddf90 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4564,6 +4564,81 @@ static inline void skb_metadata_clear(struct sk_buff *skb)
skb_metadata_set(skb, 0);
}
+/**
+ * skb_data_move - Move packet data and metadata after skb_push() or skb_pull().
+ * @skb: packet to operate on
+ * @len: number of bytes pushed or pulled from &sk_buff->data
+ * @n: number of bytes to memmove() from pre-push/pull &sk_buff->data
+ *
+ * Moves @n bytes of packet data, can be zero, and all bytes of skb metadata.
+ *
+ * Assumes metadata is located immediately before &sk_buff->data prior to the
+ * push/pull, and that sufficient headroom exists to hold it after an
+ * skb_push(). Otherwise, metadata is cleared and a one-time warning is issued.
+ *
+ * Prefer skb_postpull_data_move() or skb_postpush_data_move() to calling this
+ * helper directly.
+ */
+static inline void skb_data_move(struct sk_buff *skb, const int len,
+ const unsigned int n)
+{
+ const u8 meta_len = skb_metadata_len(skb);
+ u8 *meta, *meta_end;
+
+ if (!len || (!n && !meta_len))
+ return;
+
+ if (!meta_len)
+ goto no_metadata;
+
+ meta_end = skb_metadata_end(skb);
+ meta = meta_end - meta_len;
+
+ if (WARN_ON_ONCE(meta_end + len != skb->data ||
+ meta_len > skb_headroom(skb))) {
+ skb_metadata_clear(skb);
+ goto no_metadata;
+ }
+
+ memmove(meta + len, meta, meta_len + n);
+ return;
+
+no_metadata:
+ memmove(skb->data, skb->data - len, n);
+}
+
+/**
+ * skb_postpull_data_move - Move packet data and metadata after skb_pull().
+ * @skb: packet to operate on
+ * @len: number of bytes pulled from &sk_buff->data
+ * @n: number of bytes to memmove() from pre-pull &sk_buff->data
+ *
+ * See skb_data_move() for details.
+ */
+static inline void skb_postpull_data_move(struct sk_buff *skb,
+ const unsigned int len,
+ const unsigned int n)
+{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+ skb_data_move(skb, len, n);
+}
+
+/**
+ * skb_postpush_data_move - Move packet data and metadata after skb_push().
+ * @skb: packet to operate on
+ * @len: number of bytes pushed onto &sk_buff->data
+ * @n: number of bytes to memmove() from pre-push &sk_buff->data
+ *
+ * See skb_data_move() for details.
+ */
+static inline void skb_postpush_data_move(struct sk_buff *skb,
+ const unsigned int len,
+ const unsigned int n)
+{
+ DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
+ skb_data_move(skb, -len, n);
+}
+
struct sk_buff *skb_clone_sk(struct sk_buff *skb);
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index 6ceb12baec24..ed24c9f638ee 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *smc_hdr;
#endif
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+ struct smc_hs_ctrl __rcu *hs_ctrl;
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time;
diff --git a/include/net/smc.h b/include/net/smc.h
index 08bee529ed8d..bfdc4c41f019 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -17,6 +17,8 @@
#include <linux/wait.h>
#include <linux/dibs.h>
+struct tcp_sock;
+struct inet_request_sock;
struct sock;
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
u8 going_away : 1;
};
+#define SMC_HS_CTRL_NAME_MAX 16
+
+enum {
+ /* ops can be inherit from init_net */
+ SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,
+
+ SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
+};
+
+struct smc_hs_ctrl {
+ /* private */
+
+ struct list_head list;
+ struct module *owner;
+
+ /* public */
+
+ /* unique name */
+ char name[SMC_HS_CTRL_NAME_MAX];
+ int flags;
+
+ /* Invoked before computing SMC option for SYN packets.
+ * We can control whether to set SMC options by returning various value.
+ * Return 0 to disable SMC, or return any other value to enable it.
+ */
+ int (*syn_option)(struct tcp_sock *tp);
+
+ /* Invoked before Set up SMC options for SYN-ACK packets
+ * We can control whether to respond SMC options by returning various
+ * value. Return 0 to disable SMC, or return any other value to enable
+ * it.
+ */
+ int (*synack_option)(const struct tcp_sock *tp,
+ struct inet_request_sock *ireq);
+};
+
+#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
+#define smc_call_hsbpf(init_val, tp, func, ...) ({ \
+ typeof(init_val) __ret = (init_val); \
+ struct smc_hs_ctrl *ctrl; \
+ rcu_read_lock(); \
+ ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
+ if (ctrl && ctrl->func) \
+ __ret = ctrl->func(tp, ##__VA_ARGS__); \
+ rcu_read_unlock(); \
+ __ret; \
+})
+#else
+#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
+#endif /* CONFIG_SMC_HS_CTRL_BPF */
+
#endif /* _SMC_H */