From fd93eaffb3f977b23bc0a48d4c8616e654fcf133 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 20 Feb 2025 15:29:31 +0800 Subject: bpf: Prevent unsafe access to the sock fields in the BPF timestamping callback The subsequent patch will implement BPF TX timestamping. It will call the sockops BPF program without holding the sock lock. This breaks the current assumption that all sock ops programs will hold the sock lock. The sock's fields of the uapi's bpf_sock_ops requires this assumption. To address this, a new "u8 is_locked_tcp_sock;" field is added. This patch sets it in the current sock_ops callbacks. The "is_fullsock" test is then replaced by the "is_locked_tcp_sock" test during sock_ops_convert_ctx_access(). The new TX timestamping callbacks added in the subsequent patch will not have this set. This will prevent unsafe access from the new timestamping callbacks. Potentially, we could allow read-only access. However, this would require identifying which callback is read-safe-only and also requires additional BPF instruction rewrites in the covert_ctx. Since the BPF program can always read everything from a socket (e.g., by using bpf_core_cast), this patch keeps it simple and disables all read and write access to any socket fields through the bpf_sock_ops UAPI from the new TX timestamping callback. Moreover, note that some of the fields in bpf_sock_ops are specific to tcp_sock, and sock_ops currently only supports tcp_sock. In the future, UDP timestamping will be added, which will also break this assumption. The same idea used in this patch will be reused. Considering that the current sock_ops only supports tcp_sock, the variable is named is_locked_"tcp"_sock. Signed-off-by: Jason Xing Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250220072940.99994-4-kerneljasonxing@gmail.com --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a3ea46281595..d36d5d5180b1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1508,6 +1508,7 @@ struct bpf_sock_ops_kern { void *skb_data_end; u8 op; u8 is_fullsock; + u8 is_locked_tcp_sock; u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling -- cgit v1.2.3 From 6b98ec7e882af1c3088a88757e2226d06c8514f9 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 20 Feb 2025 15:29:34 +0800 Subject: bpf: Add BPF_SOCK_OPS_TSTAMP_SCHED_CB callback Support SCM_TSTAMP_SCHED case for bpf timestamping. Add a new sock_ops callback, BPF_SOCK_OPS_TSTAMP_SCHED_CB. This callback will occur at the same timestamping point as the user space's SCM_TSTAMP_SCHED. The BPF program can use it to get the same SCM_TSTAMP_SCHED timestamp without modifying the user-space application. A new SKBTX_BPF flag is added to mark skb_shinfo(skb)->tx_flags, ensuring that the new BPF timestamping and the current user space's SO_TIMESTAMPING do not interfere with each other. Signed-off-by: Jason Xing Signed-off-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250220072940.99994-7-kerneljasonxing@gmail.com --- include/linux/skbuff.h | 6 +++++- include/uapi/linux/bpf.h | 5 +++++ net/core/dev.c | 3 ++- net/core/skbuff.c | 20 ++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 5 +++++ 5 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bb2b751d274a..52f6e033e704 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -489,10 +489,14 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, + + /* used for bpf extension when a bpf program is loaded */ + SKBTX_BPF = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP) + SKBTX_SCHED_TSTAMP | \ + SKBTX_BPF) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ SKBTX_HW_TSTAMP_USE_CYCLES | \ SKBTX_ANY_SW_TSTAMP) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4e0632848440..bb62fe25b2d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7031,6 +7031,11 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing + * through dev layer when + * SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/core/dev.c b/net/core/dev.c index d5ab9a4b318e..436f2bdfb2d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4501,7 +4501,8 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_reset_mac_header(skb); skb_assert_len(skb); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + if (unlikely(skb_shinfo(skb)->tx_flags & + (SKBTX_SCHED_TSTAMP | SKBTX_BPF))) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 341a3290e898..3206f7708974 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5556,6 +5556,23 @@ static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb, return false; } +static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb, + struct sock *sk, + int tstype) +{ + int op; + + switch (tstype) { + case SCM_TSTAMP_SCHED: + op = BPF_SOCK_OPS_TSTAMP_SCHED_CB; + break; + default: + return; + } + + bpf_skops_tx_timestamping(sk, skb, op); +} + void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, @@ -5568,6 +5585,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; + if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF) + skb_tstamp_tx_report_bpf_timestamping(orig_skb, sk, tstype); + if (!skb_tstamp_tx_report_so_timestamping(orig_skb, hwtstamps, tstype)) return; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e0632848440..bb62fe25b2d0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7031,6 +7031,11 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing + * through dev layer when + * SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- cgit v1.2.3 From ecebb17ad818bc043e558c278a6c56d5bbaebacc Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 20 Feb 2025 15:29:35 +0800 Subject: bpf: Add BPF_SOCK_OPS_TSTAMP_SND_SW_CB callback Support sw SCM_TSTAMP_SND case for bpf timestamping. Add a new sock_ops callback, BPF_SOCK_OPS_TSTAMP_SND_SW_CB. This callback will occur at the same timestamping point as the user space's software SCM_TSTAMP_SND. The BPF program can use it to get the same SCM_TSTAMP_SND timestamp without modifying the user-space application. Based on this patch, BPF program will get the software timestamp when the driver is ready to send the skb. In the sebsequent patch, the hardware timestamp will be supported. Signed-off-by: Jason Xing Signed-off-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250220072940.99994-8-kerneljasonxing@gmail.com --- include/linux/skbuff.h | 2 +- include/uapi/linux/bpf.h | 4 ++++ net/core/skbuff.c | 9 ++++++++- tools/include/uapi/linux/bpf.h | 4 ++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 52f6e033e704..76582500c5ea 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4568,7 +4568,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); - if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + if (skb_shinfo(skb)->tx_flags & (SKBTX_SW_TSTAMP | SKBTX_BPF)) skb_tstamp_tx(skb, NULL); } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bb62fe25b2d0..abe8d6c30dae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7036,6 +7036,10 @@ enum { * SK_BPF_CB_TX_TIMESTAMPING * feature is on. */ + BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send + * to the nic when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3206f7708974..308db7dae1ab 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5557,6 +5557,7 @@ static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb, } static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { @@ -5566,6 +5567,11 @@ static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb, case SCM_TSTAMP_SCHED: op = BPF_SOCK_OPS_TSTAMP_SCHED_CB; break; + case SCM_TSTAMP_SND: + if (hwtstamps) + return; + op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB; + break; default: return; } @@ -5586,7 +5592,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, return; if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF) - skb_tstamp_tx_report_bpf_timestamping(orig_skb, sk, tstype); + skb_tstamp_tx_report_bpf_timestamping(orig_skb, hwtstamps, + sk, tstype); if (!skb_tstamp_tx_report_so_timestamping(orig_skb, hwtstamps, tstype)) return; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bb62fe25b2d0..abe8d6c30dae 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7036,6 +7036,10 @@ enum { * SK_BPF_CB_TX_TIMESTAMPING * feature is on. */ + BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send + * to the nic when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- cgit v1.2.3 From 2deaf7f42b8c551e84da20483ca2d4a65c3623b3 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 20 Feb 2025 15:29:36 +0800 Subject: bpf: Add BPF_SOCK_OPS_TSTAMP_SND_HW_CB callback Support hw SCM_TSTAMP_SND case for bpf timestamping. Add a new sock_ops callback, BPF_SOCK_OPS_TSTAMP_SND_HW_CB. This callback will occur at the same timestamping point as the user space's hardware SCM_TSTAMP_SND. The BPF program can use it to get the same SCM_TSTAMP_SND timestamp without modifying the user-space application. To avoid increasing the code complexity, replace SKBTX_HW_TSTAMP with SKBTX_HW_TSTAMP_NOBPF instead of changing numerous callers from driver side using SKBTX_HW_TSTAMP. The new definition of SKBTX_HW_TSTAMP means the combination tests of socket timestamping and bpf timestamping. After this patch, drivers can work under the bpf timestamping. Considering some drivers don't assign the skb with hardware timestamp, this patch does the assignment and then BPF program can acquire the hwstamp from skb directly. Signed-off-by: Jason Xing Signed-off-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250220072940.99994-9-kerneljasonxing@gmail.com --- include/linux/skbuff.h | 4 +++- include/uapi/linux/bpf.h | 4 ++++ net/core/skbuff.c | 11 +++++++---- net/dsa/user.c | 2 +- net/socket.c | 2 +- tools/include/uapi/linux/bpf.h | 4 ++++ 6 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 76582500c5ea..0b4f1889500d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -470,7 +470,7 @@ struct skb_shared_hwtstamps { /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ - SKBTX_HW_TSTAMP = 1 << 0, + SKBTX_HW_TSTAMP_NOBPF = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, @@ -494,6 +494,8 @@ enum { SKBTX_BPF = 1 << 7, }; +#define SKBTX_HW_TSTAMP (SKBTX_HW_TSTAMP_NOBPF | SKBTX_BPF) + #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP | \ SKBTX_BPF) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index abe8d6c30dae..72045c242396 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7040,6 +7040,10 @@ enum { * to the nic when SK_BPF_CB_TX_TIMESTAMPING * feature is on. */ + BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when + * SK_BPF_CB_TX_TIMESTAMPING feature + * is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 308db7dae1ab..77b8866f94c5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5547,7 +5547,7 @@ static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb, case SCM_TSTAMP_SCHED: return skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP; case SCM_TSTAMP_SND: - return skb_shinfo(skb)->tx_flags & (hwtstamps ? SKBTX_HW_TSTAMP : + return skb_shinfo(skb)->tx_flags & (hwtstamps ? SKBTX_HW_TSTAMP_NOBPF : SKBTX_SW_TSTAMP); case SCM_TSTAMP_ACK: return TCP_SKB_CB(skb)->txstamp_ack; @@ -5568,9 +5568,12 @@ static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb, op = BPF_SOCK_OPS_TSTAMP_SCHED_CB; break; case SCM_TSTAMP_SND: - if (hwtstamps) - return; - op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB; + if (hwtstamps) { + op = BPF_SOCK_OPS_TSTAMP_SND_HW_CB; + *skb_hwtstamps(skb) = *hwtstamps; + } else { + op = BPF_SOCK_OPS_TSTAMP_SND_SW_CB; + } break; default: return; diff --git a/net/dsa/user.c b/net/dsa/user.c index 2296a4ead020..804dc7dac4f2 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p, { struct dsa_switch *ds = p->dp->ds; - if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF)) return; if (!ds->ops->port_txtstamp) diff --git a/net/socket.c b/net/socket.c index 28bae5a94234..0545e9ea7058 100644 --- a/net/socket.c +++ b/net/socket.c @@ -681,7 +681,7 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) u8 flags = *tx_flags; if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) { - flags |= SKBTX_HW_TSTAMP; + flags |= SKBTX_HW_TSTAMP_NOBPF; /* PTP hardware clocks can provide a free running cycle counter * as a time base for virtual clocks. Tell driver to use the diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index abe8d6c30dae..72045c242396 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7040,6 +7040,10 @@ enum { * to the nic when SK_BPF_CB_TX_TIMESTAMPING * feature is on. */ + BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when + * SK_BPF_CB_TX_TIMESTAMPING feature + * is on. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- cgit v1.2.3