diff options
| author | KaFai Wan <kafai.wan@linux.dev> | 2026-04-21 18:58:02 +0300 |
|---|---|---|
| committer | Martin KaFai Lau <martin.lau@kernel.org> | 2026-04-22 22:58:57 +0300 |
| commit | 54377fcab51f6f1f8807827d3751be42279e1a6a (patch) | |
| tree | 9701d312b26056ae6154bb69a97e7399777a9dc7 | |
| parent | 846c76ecc02973b05ae909dd4248c11bfa277fc1 (diff) | |
| download | linux-54377fcab51f6f1f8807827d3751be42279e1a6a.tar.xz | |
bpf: Reject TCP_NODELAY in bpf-tcp-cc
A BPF TCP congestion control program can call bpf_setsockopt() from
its callbacks. In current kernels, if it calls
bpf_setsockopt(TCP_NODELAY) from cwnd_event_tx_start(), the call can
re-enter the TCP transmit path before the outer tcp_transmit_skb()
has completed and advanced the send head.
This can re-trigger CA_EVENT_TX_START and lead to unbounded recursion:
tcp_transmit_skb()
-> tcp_event_data_sent()
-> tcp_ca_event(sk, CA_EVENT_TX_START)
-> cwnd_event_tx_start()
-> bpf_setsockopt(TCP_NODELAY)
-> tcp_push_pending_frames()
-> tcp_write_xmit()
-> tcp_transmit_skb()
The cycle repeats without bound and can overflow the kernel stack.
Reject TCP_NODELAY with -EOPNOTSUPP for bpf-tcp-cc by introducing
a dedicated setsockopt proto for BPF_PROG_TYPE_STRUCT_OPS TCP
congestion control programs. To keep it simple, TCP_NODELAY is
rejected for all tcp-cc ops, not just cwnd_event().
Fixes: 7e41df5dbba2 ("bpf: Add a few optnames to bpf_setsockopt")
Suggested-by: Martin KaFai Lau <martin.lau@linux.dev>
Signed-off-by: KaFai Wan <kafai.wan@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Link: https://patch.msgid.link/20260421155804.135786-3-kafai.wan@linux.dev
| -rw-r--r-- | include/linux/bpf.h | 1 | ||||
| -rw-r--r-- | net/core/filter.c | 24 | ||||
| -rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 2 |
3 files changed, 26 insertions, 1 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c90ca9..01e203964892 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; diff --git a/net/core/filter.c b/net/core/filter.c index 96849f4c1fbc..2914f5330310 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5688,6 +5688,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + /* + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters + * CA_EVENT_TX_START in bpf_tcp_cc. 
+ */ + if (level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { + .func = bpf_sk_setsockopt_nodelay, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 008edc7f6688..791e15063237 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, */ if (prog_ops_moff(prog) != offsetof(struct tcp_congestion_ops, release)) - return &bpf_sk_setsockopt_proto; + return &bpf_sk_setsockopt_nodelay_proto; return NULL; case BPF_FUNC_getsockopt: /* Since get/setsockopt is usually expected to |
