summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2026-04-22 22:00:23 +0300
committerMartin KaFai Lau <martin.lau@kernel.org>2026-04-22 22:59:49 +0300
commitcd0eb48b38e42ce77955137f633fb78621933870 (patch)
treea34a7ef5dbb92baf3780719fb96fc4dca9db02f1
parenteb5249b12507246dc959945454cd1be8d7dc3795 (diff)
parent2c7e33f1fc2e75fcfb4aa5d840bcd2e8b53c1847 (diff)
downloadlinux-cd0eb48b38e42ce77955137f633fb78621933870.tar.xz
Merge branch 'bpf-reject-tcp_nodelay-in-tcp-header-option'
KaFai Wan says: ==================== bpf: Reject TCP_NODELAY in TCP header option This small patchset is about avoid infinite recursion in TCP header option callbacks and bpf-tcp-cc callbacks via TCP_NODELAY setsockopt. v4: - Fix the test case for TCP header option callbacks (Martin and Jiayuan) - Reject TCP_NODELAY in bpf-tcp-cc callbacks (AI and Martin) - Add a test case for bpf-tcp-cc v3: - Remove CONFIG_INET check and add comment (Martin and Jiayuan) - Fix the test case (Martin) https://lore.kernel.org/bpf/20260417092035.2299913-1-kafai.wan@linux.dev/ v2: - Reject TCP_NODELAY in bpf_sock_ops_setsockopt() (AI and Martin) https://lore.kernel.org/bpf/20260416112308.1820332-1-kafai.wan@linux.dev/ v1: https://lore.kernel.org/bpf/20260414112310.1285783-1-kafai.wan@linux.dev/ ==================== Link: https://patch.msgid.link/20260421155804.135786-1-kafai.wan@linux.dev Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r--include/linux/bpf.h1
-rw-r--r--net/core/filter.c30
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c15
7 files changed, 68 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b4b703c90ca9..01e203964892 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
diff --git a/net/core/filter.c b/net/core/filter.c
index 5fa9189eb772..2914f5330310 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5688,6 +5688,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ /*
+ * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters
+ * CA_EVENT_TX_START in bpf_tcp_cc.
+ */
+ if (level == SOL_TCP && optname == TCP_NODELAY)
+ return -EOPNOTSUPP;
+
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
+ .func = bpf_sk_setsockopt_nodelay,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
int, optname, char *, optval, int, optlen)
{
@@ -5833,6 +5857,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
if (!is_locked_tcp_sock_ops(bpf_sock))
return -EOPNOTSUPP;
+ /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */
+ if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB ||
+ bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) &&
+ level == SOL_TCP && optname == TCP_NODELAY)
+ return -EOPNOTSUPP;
+
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 008edc7f6688..791e15063237 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
*/
if (prog_ops_moff(prog) !=
offsetof(struct tcp_congestion_ops, release))
- return &bpf_sk_setsockopt_proto;
+ return &bpf_sk_setsockopt_nodelay_proto;
return NULL;
case BPF_FUNC_getsockopt:
/* Since get/setsockopt is usually expected to
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index f829b6f09bc9..fe30181e6336 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -112,6 +112,10 @@ static void test_cubic(void)
ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
+ ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option");
+ ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject,
+ "cwnd_event_tx_start reject nodelay option");
+
bpf_link__destroy(link);
bpf_cubic__destroy(cubic_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 56685fc03c7e..80e6315da2a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -507,6 +507,10 @@ static void misc(void)
ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");
+ ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok");
+ ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject");
+ ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject");
+
check_linum:
ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
sk_fds_close(&sk_fds);
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index ce18a4db813f..ebd5a1e69f56 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -16,6 +16,7 @@
#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>
+#include <errno.h>
char _license[] SEC("license") = "GPL";
@@ -170,10 +171,18 @@ static void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
+bool nodelay_init_reject = false;
+bool nodelay_cwnd_event_tx_start_reject = false;
+
SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bpf_bictcp *ca = inet_csk_ca(sk);
+ int true_val = 1, ret;
+
+ ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+ if (ret == -EOPNOTSUPP)
+ nodelay_init_reject = true;
bictcp_reset(ca);
@@ -189,8 +198,13 @@ void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk)
{
struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 now = tcp_jiffies32;
+ int true_val = 1, ret;
__s32 delta;
+ ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+ if (ret == -EOPNOTSUPP)
+ nodelay_cwnd_event_tx_start_reject = true;
+
delta = now - tcp_sk(sk)->lsndtime;
/* We were application limited (idle) for a while.
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index d487153a839d..ed5a0011b863 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -29,6 +29,10 @@ unsigned int nr_syn = 0;
unsigned int nr_fin = 0;
unsigned int nr_hwtstamp = 0;
+bool nodelay_est_ok = false;
+bool nodelay_hdr_len_reject = false;
+bool nodelay_write_hdr_reject = false;
+
/* Check the header received from the active side */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
{
@@ -300,7 +304,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
SEC("sockops")
int misc_estab(struct bpf_sock_ops *skops)
{
- int true_val = 1;
+ int true_val = 1, false_val = 0, ret;
switch (skops->op) {
case BPF_SOCK_OPS_TCP_LISTEN_CB:
@@ -316,10 +320,19 @@ int misc_estab(struct bpf_sock_ops *skops)
case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
return handle_parse_hdr(skops);
case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+ if (ret == -EOPNOTSUPP)
+ nodelay_hdr_len_reject = true;
return handle_hdr_opt_len(skops);
case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val));
+ if (ret == -EOPNOTSUPP)
+ nodelay_write_hdr_reject = true;
return handle_write_hdr_opt(skops);
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val));
+ if (!ret)
+ nodelay_est_ok = true;
return handle_passive_estab(skops);
}