From be88c549e9d78828a2e06126ed7e17fc2e030f1f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:40 +0000 Subject: tcp: Call tcp_syn_ack_timeout() directly. Since DCCP has been removed, we do not need to use request_sock_ops.syn_ack_timeout(). Let's call tcp_syn_ack_timeout() directly. Now other function pointers of request_sock_ops are protocol-dependent. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/request_sock.h | 1 - net/ipv4/inet_connection_sock.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_timer.c | 3 +-- net/ipv6/tcp_ipv6.c | 1 - 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cd4d4cf71d0d..9b9e04f6bb89 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -36,7 +36,6 @@ struct request_sock_ops { struct sk_buff *skb, enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); - void (*syn_ack_timeout)(const struct request_sock *req); }; struct saved_syn { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3b83b66b2284..6a86c1ac3011 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1096,9 +1096,11 @@ static void reqsk_timer_handler(struct timer_list *t) young <<= 1; } } + syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept), &expire, &resend); - req->rsk_ops->syn_ack_timeout(req); + tcp_syn_ack_timeout(req); + if (!expire && (!resend || !tcp_rtx_synack(sk_listener, req) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b7526a7888cb..0cfebac33a91 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1660,7 +1660,6 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2dd73a4e8e51..0672c3d8f4f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -458,7 +458,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) struct tcp_sock *tp = tcp_sk(sk); int max_retries; - req->rsk_ops->syn_ack_timeout(req); + tcp_syn_ack_timeout(req); /* Add one more retry for fastopen. * Paired with WRITE_ONCE() in tcp_sock_set_syncnt() @@ -752,7 +752,6 @@ void tcp_syn_ack_timeout(const struct request_sock *req) __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } -EXPORT_IPV6_MOD(tcp_syn_ack_timeout); void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7df21c1cba21..08113f430124 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -796,7 +796,6 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { -- cgit v1.2.3 From 3ce5dd8161ecdf12ffe0af99ff8980f1432f64a5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:41 +0000 Subject: tcp: Remove timeout arg from reqsk_queue_hash_req(). inet_csk_reqsk_queue_hash_add() is no longer shared by DCCP. We do not need to pass req->timeout down to reqsk_queue_hash_req(). Let's move tcp_timeout_init() from tcp_conn_request() to reqsk_queue_hash_req(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 +-- net/ipv4/inet_connection_sock.c | 11 +++++------ net/ipv4/tcp_input.c | 14 +++++--------- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b4b886647607..90a99a2fc804 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -267,8 +267,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout); +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, bool own_req); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6a86c1ac3011..d9c674403eb0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1144,8 +1144,7 @@ drop: reqsk_put(oreq); } -static bool reqsk_queue_hash_req(struct request_sock *req, - unsigned long timeout) +static bool reqsk_queue_hash_req(struct request_sock *req) { bool found_dup_sk = false; @@ -1153,8 +1152,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req, return false; /* The timer needs to be setup after a successful insertion. */ + req->timeout = tcp_timeout_init((struct sock *)req); timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); - mod_timer(&req->rsk_timer, jiffies + timeout); + mod_timer(&req->rsk_timer, jiffies + req->timeout); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. @@ -1164,10 +1164,9 @@ static bool reqsk_queue_hash_req(struct request_sock *req, return true; } -bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout) +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req) { - if (!reqsk_queue_hash_req(req, timeout)) + if (!reqsk_queue_hash_req(req)) return false; inet_csk_reqsk_queue_added(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6db1d4c36a88..804ec56bdd24 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7531,15 +7531,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, sock_put(fastopen_sk); } else { tcp_rsk(req)->tfo_listener = false; - if (!want_cookie) { - req->timeout = tcp_timeout_init((struct sock *)req); - if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, - req->timeout))) { - reqsk_free(req); - dst_release(dst); - return 0; - } - + if (!want_cookie && + unlikely(!inet_csk_reqsk_queue_hash_add(sk, req))) { + reqsk_free(req); + dst_release(dst); + return 0; } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : -- cgit v1.2.3 From 6fbf648d5cc48ebf250f32a8793a95a2596c0d50 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:42 +0000 Subject: tcp: Remove redundant init for req->num_timeout. Commit 5903123f662e ("tcp: Use BPF timeout setting for SYN ACK RTO") introduced req->timeout and initialised it in 3 places: 1. reqsk_alloc() sets 0 2. inet_reqsk_alloc() sets TCP_TIMEOUT_INIT 3. tcp_conn_request() sets tcp_timeout_init() 1. has been always redundant as 2. overwrites it immediately. 2. was necessary for TFO SYN+ACK but is no longer needed after commit 8ea731d4c2ce ("tcp: Make SYN ACK RTO tunable by BPF programs with TFO"). 3. was moved to reqsk_queue_hash_req() in the previous patch. Now, we always initialise req->timeout just before scheduling the SYN+ACK timer: * For non-TFO SYN+ACK : reqsk_queue_hash_req() * For TFO SYN+ACK : tcp_fastopen_create_child() Let's remove the redundant initialisation of req->timeout in reqsk_alloc() and inet_reqsk_alloc(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d9c674403eb0..2bfe7af51bbb 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -885,7 +885,6 @@ reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; req->syncookie = 0; - req->timeout = 0; req->num_timeout = 0; req->num_retrans = 0; req->sk = NULL; @@ -913,7 +912,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); ireq->ireq_family = sk_listener->sk_family; - req->timeout = TCP_TIMEOUT_INIT; } return req; -- cgit v1.2.3 From 207ce0f6bc131812c96cf4f6db328af5396cebac Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:43 +0000 Subject: tcp: Remove timeout arg from reqsk_timeout(). reqsk_timeout() is always called with @timeout being TCP_RTO_MAX. Let's remove the arg. As a prep for the next patch, reqsk_timeout() is moved to tcp.h and renamed to tcp_reqsk_timeout(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 8 -------- include/net/tcp.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp_minisocks.c | 5 +++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 90a99a2fc804..fd40af2221b9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -290,14 +290,6 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); -static inline unsigned long -reqsk_timeout(struct request_sock *req, unsigned long max_timeout) -{ - u64 timeout = (u64)req->timeout << req->num_timeout; - - return (unsigned long)min_t(u64, timeout, max_timeout); -} - void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_for_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0aa1f07d036a..0c7274ac7ed5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -841,6 +841,13 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) +{ + u64 timeout = (u64)req->timeout << req->num_timeout; + + return (unsigned long)min_t(u64, timeout, TCP_RTO_MAX); +} + u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2bfe7af51bbb..b4eae731c9ba 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1105,7 +1105,7 @@ static void reqsk_timer_handler(struct timer_list *t) inet_rsk(req)->acked)) { if (req->num_timeout++ == 0) atomic_dec(&queue->young); - mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); + mod_timer(&req->rsk_timer, jiffies + tcp_reqsk_timeout(req)); if (!nreq) return; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ded2cf1f6006..d8f4d813e8dd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -714,7 +714,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; + tmp_opt.ts_recent_stamp = ktime_get_seconds() - + tcp_reqsk_timeout(req) / HZ; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -753,7 +754,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, !tcp_rtx_synack(sk, req)) { unsigned long expires = jiffies; - expires += reqsk_timeout(req, TCP_RTO_MAX); + expires += tcp_reqsk_timeout(req); if (!fastopen) mod_timer_pending(&req->rsk_timer, expires); else -- cgit v1.2.3 From 1e9d3005e02cba82047d49f859982fc73b9a100b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:44 +0000 Subject: tcp: Apply max RTO to non-TFO SYN+ACK. Since commit 54a378f43425 ("tcp: add the ability to control max RTO"), TFO SYN+ACK RTO is capped by the TFO full sk's inet_csk(sk)->icsk_rto_max. The value is inherited from the parent listener. Let's apply the same cap to non-TFO SYN+ACK. Note that req->rsk_listener is always non-NULL when we call tcp_reqsk_timeout() in reqsk_timer_handler() or tcp_check_req(). It could be NULL for SYN cookie req, but we do not use req->timeout then. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c7274ac7ed5..4833ec7903ec 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -845,7 +845,8 @@ static inline unsigned long tcp_reqsk_timeout(struct request_sock *req) { u64 timeout = (u64)req->timeout << req->num_timeout; - return (unsigned long)min_t(u64, timeout, TCP_RTO_MAX); + return (unsigned long)min_t(u64, timeout, + tcp_rto_max(req->rsk_listener)); } u32 tcp_delack_max(const struct sock *sk); -- cgit v1.2.3 From ffc56c90819e86d3a8c4eff6f831317d1c1476b6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Nov 2025 00:32:45 +0000 Subject: selftest: packetdrill: Add max RTO test for SYN+ACK. This script sets net.ipv4.tcp_rto_max_ms to 1000 and checks if SYN+ACK RTO is capped at 1s for TFO and non-TFO. Without the previous patch, the max RTO is applied to TFO SYN+ACK only, and non-TFO SYN+ACK RTO increases exponentially. # selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # TAP version 13 # 1..2 # tcp_rto_synack_rto_max.pkt:46: error handling packet: timing error: expected outbound packet at 5.091936 sec but happened at 6.107826 sec; tolerance 0.127974 sec # script packet: 5.091936 S. 0:0(0) ack 1 # actual packet: 6.107826 S. 0:0(0) ack 1 win 65535 # not ok 1 ipv4 # tcp_rto_synack_rto_max.pkt:46: error handling packet: timing error: expected outbound packet at 5.075901 sec but happened at 6.091841 sec; tolerance 0.127976 sec # script packet: 5.075901 S. 0:0(0) ack 1 # actual packet: 6.091841 S. 0:0(0) ack 1 win 65535 # not ok 2 ipv6 # # Totals: pass:0 fail:2 xfail:0 xpass:0 skip:0 error:0 not ok 49 selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # exit=1 With the previous patch, all SYN+ACKs are retransmitted after 1s. # selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt # TAP version 13 # 1..2 # ok 1 ipv4 # ok 2 ipv6 # # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 ok 49 selftests: net/packetdrill: tcp_rto_synack_rto_max.pkt Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251106003357.273403-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rto_synack_rto_max.pkt | 54 ++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt new file mode 100644 index 000000000000..47550df124ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test SYN+ACK RTX with 1s RTO. +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000` + +// +// Test 1: TFO SYN+ACK +// + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 1000 + +0 > S. 0:0(0) ack 11 + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 11 + +1 > S. 0:0(0) ack 11 + +1 > S. 0:0(0) ack 11 + + +0 < . 11:11(0) ack 1 win 1000 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 + + +// +// Test 2: non-TFO SYN+ACK +// + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 1000 + +0 > S. 0:0(0) ack 1 + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 1 + +1 > S. 0:0(0) ack 1 + +1 > S. 0:0(0) ack 1 + + +0 < . 1:1(0) ack 1 win 1000 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 -- cgit v1.2.3