From 1eeb5043573981f3a1278876515851b7f6b1df1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Mar 2024 15:32:03 +0000 Subject: tcp/dccp: do not care about families in inet_twsk_purge() We lost ability to unload ipv6 module a long time ago. Instead of calling expensive inet_twsk_purge() twice, we can handle all families in one round. Also remove an extra line added in my prior patch, per Kuniyuki Iwashima feedback. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20240327192934.6843-1-kuniyu@amazon.com/ Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240329153203.345203-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_minisocks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f0761f060a83..5b21a07ddf9a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -388,7 +388,7 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); -void tcp_twsk_purge(struct list_head *net_exit_list, int family) +void tcp_twsk_purge(struct list_head *net_exit_list) { bool purged_once = false; struct net *net; @@ -396,9 +396,9 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) list_for_each_entry(net, net_exit_list, exit_list) { if (net->ipv4.tcp_death_row.hashinfo->pernet) { /* Even if tw_refcount == 1, we must clean up kernel reqsk */ - inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); + inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo); } else if (!purged_once) { - inet_twsk_purge(&tcp_hashinfo, family); + inet_twsk_purge(&tcp_hashinfo); purged_once = true; } } -- cgit v1.2.3 From 41eecbd712b73f0d5dcf1152b9a1c27b1f238028 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Apr 2024 09:33:22 +0000 Subject: tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field TCP can transform a TIMEWAIT socket into a SYN_RECV one from a SYN packet, and the ISN of the SYNACK packet is normally generated using TIMEWAIT tw_snd_nxt : tcp_timewait_state_process() ... u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; TCP_SKB_CB(skb)->tcp_tw_isn = isn; return TCP_TW_SYN; This SYN packet also bypasses normal checks against listen queue being full or not. tcp_conn_request() ... __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; ... /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; } This was using TCP_SKB_CB(skb)->tcp_tw_isn field in skb. Unfortunately this field has been accidentally cleared after the call to tcp_timewait_state_process() returning TCP_TW_SYN. Using a field in TCP_SKB_CB(skb) for a temporary state is overkill. Switch instead to a per-cpu variable. As a bonus, we do not have to clear tcp_tw_isn in TCP receive fast path. It is temporarily set then cleared only in the TCP_TW_SYN dance. Fixes: 4ad19de8774e ("net: tcp6: fix double call of tcp_v6_fill_cb()") Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()") Signed-off-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/tcp.h | 10 +++++----- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_input.c | 26 ++++++++++++++++---------- net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv6/tcp_ipv6.c | 5 +++-- 6 files changed, 32 insertions(+), 21 deletions(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index fa0ab77acee2..ba6c5ae86e22 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo; DECLARE_PER_CPU(unsigned int, tcp_orphan_count); int tcp_orphan_count_sum(void); +DECLARE_PER_CPU(u32, tcp_tw_isn); + void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -392,7 +394,8 @@ enum tcp_tw_status { enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - const struct tcphdr *th); + const struct tcphdr *th, + u32 *tw_isn); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); @@ -935,13 +938,10 @@ struct tcp_skb_cb { __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ union { - /* Note : tcp_tw_isn is used in input path only - * (isn chosen by tcp_timewait_state_process()) - * + /* Note : * tcp_gso_segs/size are used in write queue only, * cf tcp_skb_pcount()/tcp_skb_mss() */ - __u32 tcp_tw_isn; struct { u16 tcp_gso_segs; u16 tcp_gso_size; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 664c8ecb076b..b07aa71b24ec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -290,6 +290,9 @@ enum { DEFINE_PER_CPU(unsigned int, tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); +DEFINE_PER_CPU(u32, tcp_tw_isn); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); + long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 48c275e6ef02..5a45a0923a1f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct sock *sk, struct sk_buff *skb) { struct tcp_fastopen_cookie foc = { .len = -1 }; - __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); @@ -7107,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct dst_entry *dst; struct flowi fl; u8 syncookies; + u32 isn; #ifdef CONFIG_TCP_AO const struct tcp_ao_hdr *aoh; #endif - syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); + isn = __this_cpu_read(tcp_tw_isn); + if (isn) { + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + __this_cpu_write(tcp_tw_isn, 0); + } else { + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); - if (!want_cookie) - goto drop; + if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) { + want_cookie = tcp_syn_flood_action(sk, + rsk_ops->slab_name); + if (!want_cookie) + goto drop; + } } if (sk_acceptq_is_full(sk)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 81e2f05c244d..1e650ec71d2f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2146,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -2168,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb) bool refcounted; struct sock *sk; int ret; + u32 isn; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) @@ -2383,7 +2383,7 @@ do_time_wait: inet_twsk_put(inet_twsk(sk)); goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, @@ -2397,6 +2397,7 @@ do_time_wait: sk = sk2; tcp_v4_restore_cb(skb); refcounted = false; + __this_cpu_write(tcp_tw_isn, isn); goto process; } } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5b21a07ddf9a..f53c7ada2ace 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq) */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - const struct tcphdr *th) + const struct tcphdr *th, u32 *tw_isn) { struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -228,7 +228,7 @@ kill: u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; - TCP_SKB_CB(skb)->tcp_tw_isn = isn; + *tw_isn = isn; return TCP_TW_SYN; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5141f7033abd..3aa9da5c9a66 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1741,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -1758,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) bool refcounted; struct sock *sk; int ret; + u32 isn; struct net *net = dev_net(skb->dev); drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -1967,7 +1967,7 @@ do_time_wait: goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { case TCP_TW_SYN: { struct sock *sk2; @@ -1985,6 +1985,7 @@ do_time_wait: sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; + __this_cpu_write(tcp_tw_isn, isn); goto process; } } -- cgit v1.2.3 From c51db4ac10d57c366f9a92121e3889bfc6c324cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2024 07:19:42 +0000 Subject: tcp: do not export tcp_twsk_purge() After commit 1eeb50435739 ("tcp/dccp: do not care about families in inet_twsk_purge()") tcp_twsk_purge() is no longer potentially called from a module. Signed-off-by: Eric Dumazet Cc: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f53c7ada2ace..146c061145b4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -403,7 +403,6 @@ void tcp_twsk_purge(struct list_head *net_exit_list) } } } -EXPORT_SYMBOL_GPL(tcp_twsk_purge); /* Warning : This function is called without sk_listener being locked. * Be sure to read socket fields once, as their value could change under us. -- cgit v1.2.3 From 6be49deaa09576c141002a2e6f816a1709bc2c86 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:35 +0800 Subject: rstreason: prepare for passive reset Adjust the parameter and support passing reason of reset which is for now NOT_SPECIFIED. No functional changes. Signed-off-by: Jason Xing Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 4 +++- net/dccp/ipv4.c | 10 ++++++---- net/dccp/ipv6.c | 10 ++++++---- net/dccp/minisocks.c | 3 ++- net/ipv4/tcp_ipv4.c | 12 +++++++----- net/ipv4/tcp_minisocks.c | 3 ++- net/ipv6/tcp_ipv6.c | 15 +++++++++------ net/mptcp/subflow.c | 8 +++++--- 8 files changed, 40 insertions(+), 25 deletions(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 004e651e6067..bdc737832da6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -18,6 +18,7 @@ #include #include +#include struct request_sock; struct sk_buff; @@ -34,7 +35,8 @@ struct request_sock_ops { void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, - struct sk_buff *skb); + struct sk_buff *skb, + enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); void (*syn_ack_timeout)(const struct request_sock *req); }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9fc9cea4c251..ff41bd6f99c3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" @@ -521,7 +522,8 @@ out: return err; } -static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { int err; const struct iphdr *rxiph; @@ -706,7 +708,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); kfree_skb(skb); return 0; } @@ -869,7 +871,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -909,7 +911,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c8ca703dc331..85f4b8fdbe5e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dccp.h" #include "ipv6.h" @@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; @@ -656,7 +658,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); @@ -762,7 +764,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -801,7 +803,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 64d805b27add..251a57cf5822 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" @@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_csk_reqsk_queue_drop(sk, req); out: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e06f0cd04f7e..202f6dfa958b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -723,7 +724,8 @@ out: * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -1934,7 +1936,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v4_send_reset(rsk, skb); + tcp_v4_send_reset(rsk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: kfree_skb_reason(skb, reason); /* Be careful here. If this function gets more complicated and @@ -2285,7 +2287,7 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v4_send_reset(nsk, skb); + tcp_v4_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } sock_put(sk); @@ -2364,7 +2366,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v4_send_reset(NULL, skb); + tcp_v4_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: @@ -2416,7 +2418,7 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v4_send_reset(sk, skb); + tcp_v4_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS:; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 146c061145b4..7d543569a180 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -22,6 +22,7 @@ #include #include #include +#include static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -878,7 +879,7 @@ embryonic_reset: * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. */ - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb7c3caf4f85..017f6293b5f4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -69,7 +70,8 @@ #include -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1008,7 +1010,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 kfree_skb(buff); } -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -1677,7 +1680,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb) __kfree_skb(opt_skb); @@ -1862,7 +1865,7 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v6_send_reset(nsk, skb); + tcp_v6_send_reset(nsk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } sock_put(sk); @@ -1939,7 +1942,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v6_send_reset(NULL, skb); + tcp_v6_send_reset(NULL, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: @@ -1995,7 +1998,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b94d1dca1094..32fe2ef36d56 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -20,6 +20,8 @@ #include #endif #include +#include + #include "protocol.h" #include "mib.h" @@ -308,7 +310,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp_request_sock_ops.send_reset(sk, skb); + tcp_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); return NULL; } @@ -376,7 +378,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp6_request_sock_ops.send_reset(sk, skb); + tcp6_request_sock_ops.send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); return NULL; } #endif @@ -911,7 +913,7 @@ dispose_child: tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); /* The last child reference will be released by the caller */ return child; -- cgit v1.2.3 From 11f46ea9814d2f0a3c7a5bc749d7619e47251f75 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 10 May 2024 20:25:02 +0800 Subject: tcp: rstreason: fully support in tcp_check_req() We're going to send an RST due to invalid syn packet which is already checked whether 1) it is in sequence, 2) it is a retransmitted skb. As RFC 793 says, if the state of socket is not CLOSED/LISTEN/SYN-SENT, then we should send an RST when receiving bad syn packet: "fourth, check the SYN bit,...If the SYN is in the window it is an error, send a reset" Signed-off-by: Jason Xing Link: https://lore.kernel.org/r/20240510122502.27850-6-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- include/net/rstreason.h | 8 ++++++++ net/ipv4/tcp_minisocks.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_minisocks.c') diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 7ae5bb55559b..2575c85d7f7a 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -16,6 +16,7 @@ FN(TCP_OLD_ACK) \ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ + FN(INVALID_SYN) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -76,6 +77,13 @@ enum sk_rst_reason { /* Here start with the independent reasons */ /** @SK_RST_REASON_TCP_TIMEWAIT_SOCKET: happen on the timewait socket */ SK_RST_REASON_TCP_TIMEWAIT_SOCKET, + /** + * @SK_RST_REASON_INVALID_SYN: receive bad syn packet + * RFC 793 says if the state is not CLOSED/LISTEN/SYN-SENT then + * "fourth, check the SYN bit,...If the SYN is in the window it is + * an error, send a reset" + */ + SK_RST_REASON_INVALID_SYN, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7d543569a180..b93619b2384b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -879,7 +879,7 @@ embryonic_reset: * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. */ - req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); -- cgit v1.2.3