From 61d2bcae99f66a640b3dd9632180209143fb5512 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Feb 2016 21:03:07 -0800 Subject: tcp: fastopen: accept data/FIN present in SYNACK message RFC 7413 (TCP Fast Open) 4.2.2 states that the SYNACK message MAY include data and/or FIN This patch adds support for the client side : If we receive a SYNACK with payload or FIN, queue the skb instead of ignoring it. Since we already support the same for SYN, we refactor the existing code and reuse it. Note we need to clone the skb, so this operation might fail under memory pressure. Sara Dickinson pointed out FreeBSD server Fast Open implementation was planned to generate such SYNACK in the future. The server side might be implemented on linux later. Reported-by: Sara Dickinson Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index f6f8f032c73e..27f4c733116d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1437,6 +1437,7 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, -- cgit v1.2.3 From e3e17b773bfe45462b7f3fae20c550025975cb13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Feb 2016 11:16:28 -0800 Subject: tcp: fastopen: call tcp_fin() if FIN present in SYNACK When we acknowledge a FIN, it is not enough to ack the sequence number and queue the skb into receive queue. We also have to call tcp_fin() to properly update socket state and send proper poll() notifications. It seems we also had the problem if we received a SYN packet with the FIN flag set, but it does not seem an urgent issue, as no known implementation can do that. Fixes: 61d2bcae99f6 ("tcp: fastopen: accept data/FIN present in SYNACK message") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_fastopen.c | 3 +++ net/ipv4/tcp_input.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 27f4c733116d..479d535609fd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -568,6 +568,7 @@ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); +void tcp_fin(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 6a6e11e54bae..fdb286ddba04 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -154,6 +154,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) * as we certainly are not changing upper 32bit value (0) */ tp->bytes_received = skb->len; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + tcp_fin(sk); } static struct sock *tcp_fastopen_create_child(struct sock *sk, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4add3eb40e58..8194a250a01e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3995,7 +3995,7 @@ void tcp_reset(struct sock *sk) * * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. */ -static void tcp_fin(struct sock *sk) +void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3 From 6fa251663069e05daadd1666cbf3b658bf840ea4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:49 +0200 Subject: ipv4: Namespaceify tcp syn retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_timer.c | 4 ++-- 6 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2b7907a35568..b7b5bd64df35 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -98,6 +98,8 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_probes; int sysctl_tcp_keepalive_intvl; + int sysctl_tcp_syn_retries; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index 479d535609fd..825485c7cc1a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4d367b4139a3..ae9dd8823134 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -291,15 +291,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max, }, - { - .procname = "tcp_syn_retries", - .data = &sysctl_tcp_syn_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &tcp_syn_retries_min, - .extra2 = &tcp_syn_retries_max - }, { .procname = "tcp_synack_retries", .data = &sysctl_tcp_synack_retries, @@ -960,6 +951,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_syn_retries", + .data = &init_net.ipv4.sysctl_tcp_syn_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c5075779e017..3dbb3637bb4b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2731,6 +2731,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); int val, len; if (get_user(len, optlen)) @@ -2765,7 +2766,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a4d523709ab3..f7464852aaa1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2388,6 +2388,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; + net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a4730a28b220..c5d51f530c65 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,7 +22,6 @@ #include #include -int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; @@ -157,6 +156,7 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); int retry_until; bool do_reset, syn_set = false; @@ -169,7 +169,7 @@ static int tcp_write_timeout(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } - retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { -- cgit v1.2.3 From 7c083ecb3ba4583a625d5ff9655d1a819e374493 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:50 +0200 Subject: ipv4: Namespaceify tcp synack retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/inet_connection_sock.c | 7 ++----- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 3 +-- 6 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b7b5bd64df35..9e83084ab8c1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -99,6 +99,7 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_intvl; int sysctl_tcp_syn_retries; + int sysctl_tcp_synack_retries; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 825485c7cc1a..05659e860039 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 46b9c887bede..9b17c1792dce 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -482,10 +482,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); #define AF_INET_FAMILY(fam) true #endif -/* Only thing we need from tcp.h */ -extern int sysctl_tcp_synack_retries; - - /* Decide when to expire the request and when to resend SYN-ACK */ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, const int max_retries, @@ -557,6 +553,7 @@ static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; struct sock *sk_listener = req->rsk_listener; + struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; int qlen, expire = 0, resend = 0; @@ -566,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data) if (sk_state_load(sk_listener) != TCP_LISTEN) goto drop; - max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ae9dd8823134..bb682e36d8b7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -291,13 +291,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max, }, - { - .procname = "tcp_synack_retries", - .data = &sysctl_tcp_synack_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &tcp_syn_retries_min, .extra2 = &tcp_syn_retries_max }, + { + .procname = "tcp_synack_retries", + .data = &init_net.ipv4.sysctl_tcp_synack_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f7464852aaa1..3146279695b9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2389,6 +2389,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c5d51f530c65..ca25fdf0c525 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,7 +22,6 @@ #include #include -int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; @@ -332,7 +331,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : - sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; -- cgit v1.2.3 From 12ed8244ed8b31b023ea6d2851fd8b15f2999e9b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:51 +0200 Subject: ipv4: Namespaceify tcp syncookies sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - net/ipv4/syncookies.c | 4 +--- net/ipv4/sysctl_net_ipv4.c | 18 +++++++++--------- net/ipv4/tcp_input.c | 10 ++++++---- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv4/tcp_minisocks.c | 3 --- net/ipv6/syncookies.c | 2 +- 8 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e83084ab8c1..ac000fccdf0f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -101,6 +101,8 @@ struct netns_ipv4 { int sysctl_tcp_syn_retries; int sysctl_tcp_synack_retries; + int sysctl_tcp_syncookies; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index 05659e860039..1fb23b70d237 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; -extern int sysctl_tcp_syncookies; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 643a86c49020..ba0dcffada3b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -19,8 +19,6 @@ #include #include -extern int sysctl_tcp_syncookies; - static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; #define COOKIEBITS 24 /* Upper bits store count */ @@ -307,7 +305,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; struct flowi4 fl4; - if (!sysctl_tcp_syncookies || !th->ack || th->rst) + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bb682e36d8b7..d80142570a8d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -341,15 +341,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#ifdef CONFIG_SYN_COOKIES - { - .procname = "tcp_syncookies", - .data = &sysctl_tcp_syncookies, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, @@ -960,6 +951,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_SYN_COOKIES + { + .procname = "tcp_syncookies", + .data = &init_net.ipv4.sysctl_tcp_syncookies, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 596c1cb6759a..b17aba42a368 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6163,9 +6163,10 @@ static bool tcp_syn_flood_action(const struct sock *sk, struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; bool want_cookie = false; + struct net *net = sock_net(sk); #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { + if (net->ipv4.sysctl_tcp_syncookies) { msg = "Sending cookies"; want_cookie = true; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6174,7 +6175,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); if (!queue->synflood_warned && - sysctl_tcp_syncookies != 2 && + net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); @@ -6207,6 +6208,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; struct dst_entry *dst = NULL; struct request_sock *req; @@ -6217,7 +6219,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, * limitations, they conserve resources and peer is * evidently real one. */ - if ((sysctl_tcp_syncookies == 2 || + if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); if (!want_cookie) @@ -6283,7 +6285,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } } /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && + else if (!net->ipv4.sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3146279695b9..98313d10a2e0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -860,7 +860,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } - #ifdef CONFIG_TCP_MD5SIG /* * RFC2385 MD5 checksumming requires a mapping of @@ -2391,6 +2390,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; + net->ipv4.sysctl_tcp_syncookies = 0; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 75632a925824..fadd8b978951 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -27,9 +27,6 @@ #include #include -int sysctl_tcp_syncookies __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_syncookies); - int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef20795e..0e393ff7f5d0 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -148,7 +148,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack || th->rst) + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) -- cgit v1.2.3 From 1043e25ff96a1efc7bd34d11f5f32203a28a3bd7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:52 +0200 Subject: ipv4: Namespaceify tcp reordering sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- include/net/tcp.h | 4 +++- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 12 ++++++------ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_metrics.c | 3 ++- 7 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ac000fccdf0f..eb4cd0a3c296 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -100,8 +100,8 @@ struct netns_ipv4 { int sysctl_tcp_syn_retries; int sysctl_tcp_synack_retries; - int sysctl_tcp_syncookies; + int sysctl_tcp_reordering; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 1fb23b70d237..7e9a147cabae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -961,9 +961,11 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) */ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { + struct net *net = sock_net((struct sock *)tp); + tp->do_early_retrans = sysctl_tcp_early_retrans && sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - sysctl_tcp_reordering == 3; + net->ipv4.sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d80142570a8d..7cd20570588f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -455,13 +455,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "tcp_reordering", - .data = &sysctl_tcp_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_reordering", .data = &sysctl_tcp_max_reordering, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, #endif + { + .procname = "tcp_reordering", + .data = &init_net.ipv4.sysctl_tcp_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3dbb3637bb4b..f4db6b04cdb4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -406,7 +406,7 @@ void tcp_init_sock(struct sock *sk) tp->mss_cache = TCP_MSS_DEFAULT; u64_stats_init(&tp->syncp); - tp->reordering = sysctl_tcp_reordering; + tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b17aba42a368..5ee6fe0d152d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; -int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; int sysctl_tcp_max_reordering __read_mostly = 300; -EXPORT_SYMBOL(sysctl_tcp_reordering); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@ -1883,6 +1881,7 @@ void tcp_enter_loss(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ @@ -1933,9 +1932,9 @@ void tcp_enter_loss(struct sock *sk) * suggests that the degree of reordering is over-estimated. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= sysctl_tcp_reordering) + tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + net->ipv4.sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -2119,6 +2118,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; + int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; /* Trick#1: The loss is proven. */ if (tp->lost_out) @@ -2133,7 +2133,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) */ packets_out = tp->packets_out; if (packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && + tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) && !tcp_may_send_now(sk)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. @@ -3317,7 +3317,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 98313d10a2e0..10dfc8b5c0f8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2389,8 +2389,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; - net->ipv4.sysctl_tcp_syncookies = 0; + net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; return 0; fail: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c8cbc2b4b792..c26241f3057b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -369,6 +369,7 @@ void tcp_update_metrics(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct tcp_metrics_block *tm; unsigned long rtt; u32 val; @@ -473,7 +474,7 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != sysctl_tcp_reordering) + tp->reordering != net->ipv4.sysctl_tcp_reordering) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } -- cgit v1.2.3 From ae5c3f406cffe15ffd2aa544961b7cd027468d46 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:53 +0200 Subject: ipv4: Namespaceify tcp_retries1 sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 16 ++++++++-------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 8 ++++---- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index eb4cd0a3c296..dee6ba647461 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -102,6 +102,7 @@ struct netns_ipv4 { int sysctl_tcp_synack_retries; int sysctl_tcp_syncookies; int sysctl_tcp_reordering; + int sysctl_tcp_retries1; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7e9a147cabae..da96b9af3e5f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7cd20570588f..52853c6dc929 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -319,14 +319,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_retries1", - .data = &sysctl_tcp_retries1, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra2 = &tcp_retr1_max - }, { .procname = "tcp_retries2", .data = &sysctl_tcp_retries2, @@ -960,6 +952,14 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_retries1", + .data = &init_net.ipv4.sysctl_tcp_retries1, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra2 = &tcp_retr1_max + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 10dfc8b5c0f8..57fe3c6bfb30 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2391,6 +2391,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; net->ipv4.sysctl_tcp_syncookies = 0; net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; + net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ca25fdf0c525..6694e33149b9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,7 +22,6 @@ #include #include -int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; @@ -171,7 +170,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { /* Some middle-boxes may black-hole Fast Open _after_ * the handshake. Therefore we conservatively disable * Fast Open on this path on recurring timeouts with @@ -180,7 +179,7 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data_acked && tp->bytes_acked <= tp->rx_opt.mss_clamp) { tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (icsk->icsk_retransmits == sysctl_tcp_retries1) + if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } @@ -359,6 +358,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); if (tp->fastopen_rsk) { @@ -489,7 +489,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) + if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0)) __sk_dst_reset(sk); out:; -- cgit v1.2.3 From c6214a97c86c660de4f7ddb8eed925192e646161 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:54 +0200 Subject: ipv4: Namespaceify tcp_retries2 sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 5 ++--- 6 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index dee6ba647461..d92c8e5d0fbc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -103,6 +103,7 @@ struct netns_ipv4 { int sysctl_tcp_syncookies; int sysctl_tcp_reordering; int sysctl_tcp_retries1; + int sysctl_tcp_retries2; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index da96b9af3e5f..a786cfa6301b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 52853c6dc929..8e339d43619c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -319,13 +319,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_retries2", - .data = &sysctl_tcp_retries2, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_fin_timeout", .data = &sysctl_tcp_fin_timeout, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra2 = &tcp_retr1_max }, + { + .procname = "tcp_retries2", + .data = &init_net.ipv4.sysctl_tcp_retries2, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 57fe3c6bfb30..0710e6108a5e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2392,6 +2392,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syncookies = 0; net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; + net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fda379cd600d..7beb3f688b7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3476,6 +3476,7 @@ void tcp_send_probe0(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); unsigned long probe_max; int err; @@ -3489,7 +3490,7 @@ void tcp_send_probe0(struct sock *sk) } if (err <= 0) { - if (icsk->icsk_backoff < sysctl_tcp_retries2) + if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) icsk->icsk_backoff++; icsk->icsk_probes_out++; probe_max = TCP_RTO_MAX; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6694e33149b9..09f4e0297e56 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,7 +22,6 @@ #include #include -int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; @@ -189,7 +188,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); } - retry_until = sysctl_tcp_retries2; + retry_until = net->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -303,7 +302,7 @@ static void tcp_probe_timer(struct sock *sk) (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) goto abort; - max_probes = sysctl_tcp_retries2; + max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; -- cgit v1.2.3 From c402d9beffb6141ab2e4d2ad8be71128803a28ca Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:55 +0200 Subject: ipv4: Namespaceify tcp_orphan_retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 3 +-- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d92c8e5d0fbc..080230321985 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -104,6 +104,7 @@ struct netns_ipv4 { int sysctl_tcp_reordering; int sysctl_tcp_retries1; int sysctl_tcp_retries2; + int sysctl_tcp_orphan_retries; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index a786cfa6301b..71f840b89c76 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 8e339d43619c..b7af6336985f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -419,13 +419,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "tcp_orphan_retries", - .data = &sysctl_tcp_orphan_retries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_fack", .data = &sysctl_tcp_fack, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_orphan_retries", + .data = &init_net.ipv4.sysctl_tcp_orphan_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0710e6108a5e..1240dd62eee1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2393,6 +2393,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; + net->ipv4.sysctl_tcp_orphan_retries = 0; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 09f4e0297e56..49bc474f8e35 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,7 +22,6 @@ #include #include -int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_err(struct sock *sk) @@ -78,7 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* Calculate maximal number or retries on an orphaned socket. */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) -- cgit v1.2.3 From 1e579caa18b96f9eb18f4f5416658cd15f37c062 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:56 +0200 Subject: ipv4: Namespaceify tcp_fin_timeout sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 7 +++---- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 080230321985..de5ff4385e84 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -105,6 +105,7 @@ struct netns_ipv4 { int sysctl_tcp_retries1; int sysctl_tcp_retries2; int sysctl_tcp_orphan_retries; + int sysctl_tcp_fin_timeout; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 71f840b89c76..3f160c2e6960 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -239,7 +239,6 @@ extern struct inet_timewait_death_row tcp_death_row; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; -extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -1249,7 +1248,7 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b7af6336985f..8bd335a2cba8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -319,13 +319,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fin_timeout", - .data = &sysctl_tcp_fin_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fin_timeout", + .data = &init_net.ipv4.sysctl_tcp_fin_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f4db6b04cdb4..014f18e2f7b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,8 +282,6 @@ #include #include -int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; - int sysctl_tcp_min_tso_segs __read_mostly = 2; int sysctl_tcp_autocorking __read_mostly = 1; @@ -2330,6 +2328,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); int val; int err = 0; @@ -2526,7 +2525,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_LINGER2: if (val < 0) tp->linger2 = -1; - else if (val > sysctl_tcp_fin_timeout / HZ) + else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ) tp->linger2 = 0; else tp->linger2 = val * HZ; @@ -2771,7 +2770,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : sysctl_tcp_fin_timeout) / HZ; + val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1240dd62eee1..36c83c28d9c9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2394,6 +2394,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; net->ipv4.sysctl_tcp_orphan_retries = 0; + net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; return 0; fail: -- cgit v1.2.3 From 4979f2d9f7262b9b180bc83de8d70f7a7721c085 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:57 +0200 Subject: ipv4: Namespaceify tcp_notsent_lowat sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 --- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index de5ff4385e84..4d6ec3f6fafe 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -106,6 +106,7 @@ struct netns_ipv4 { int sysctl_tcp_retries2; int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; + unsigned int sysctl_tcp_notsent_lowat; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f160c2e6960..9b2cb0c8d876 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -267,7 +267,6 @@ extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; -extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; @@ -1682,7 +1681,8 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { - return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; + struct net *net = sock_net((struct sock *)tp); + return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } static inline bool tcp_stream_memory_free(const struct sock *sk) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 8bd335a2cba8..44bb59824267 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -455,13 +455,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_notsent_lowat", - .data = &sysctl_tcp_notsent_lowat, - .maxlen = sizeof(sysctl_tcp_notsent_lowat), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_notsent_lowat", + .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 36c83c28d9c9..11ae706f53a1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2395,6 +2395,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7beb3f688b7a..7d2c7a400456 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX; -EXPORT_SYMBOL(sysctl_tcp_notsent_lowat); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); -- cgit v1.2.3 From 473bd239b808a8af5241ce9996a16d283d88ddff Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Mar 2016 14:11:05 -0800 Subject: tcp: Add tcp_inq to get available receive bytes on socket Create a common kernel function to get the number of bytes available on a TCP socket. This is based on code in INQ getsockopt and we now call the function for that getsockopt. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/tcp.h | 24 ++++++++++++++++++++++++ net/ipv4/tcp.c | 15 +-------------- 2 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index e90db8546806..0302636af98c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1816,4 +1816,28 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) skb->truesize = 2; } +static inline int tcp_inq(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int answ; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + answ = 0; + } else if (sock_flag(sk, SOCK_URGINLINE) || + !tp->urg_data || + before(tp->urg_seq, tp->copied_seq) || + !before(tp->urg_seq, tp->rcv_nxt)) { + + answ = tp->rcv_nxt - tp->copied_seq; + + /* Subtract 1, if FIN was received */ + if (answ && sock_flag(sk, SOCK_DONE)) + answ--; + } else { + answ = tp->urg_seq - tp->copied_seq; + } + + return answ; +} + #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9faadb42485..a265f00b9df9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -556,20 +556,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -EINVAL; slow = lock_sock_fast(sk); - if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) - answ = 0; - else if (sock_flag(sk, SOCK_URGINLINE) || - !tp->urg_data || - before(tp->urg_seq, tp->copied_seq) || - !before(tp->urg_seq, tp->rcv_nxt)) { - - answ = tp->rcv_nxt - tp->copied_seq; - - /* Subtract 1, if FIN was received */ - if (answ && sock_flag(sk, SOCK_DONE)) - answ--; - } else - answ = tp->urg_seq - tp->copied_seq; + answ = tcp_inq(sk); unlock_sock_fast(sk, slow); break; case SIOCATMARK: -- cgit v1.2.3 From a44d6eacdaf56f74fad699af7f4925a5f5ac0e7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 14 Mar 2016 10:52:15 -0700 Subject: tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Per RFC4898, they count segments sent/received containing a positive length data segment (that includes retransmission segments carrying data). Unlike tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments carrying no data (e.g. pure ack). The patch also updates the segs_in in tcp_fastopen_add_skb() so that segs_in >= data_segs_in property is kept. Together with retransmission data, tcpi_data_segs_out gives a better signal on the rxmit rate. v6: Rebase on the latest net-next v5: Eric pointed out that checking skb->len is still needed in tcp_fastopen_add_skb() because skb can carry a FIN without data. Hence, instead of open coding segs_in and data_segs_in, tcp_segs_in() helper is used. Comment is added to the fastopen case to explain why segs_in has to be reset and tcp_segs_in() has to be called before __skb_pull(). v4: Add comment to the changes in tcp_fastopen_add_skb() and also add remark on this case in the commit message. v3: Add const modifier to the skb parameter in tcp_segs_in() v2: Rework based on recent fix by Eric: commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") Signed-off-by: Martin KaFai Lau Cc: Chris Rapier Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 10 ++++++++++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_fastopen.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 +- 9 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcbf51da4e1e..7be9b1242354 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,9 @@ struct tcp_sock { u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ @@ -165,6 +168,9 @@ struct tcp_sock { u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/net/tcp.h b/include/net/tcp.h index 0302636af98c..c8dbd293daae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1840,4 +1840,14 @@ static inline int tcp_inq(struct sock *sk) return answ; } +static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) +{ + u16 segs_in; + + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tp->segs_in += segs_in; + if (skb->len > tcp_hdrlen(skb)) + tp->data_segs_in += segs_in; +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fe95446e9abf..53e8e3fe6b1b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -199,6 +199,8 @@ struct tcp_info { __u32 tcpi_notsent_bytes; __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a265f00b9df9..992b3103ec3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2715,6 +2715,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_notsent_bytes = max(0, notsent_bytes); info->tcpi_min_rtt = tcp_min_rtt(tp); + info->tcpi_data_segs_in = tp->data_segs_in; + info->tcpi_data_segs_out = tp->data_segs_out; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdb286ddba04..4fc0061bebf4 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -140,6 +140,14 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) return; skb_dst_drop(skb); + /* segs_in has been initialized to 1 in tcp_create_openreq_child(). + * Hence, reset segs_in to 0 before calling tcp_segs_in() + * to avoid double counting. Also, tcp_segs_in() expects + * skb->len to include the tcp_hdrlen. Hence, it should + * be called before __skb_pull(). + */ + tp->segs_in = 0; + tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); skb_set_owner_r(skb, sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c8d58dfac9b..0b02ef773705 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1650,7 +1650,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ae90e4b34bd3..acb366dd61e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2c7a400456..7d2dc015cd19 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); - if (skb->len != tcp_header_size) + if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); + tp->data_segs_out += tcp_skb_pcount(skb); + } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33f2820181f9..9c16565b70cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1443,7 +1443,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3