author	Eric Dumazet <edumazet@google.com>	2016-04-14 08:05:39 +0300
committer	David S. Miller <davem@davemloft.net>	2016-04-15 23:45:44 +0300
commit	b3d051477cf94e9d71d6acadb8a90de15237b9c1
tree	59009bc698f472b31b15059972e6904cd9272d32 /net
parent	ac18dd9e842294377dbaf1e8d169493567a81fa1
tcp: do not mess with listener sk_wmem_alloc
When removing the sk_refcnt manipulation on synflood, I missed that
using skb_set_owner_w() was racy if sk->sk_wmem_alloc had already
transitioned to 0.
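Behind the race, in a simplified sketch (an illustration for this writeup, not the exact kernel source): skb_set_owner_w() charges the skb to the socket through sk_wmem_alloc, and the paired destructor frees the socket once that counter drops to zero, so charging an skb to a listener we hold no sk_refcnt on can touch a socket that is already being torn down.

/* Simplified sketch of the two helpers involved (not the exact
 * kernel source).
 */
static void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* Last charged byte is gone: finish the free that sk_free()
	 * deferred while packets were still in flight.
	 */
	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}

static void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb->sk = sk;
	skb->destructor = sock_wfree;
	/* Racy without holding sk_refcnt: if another CPU already
	 * dropped sk_wmem_alloc to zero, sk is being freed and this
	 * add resurrects a dead socket (use-after-free later on).
	 */
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}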
We could hold sk_refcnt instead, but that is too expensive under attack
(doing so only increases performance from 3.2 Mpps to 3.8 Mpps).
In this patch, I chose not to attach a socket to syncookie skbs.
Performance is now 5 Mpps instead of 3.2 Mpps.
A follow-up patch will remove the last known false sharing in
tcp_rcv_state_process().
Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
 net/ipv4/tcp_input.c  |  7 ++++---
 net/ipv4/tcp_ipv4.c   |  4 ++--
 net/ipv4/tcp_output.c | 16 ++++++++++++----
 net/ipv6/tcp_ipv6.c   |  4 ++--
 4 files changed, 20 insertions(+), 11 deletions(-)
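The enum tcp_synack_type itself is declared in include/net/tcp.h, which this diffstat (limited to 'net') does not show; judging from the call sites below, it presumably reads:

enum tcp_synack_type {
	TCP_SYNACK_NORMAL,
	TCP_SYNACK_COOKIE,
	TCP_SYNACK_FASTOPEN,
};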
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 983f04c11177..7ea7034af83f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6327,7 +6327,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
-				    &foc, false);
+				    &foc, TCP_SYNACK_FASTOPEN);
 		/* Add the child socket directly into the accept queue */
 		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
 		sk->sk_data_ready(sk);
@@ -6337,8 +6337,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		tcp_rsk(req)->tfo_listener = false;
 		if (!want_cookie)
 			inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		af_ops->send_synack(sk, dst, &fl, req,
-				    &foc, !want_cookie);
+		af_ops->send_synack(sk, dst, &fl, req, &foc,
+				    !want_cookie ? TCP_SYNACK_NORMAL :
+						   TCP_SYNACK_COOKIE);
 		if (want_cookie) {
 			reqsk_free(req);
 			return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f4f2a0a3849d..d2a5763e5abc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -830,7 +830,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
-				  bool attach_req)
+			      enum tcp_synack_type synack_type)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -841,7 +841,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, foc, attach_req);
+	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d2dc015cd19..6451b83d81e9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2944,7 +2944,7 @@ int tcp_send_synack(struct sock *sk)
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcp_fastopen_cookie *foc,
-				bool attach_req)
+				enum tcp_synack_type synack_type)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -2964,14 +2964,22 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	/* Reserve space for headers. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 
-	if (attach_req) {
+	switch (synack_type) {
+	case TCP_SYNACK_NORMAL:
 		skb_set_owner_w(skb, req_to_sk(req));
-	} else {
+		break;
+	case TCP_SYNACK_COOKIE:
+		/* Under synflood, we do not attach skb to a socket,
+		 * to avoid false sharing.
+		 */
+		break;
+	case TCP_SYNACK_FASTOPEN:
 		/* sk is a const pointer, because we want to express multiple
 		 * cpu might call us concurrently.
 		 * sk->sk_wmem_alloc in an atomic, we can promote to rw.
 		 */
 		skb_set_owner_w(skb, (struct sock *)sk);
+		break;
 	}
 	skb_dst_set(skb, dst);
 
@@ -3516,7 +3524,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
 	int res;
 
 	tcp_rsk(req)->txhash = net_tx_rndhash();
-	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
+	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0e621bc1ae11..800265c7fd3f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -439,7 +439,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
-			      bool attach_req)
+			      enum tcp_synack_type synack_type)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -452,7 +452,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 					       IPPROTO_TCP)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, foc, attach_req);
+	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
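Design note: leaving a syncookie SYNACK unowned (no skb->sk, no destructor) is safe because all the connection state is encoded in the cookie itself, so the skb never needs to account back to the listener; what disappears is every transmitting CPU atomically updating the shared listener's sk_wmem_alloc cache line, the false sharing the TCP_SYNACK_COOKIE comment refers to. TCP_SYNACK_FASTOPEN still attaches the skb, since there the caller passes the freshly created fast open child socket rather than the contended listener.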
