diff options
author | Eric Dumazet <edumazet@google.com> | 2016-04-01 18:52:17 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-04-05 05:11:20 +0300 |
commit | 3b24d854cb35383c30642116e5992fd619bdc9bc (patch) | |
tree | a2a0c8c8ad3f7f974d6a9b11b17865edfb62d6d1 /net/ipv4/tcp_ipv4.c | |
parent | 3a5d1c0e7cb5ba91aabbd7e28626e3cc925f8093 (diff) | |
download | linux-3b24d854cb35383c30642116e5992fd619bdc9bc.tar.xz |
tcp/dccp: do not touch listener sk_refcnt under synflood
When a SYNFLOOD targets a non-SO_REUSEPORT listener, multiple
CPUs contend on sk->sk_refcnt and sk->sk_wmem_alloc changes.
By letting listeners use the SOCK_RCU_FREE infrastructure,
we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt.
Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets,
only listeners are impacted by this change.
Peak performance under SYNFLOOD is increased by ~33%:
on my test machine, I could process 3.2 Mpps instead of 2.4 Mpps.
Most consuming functions are now skb_set_owner_w() and sock_wfree()
contending on sk->sk_wmem_alloc when cooking SYNACK and freeing them.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 66 |
1 files changed, 33 insertions, 33 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad450509029b..e5f924b29946 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -628,6 +628,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 
 #ifdef CONFIG_TCP_MD5SIG
+	rcu_read_lock();
 	hash_location = tcp_parse_md5sig_option(th);
 	if (sk && sk_fullsock(sk)) {
 		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
@@ -646,16 +647,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 					     ntohs(th->source), inet_iif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
-			return;
-		rcu_read_lock();
+			goto out;
+
 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
 					&ip_hdr(skb)->saddr, AF_INET);
 		if (!key)
-			goto release_sk1;
+			goto out;
+
 
 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
-			goto release_sk1;
+			goto out;
+
 	}
 
 	if (key) {
@@ -698,11 +701,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
 
 #ifdef CONFIG_TCP_MD5SIG
-release_sk1:
-	if (sk1) {
-		rcu_read_unlock();
-		sock_put(sk1);
-	}
+out:
+	rcu_read_unlock();
 #endif
 }
 
@@ -1538,11 +1538,12 @@ EXPORT_SYMBOL(tcp_prequeue);
 
 int tcp_v4_rcv(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
+	bool refcounted;
 	struct sock *sk;
 	int ret;
-	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto discard_it;
@@ -1588,7 +1589,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
-			       th->dest);
+			       th->dest, &refcounted);
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1609,7 +1610,11 @@ process:
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		/* We own a reference on the listener, increase it again
+		 * as we might lose it too soon.
+		 */
 		sock_hold(sk);
+		refcounted = true;
 		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
@@ -1665,7 +1670,8 @@ process:
 	bh_unlock_sock(sk);
 
 put_and_return:
-	sock_put(sk);
+	if (refcounted)
+		sock_put(sk);
 
 	return ret;
 
@@ -1688,7 +1694,8 @@ discard_it:
 	return 0;
 
 discard_and_relse:
-	sock_put(sk);
+	if (refcounted)
+		sock_put(sk);
 	goto discard_it;
 
 do_time_wait:
@@ -1712,6 +1719,7 @@ do_time_wait:
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
+			refcounted = false;
 			goto process;
 		}
 		/* Fall through to ACK */
@@ -1845,17 +1853,17 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
-	struct inet_connection_sock *icsk;
-	struct hlist_nulls_node *node;
-	struct sock *sk = cur;
-	struct inet_listen_hashbucket *ilb;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct inet_listen_hashbucket *ilb;
+	struct inet_connection_sock *icsk;
+	struct sock *sk = cur;
 
 	if (!sk) {
+get_head:
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
-		sk = sk_nulls_head(&ilb->head);
+		sk = sk_head(&ilb->head);
 		st->offset = 0;
 		goto get_sk;
 	}
@@ -1863,28 +1871,20 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	++st->num;
 	++st->offset;
 
-	sk = sk_nulls_next(sk);
+	sk = sk_next(sk);
 get_sk:
-	sk_nulls_for_each_from(sk, node) {
+	sk_for_each_from(sk) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
-		if (sk->sk_family == st->family) {
-			cur = sk;
-			goto out;
-		}
+		if (sk->sk_family == st->family)
+			return sk;
 		icsk = inet_csk(sk);
 	}
 	spin_unlock_bh(&ilb->lock);
 	st->offset = 0;
-	if (++st->bucket < INET_LHTABLE_SIZE) {
-		ilb = &tcp_hashinfo.listening_hash[st->bucket];
-		spin_lock_bh(&ilb->lock);
-		sk = sk_nulls_head(&ilb->head);
-		goto get_sk;
-	}
-	cur = NULL;
-out:
-	return cur;
+	if (++st->bucket < INET_LHTABLE_SIZE)
+		goto get_head;
+	return NULL;
 }
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)