diff options
| author | David S. Miller <davem@davemloft.net> | 2016-04-05 05:11:21 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-04-05 05:11:21 +0300 |
| commit | 15f41e2ba13a6726632e44b1180e805a61e470ad (patch) | |
| tree | 0a650d759c08cd171c4074553203b92f85e3fa29 /include | |
| parent | 43e2dfb23eb8f3698718ec1e3936c76912de1c30 (diff) | |
| parent | 4ce7e93cb3fe87db5b700050172dc41def9834b3 (diff) | |
| download | linux-15f41e2ba13a6726632e44b1180e805a61e470ad.tar.xz | |
Merge branch 'tcp-udp-misc'
Eric Dumazet says:
====================
net: various udp/tcp changes
First round of patches for linux-4.7
Add a generic facility for sockets to be freed after an RCU grace
period, if they need to.
Then UDP stack is changed to no longer use SLAB_DESTROY_BY_RCU,
in order to speedup rx processing for traffic encapsulated in UDP.
It gives a 17 % speedup for normal UDP reception in stress conditions.
Then TCP listeners are changed to use SOCK_RCU_FREE as well
to avoid touching sk_refcnt in synflood case :
I got up to 30 % performance increase for a mono listener.
Then three patches add SK_MEMINFO_DROPS to sock_diag
and add per socket rx drops accounting to TCP.
Last patch adds rate limiting on ACK sent on behalf of SYN_RECV
to better resist to SYNFLOOD targeting one or few flows.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/udp.h | 8 | ||||
| -rw-r--r-- | include/net/inet6_hashtables.h | 12 | ||||
| -rw-r--r-- | include/net/inet_hashtables.h | 47 | ||||
| -rw-r--r-- | include/net/request_sock.h | 31 | ||||
| -rw-r--r-- | include/net/sock.h | 21 | ||||
| -rw-r--r-- | include/net/tcp.h | 13 | ||||
| -rw-r--r-- | include/net/udp.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/sock_diag.h | 1 |
8 files changed, 82 insertions, 53 deletions
diff --git a/include/linux/udp.h b/include/linux/udp.h index 87c094961bd5..32342754643a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -#define udp_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry(__sk, list) \ + hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 28332bdac333..b87becacd9d3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, + bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); + *refcounted = true; if (sk) return sk; - + *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif); } @@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif) + int iif, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); + *refcounted = true; if (sk) return sk; return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif); + iif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 50f635c2c536..0574493e3899 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -100,14 +100,10 @@ struct inet_bind_hashbucket { /* * Sockets can be hashed in established or listening table - * We must use different 'nulls' end-of-chain value for listening - * hash table, or we might find a socket that was closed and - * reallocated/inserted into established hash table */ -#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_nulls_head head; + struct hlist_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net, net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ -/* - * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need +/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM - * - * Local BH must be disabled here. */ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, + bool *refcounted) { u16 hnum = ntohs(dport); - struct sock *sk = __inet_lookup_established(net, hashinfo, - saddr, sport, daddr, hnum, dif); + struct sock *sk; - return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sk = __inet_lookup_established(net, hashinfo, saddr, sport, + daddr, hnum, dif); + *refcounted = true; + if (sk) + return sk; + *refcounted = false; + return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + sport, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, @@ -325,12 +324,13 @@ static inline struct sock *inet_lookup(struct net *net, const int dif) { struct sock *sk; + bool refcounted; - local_bh_disable(); sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif); - local_bh_enable(); + dport, dif, &refcounted); + if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; return sk; } @@ -338,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, - const __be16 dport) + const __be16 dport, + bool *refcounted) { struct sock *sk = skb_steal_sock(skb); const struct iphdr *iph = ip_hdr(skb); + *refcounted = true; if (sk) return sk; - else - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, - doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb)); + + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + refcounted); } u32 sk_ehashfn(const struct sock *sk); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index f49759decb28..6ebe13eb1c4c 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, struct request_sock *req; req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - - if (req) { - req->rsk_ops = ops; - if (attach_listener) { - sock_hold(sk_listener); - req->rsk_listener = sk_listener; - } else { - req->rsk_listener = NULL; + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; } - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&req->rsk_refcnt, 0); + req->rsk_listener = sk_listener; } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + atomic_set(&req->rsk_refcnt, 0); + return req; } diff --git a/include/net/sock.h b/include/net/sock.h index e91b87f54f99..310c4367ea83 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -178,7 +178,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -438,6 +438,7 @@ struct sock { struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; + struct rcu_head sk_rcu; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) @@ -669,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -720,6 +721,7 @@ enum sock_flags { */ SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ + SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2010,6 +2012,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); } +static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +{ + int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + + atomic_add(segs, &sk->sk_drops); +} + void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, diff --git a/include/net/tcp.h b/include/net/tcp.h index a23282996ca9..74d3ed5eb219 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1836,4 +1836,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) tp->data_segs_in += segs_in; } +/* + * TCP listen path runs lockless. + * We forced "struct sock" to be const qualified to make sure + * we don't modify one of its field by mistake. + * Here, we increment sk_drops which is an atomic_t, so we can safely + * make sock writable again. + */ +static inline void tcp_listendrop(const struct sock *sk) +{ + atomic_inc(&((struct sock *)sk)->sk_drops); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); +} + #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 92927f729ac8..d870ec1611c4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -59,7 +59,7 @@ struct udp_skb_cb { * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_nulls_head head; + struct hlist_head head; int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index bae2d80034d4..7ff505d8a47b 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -20,6 +20,7 @@ enum { SK_MEMINFO_WMEM_QUEUED, SK_MEMINFO_OPTMEM, SK_MEMINFO_BACKLOG, + SK_MEMINFO_DROPS, SK_MEMINFO_VARS, }; |
