author		David S. Miller <davem@davemloft.net>	2016-04-05 05:11:21 +0300
committer	David S. Miller <davem@davemloft.net>	2016-04-05 05:11:21 +0300
commit		15f41e2ba13a6726632e44b1180e805a61e470ad (patch)
tree		0a650d759c08cd171c4074553203b92f85e3fa29 /include
parent		43e2dfb23eb8f3698718ec1e3936c76912de1c30 (diff)
parent		4ce7e93cb3fe87db5b700050172dc41def9834b3 (diff)
download	linux-15f41e2ba13a6726632e44b1180e805a61e470ad.tar.xz
Merge branch 'tcp-udp-misc'
Eric Dumazet says:

====================
net: various udp/tcp changes

First round of patches for linux-4.7.

Add a generic facility for sockets to be freed after an RCU grace period, if they need to.

Then the UDP stack is changed to no longer use SLAB_DESTROY_BY_RCU, in order to speed up rx processing for traffic encapsulated in UDP. This gives a 17% speedup for normal UDP reception in stress conditions.

Then TCP listeners are changed to use SOCK_RCU_FREE as well, to avoid touching sk_refcnt in the synflood case: I got up to a 30% performance increase for a single listener.

Then three patches add SK_MEMINFO_DROPS to sock_diag and add per-socket rx drop accounting to TCP.

The last patch adds rate limiting on ACKs sent on behalf of SYN_RECV sockets, to better resist SYNFLOOD attacks targeting one or a few flows.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
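The generic facility is the SOCK_RCU_FREE flag added below: a flagged socket is not freed on the last sock_put(), but only after an RCU grace period, so lockless readers walking the hash chains never dereference freed memory. A minimal sketch of the pattern, close to what the series does in net/core/sock.c (teardown details elided):

static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);

	/* release filters, dst, protocol state, then free sk (elided) */
}

void sk_destruct(struct sock *sk)
{
	if (sock_flag(sk, SOCK_RCU_FREE))
		call_rcu(&sk->sk_rcu, __sk_destruct);	/* defer one grace period */
	else
		__sk_destruct(&sk->sk_rcu);		/* free immediately, as before */
}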
Diffstat (limited to 'include')
 include/linux/udp.h            |  8
 include/net/inet6_hashtables.h | 12
 include/net/inet_hashtables.h  | 47
 include/net/request_sock.h     | 31
 include/net/sock.h             | 21
 include/net/tcp.h              | 13
 include/net/udp.h              |  2
 include/uapi/linux/sock_diag.h |  1
 8 files changed, 82 insertions(+), 53 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 87c094961bd5..32342754643a 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}
-#define udp_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry(__sk, list) \
+ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
- hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry_rcu(__sk, list) \
+ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
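With RCU-freed sockets, the UDP chains no longer need the 'nulls' end-of-list markers, so the iterators drop their node argument. A sketch of a lockless walk over a secondary-hash bucket, in the style of the udp.c lookup after this series (hslot2, compute_score() and the surrounding variables are stand-ins for the real helpers):

struct sock *sk, *result = NULL;
int badness = -1;

rcu_read_lock();
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
	int score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);

	if (score > badness) {	/* remember the best-scoring socket */
		result = sk;
		badness = score;
	}
}
rcu_read_unlock();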
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 28332bdac333..b87becacd9d3 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif)
+ const int dif,
+ bool *refcounted)
{
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
sport, daddr, hnum, dif);
+ *refcounted = true;
if (sk)
return sk;
-
+ *refcounted = false;
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
}
@@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
- int iif)
+ int iif,
+ bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
+ *refcounted = true;
if (sk)
return sk;
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
- iif);
+ iif, refcounted);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
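The new refcounted out-parameter encodes the lookup contract: established sockets are still returned with a reference held, while listeners found by inet6_lookup_listener() are now returned under RCU with no reference taken. A caller-side sketch, roughly what tcp_v6_rcv() becomes with this series (error handling elided, label hypothetical):

const struct tcphdr *th = tcp_hdr(skb);
bool refcounted;
struct sock *sk;

sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
			th->source, th->dest, inet6_iif(skb),
			&refcounted);
if (!sk)
	goto no_tcp_socket;
/* ... process the segment ... */
if (refcounted)
	sock_put(sk);	/* drop the reference only if one was taken */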
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 50f635c2c536..0574493e3899 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -100,14 +100,10 @@ struct inet_bind_hashbucket {
/*
* Sockets can be hashed in established or listening table
- * We must use different 'nulls' end-of-chain value for listening
- * hash table, or we might find a socket that was closed and
- * reallocated/inserted into established hash table
*/
-#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
- struct hlist_nulls_head head;
+ struct hlist_head head;
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */
-/*
- * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
+/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
- *
- * Local BH must be disabled here.
*/
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
- const int dif)
+ const int dif,
+ bool *refcounted)
{
u16 hnum = ntohs(dport);
- struct sock *sk = __inet_lookup_established(net, hashinfo,
- saddr, sport, daddr, hnum, dif);
+ struct sock *sk;
- return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
- sport, daddr, hnum, dif);
+ sk = __inet_lookup_established(net, hashinfo, saddr, sport,
+ daddr, hnum, dif);
+ *refcounted = true;
+ if (sk)
+ return sk;
+ *refcounted = false;
+ return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ sport, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct net *net,
@@ -325,12 +324,13 @@ static inline struct sock *inet_lookup(struct net *net,
const int dif)
{
struct sock *sk;
+ bool refcounted;
- local_bh_disable();
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- dport, dif);
- local_bh_enable();
+ dport, dif, &refcounted);
+ if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
return sk;
}
@@ -338,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
const __be16 sport,
- const __be16 dport)
+ const __be16 dport,
+ bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
const struct iphdr *iph = ip_hdr(skb);
+ *refcounted = true;
if (sk)
return sk;
- else
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb));
+
+ return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, iph->saddr, sport,
+ iph->daddr, dport, inet_iif(skb),
+ refcounted);
}
u32 sk_ehashfn(const struct sock *sk);
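inet_lookup() can now run without disabling BHs: the lockless lookup is followed by atomic_inc_not_zero(), which refuses to take a reference on a socket whose refcount already dropped to zero, i.e. one on its way to being freed. The pattern in isolation (a sketch; real callers are already in RCU/softirq context):

bool refcounted;
struct sock *sk;

rcu_read_lock();
sk = __inet_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport,
		   daddr, dport, dif, &refcounted);
if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
	sk = NULL;	/* raced with the final sock_put(); treat as not found */
rcu_read_unlock();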
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index f49759decb28..6ebe13eb1c4c 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
struct request_sock *req;
req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
-
- if (req) {
- req->rsk_ops = ops;
- if (attach_listener) {
- sock_hold(sk_listener);
- req->rsk_listener = sk_listener;
- } else {
- req->rsk_listener = NULL;
+ if (!req)
+ return NULL;
+ req->rsk_listener = NULL;
+ if (attach_listener) {
+ if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
+ kmem_cache_free(ops->slab, req);
+ return NULL;
}
- req_to_sk(req)->sk_prot = sk_listener->sk_prot;
- sk_node_init(&req_to_sk(req)->sk_node);
- sk_tx_queue_clear(req_to_sk(req));
- req->saved_syn = NULL;
- /* Following is temporary. It is coupled with debugging
- * helpers in reqsk_put() & reqsk_free()
- */
- atomic_set(&req->rsk_refcnt, 0);
+ req->rsk_listener = sk_listener;
}
+ req->rsk_ops = ops;
+ req_to_sk(req)->sk_prot = sk_listener->sk_prot;
+ sk_node_init(&req_to_sk(req)->sk_node);
+ sk_tx_queue_clear(req_to_sk(req));
+ req->saved_syn = NULL;
+ atomic_set(&req->rsk_refcnt, 0);
+
return req;
}
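Since a listener can now vanish while the SYN path runs lockless, reqsk_alloc() attaches to it with atomic_inc_not_zero() instead of an unconditional sock_hold(), and fails cleanly when the listener is already draining. Caller-side, that simply looks like an allocation failure (a sketch of the inet_reqsk_alloc() call in the TCP SYN path, flow simplified):

struct request_sock *req;

req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, !want_cookie);
if (!req)
	goto drop;	/* out of memory, or the listener is going away */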
diff --git a/include/net/sock.h b/include/net/sock.h
index e91b87f54f99..310c4367ea83 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -178,7 +178,7 @@ struct sock_common {
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
- struct hlist_nulls_node skc_portaddr_node;
+ struct hlist_node skc_portaddr_node;
};
struct proto *skc_prot;
possible_net_t skc_net;
@@ -438,6 +438,7 @@ struct sock {
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
+ struct rcu_head sk_rcu;
};
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -669,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry(__sk, list, sk_bind_node)
/**
- * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
+ * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @offset: offset of hlist_node within the struct.
*
*/
-#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
- for (pos = (head)->first; \
- (!is_a_nulls(pos)) && \
+#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
+ for (pos = rcu_dereference((head)->first); \
+ pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = pos->next)
+ pos = rcu_dereference(pos->next))
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -720,6 +721,7 @@ enum sock_flags {
*/
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
+ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -2010,6 +2012,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}
+static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
+{
+ int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+
+ atomic_add(segs, &sk->sk_drops);
+}
+
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
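sk_drops_add() charges a dropped skb as the number of GSO segments it carries, so drops of GRO-aggregated packets are not under-counted relative to the wire. A sketch of a receive-path user (the function and its overflow test are illustrative, not kernel code):

static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
		sk_drops_add(sk, skb);	/* counts gso_segs, not just 1 */
		kfree_skb(skb);
		return -ENOBUFS;
	}
	/* actual queueing elided */
	return 0;
}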
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a23282996ca9..74d3ed5eb219 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1836,4 +1836,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
tp->data_segs_in += segs_in;
}
+/*
+ * TCP listen path runs lockless.
+ * We forced "struct sock" to be const qualified to make sure
+ * we don't modify one of its fields by mistake.
+ * Here, we increment sk_drops which is an atomic_t, so we can safely
+ * make sock writable again.
+ */
+static inline void tcp_listendrop(const struct sock *sk)
+{
+ atomic_inc(&((struct sock *)sk)->sk_drops);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+}
+
#endif /* _TCP_H */
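tcp_listendrop() confines the const cast to one place: the lockless listen path passes the listener as const struct sock *, and sk_drops, being an atomic_t, is safe to write regardless. A sketch of a caller in the SYN-handling path (queue-full test as in tcp_conn_request(); drop handling simplified, function name hypothetical):

static int example_conn_request(const struct sock *sk, struct sk_buff *skb)
{
	if (inet_csk_reqsk_queue_is_full(sk)) {
		tcp_listendrop(sk);	/* sk_drops++, LINUX_MIB_LISTENDROPS++ */
		kfree_skb(skb);
		return 0;
	}
	/* ... */
	return 0;
}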
diff --git a/include/net/udp.h b/include/net/udp.h
index 92927f729ac8..d870ec1611c4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -59,7 +59,7 @@ struct udp_skb_cb {
* @lock: spinlock protecting changes to head/count
*/
struct udp_hslot {
- struct hlist_nulls_head head;
+ struct hlist_head head;
int count;
spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long))));
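With a plain hlist_head, writers still serialize on hslot->lock, and a locked walk becomes a straightforward sk_for_each(), as udp_lib_get_port() does after this series. A sketch (the conflict predicate is a stand-in for the real bind-conflict checks):

struct sock *sk2;

spin_lock_bh(&hslot->lock);
sk_for_each(sk2, &hslot->head) {
	if (bind_conflict(sk, sk2))	/* hypothetical predicate */
		goto fail_unlock;
}
/* link sk into the slot, bump hslot->count (elided) */
spin_unlock_bh(&hslot->lock);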
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index bae2d80034d4..7ff505d8a47b 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -20,6 +20,7 @@ enum {
SK_MEMINFO_WMEM_QUEUED,
SK_MEMINFO_OPTMEM,
SK_MEMINFO_BACKLOG,
+ SK_MEMINFO_DROPS,
SK_MEMINFO_VARS,
};
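The SK_MEMINFO_* enum doubles as an index into the u32 array that sock_diag emits (e.g. the INET_DIAG_SKMEMINFO attribute), so inserting SK_MEMINFO_DROPS before SK_MEMINFO_VARS grows the array while old readers keep working. A userspace sketch of reading the new slot (netlink request/reply plumbing elided):

#include <stdio.h>
#include <linux/rtnetlink.h>
#include <linux/sock_diag.h>

static void print_drops(const struct rtattr *attr)	/* INET_DIAG_SKMEMINFO */
{
	const __u32 *minfo = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) >= (SK_MEMINFO_DROPS + 1) * sizeof(__u32))
		printf("drops: %u\n", minfo[SK_MEMINFO_DROPS]);
}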