From 407640de2152e33341ce1131dac269672c3d50f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:26 -0700 Subject: inet: add sk_listener argument to inet_reqsk_alloc() listener socket can be used to set net pointer, and will be later used to hold a reference on listener. Add a const qualifier to first argument (struct request_sock_ops *), and factorize all write_pnet(&ireq->ireq_net, sock_net(sk)); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c9ed91891887..cf7abb00941b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,16 +244,19 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr, initval); } -static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) +static inline struct request_sock * +inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) { struct request_sock *req = reqsk_alloc(ops); - struct inet_request_sock *ireq = inet_rsk(req); - if (req != NULL) { + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + kmemcheck_annotate_bitfield(ireq, flags); ireq->opt = NULL; atomic64_set(&ireq->ir_cookie, 0); ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() -- cgit v1.2.3 From e49bb337d77d54afebe4fe5b9008955e1337f83d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:27 -0700 Subject: inet: uninline inet_reqsk_alloc() inet_reqsk_alloc() is becoming fat and should not be inlined. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 24 ++---------------------- net/ipv4/tcp_input.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index cf7abb00941b..6fec7343070f 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -244,28 +244,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr, initval); } -static inline struct request_sock * -inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) -{ - struct request_sock *req = reqsk_alloc(ops); - - if (req) { - struct inet_request_sock *ireq = inet_rsk(req); - - kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; - atomic64_set(&ireq->ir_cookie, 0); - ireq->ireq_state = TCP_NEW_SYN_RECV; - write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&ireq->ireq_refcnt, 0); - } - - return req; -} +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener); static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a480f6811ea..52b74e0eab2a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5967,6 +5967,30 @@ static void tcp_openreq_init(struct request_sock *req, ireq->ir_mark = inet_request_mark(sk, skb); } +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener) +{ + struct request_sock *req = reqsk_alloc(ops); + + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); + ireq->opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&ireq->ireq_refcnt, 0); + } + + return req; +} +EXPORT_SYMBOL(inet_reqsk_alloc); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 4e9a578e5b6bdfa8b7fed7a41f28a86a7cffc85f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:28 -0700 Subject: inet: add rsk_listener field to struct request_sock Once we'll be able to lookup request sockets in ehash table, we'll need to get access to listener which created this request. This avoid doing a lookup to find the listener, which benefits for a more solid SO_REUSEPORT, and is needed once we no longer queue request sock into a listener private queue. Note that 'struct tcp_request_sock'->listener could be reduced to a single bit, as TFO listener should match req->rsk_listener. TFO will no longer need to hold a reference on the listener. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 12 +++++++++--- net/ipv4/tcp_input.c | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 56dc2faba47e..723d1cbdf20e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -52,6 +52,7 @@ struct request_sock { #define rsk_refcnt __req_common.skc_refcnt struct request_sock *dl_next; + struct sock *rsk_listener; u16 mss; u8 num_retrans; /* number of retransmits */ u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ @@ -67,13 +68,16 @@ struct request_sock { u32 peer_secid; }; -static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops) +static inline struct request_sock * +reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) { struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC); - if (req != NULL) + if (req) { req->rsk_ops = ops; - + sock_hold(sk_listener); + req->rsk_listener = sk_listener; + } return req; } @@ -88,6 +92,8 @@ static inline void reqsk_free(struct request_sock *req) WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); + if (req->rsk_listener) + sock_put(req->rsk_listener); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 52b74e0eab2a..fbe518981d36 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5970,7 +5970,7 @@ static void tcp_openreq_init(struct request_sock *req, struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) { - struct request_sock *req = reqsk_alloc(ops); + struct request_sock *req = reqsk_alloc(ops, sk_listener); if (req) { struct inet_request_sock *ireq = inet_rsk(req); -- cgit v1.2.3 From 9439ce00f208d95703a6725e4ea986dd90e37ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:29 -0700 Subject: tcp: rename struct tcp_request_sock listener The listener field in struct tcp_request_sock is a pointer back to the listener. We now have req->rsk_listener, so TCP only needs one boolean and not a full pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/core/request_sock.c | 18 +++++++----------- net/ipv4/inet_connection_sock.c | 7 +++---- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_fastopen.c | 7 +------ net/ipv4/tcp_input.c | 2 +- net/ipv6/syncookies.c | 2 +- 7 files changed, 15 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 97dbf16f7d9d..f869ae8afbaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,7 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; - struct sock *listener; /* needed for TFO */ + bool tfo_listener; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index e910317ef6d9..cc39a2aa663a 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) * case might also exist in tcp_v4_hnd_req() that will trigger this locking * order. * - * When a TFO req is created, it needs to sock_hold its listener to prevent - * the latter data structure from going away. - * - * This function also sets "treq->listener" to NULL and unreference listener - * socket. treq->listener is used by the listener so it is protected by the + * This function also sets "treq->tfo_listener" to false. + * treq->tfo_listener is used by the listener so it is protected by the * fastopenq->lock in this function. */ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset) { - struct sock *lsk = tcp_rsk(req)->listener; - struct fastopen_queue *fastopenq = - inet_csk(lsk)->icsk_accept_queue.fastopenq; + struct sock *lsk = req->rsk_listener; + struct fastopen_queue *fastopenq; + + fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); fastopenq->qlen--; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; if (req->sk) /* the child socket hasn't been accepted yet */ goto out; @@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, * special RST handling below. */ spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); reqsk_put(req); return; } @@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, fastopenq->qlen++; out: spin_unlock_bh(&fastopenq->lock); - sock_put(lsk); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3390ba6f96b2..741f0d96a7f7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -325,7 +325,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) sk_acceptq_removed(sk); if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) { spin_lock_bh(&queue->fastopenq->lock); - if (tcp_rsk(req)->listener) { + if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to * NULL to signify that the child socket is taken @@ -817,9 +817,9 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) { + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != tcp_rsk(req)->listener); + BUG_ON(sk != req->rsk_listener); /* Paranoid, to prevent race condition if * an inbound pkt destined for child is @@ -828,7 +828,6 @@ void inet_csk_listen_stop(struct sock *sk) * tcp_v4_destroy_sock(). */ tcp_sk(child)->fastopen_rsk = NULL; - sock_put(sk); } inet_csk_destroy_sock(child); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index eb940750bb1b..574b67765a06 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; - treq->listener = NULL; + treq->tfo_listener = false; ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 84381319e1bc..186fd394ec0a 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, tp = tcp_sk(child); tp->fastopen_rsk = req; - /* Do a hold on the listner sk so that if the listener is being - * closed, the child that has been accepted can live on and still - * access listen_lock. - */ - sock_hold(sk); - tcp_rsk(req)->listener = sk; + tcp_rsk(req)->tfo_listener = true; /* RFC1323: The window in SYN & SYN/ACK segments is never * scaled. So correct it appropriately. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fbe518981d36..a94ddb96fc85 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6120,7 +6120,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (err || want_cookie) goto drop_and_free; - tcp_rsk(req)->listener = NULL; + tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 039e74dd29fe..1ef0c926ce9d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -195,7 +195,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->listener = NULL; + treq->tfo_listener = false; ireq->ireq_family = AF_INET6; if (security_inet_conn_request(sk, skb, req)) -- cgit v1.2.3 From 0470c8ca1d57927f2cc3e1d5add1fb2834609447 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:31 -0700 Subject: inet: fix request sock refcounting While testing last patch series, I found req sock refcounting was wrong. We must set skc_refcnt to 1 for all request socks added in hashes, but also on request sockets created by FastOpen or syncookies. It is tricky because we need to defer this initialization so that future RCU lookups do not try to take a refcount on a not yet fully initialized request socket. Also get rid of ireq_refcnt alias. Signed-off-by: Eric Dumazet Fixes: 13854e5a6046 ("inet: add proper refcounting to request sock") Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 ----- include/net/inet_sock.h | 1 - include/net/request_sock.h | 11 +++++++++++ net/ipv4/syncookies.c | 11 ++++++----- net/ipv4/tcp_fastopen.c | 1 + net/ipv4/tcp_input.c | 4 ---- net/ipv6/syncookies.c | 7 ++++--- 7 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 191feec60205..b9a6b0a94cc6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, struct sock *child) { reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); - /* before letting lookups find us, make sure all req fields - * are committed to memory. - */ - smp_wmb(); - atomic_set(&req->rsk_refcnt, 1); } void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 6fec7343070f..b6c3737da4e9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -81,7 +81,6 @@ struct inet_request_sock { #define ir_cookie req.__req_common.skc_cookie #define ireq_net req.__req_common.skc_net #define ireq_state req.__req_common.skc_state -#define ireq_refcnt req.__req_common.skc_refcnt #define ireq_family req.__req_common.skc_family kmemcheck_bitfield_begin(flags); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 723d1cbdf20e..3fa4f824900a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -77,6 +77,11 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; + + /* Following is temporary. It is coupled with debugging + * helpers in reqsk_put() & reqsk_free() + */ + atomic_set(&req->rsk_refcnt, 0); } return req; } @@ -292,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, req->sk = NULL; req->dl_next = lopt->syn_table[hash]; + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. + */ + smp_wmb(); + atomic_set(&req->rsk_refcnt, 1); + write_lock(&queue->syn_wait_lock); lopt->syn_table[hash] = req; write_unlock(&queue->syn_wait_lock); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 574b67765a06..34e755403715 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } @@ -356,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { - reqsk_put(req); + reqsk_free(req); goto out; } @@ -377,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { - reqsk_put(req); + reqsk_free(req); goto out; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 186fd394ec0a..82e375a0cbcf 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -169,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); + atomic_set(&req->rsk_refcnt, 1); /* Add the child socket directly into the accept queue */ inet_csk_reqsk_queue_add(sk, req, child); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a94ddb96fc85..1dfbaee3554e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5981,10 +5981,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, ireq->ireq_state = TCP_NEW_SYN_RECV; write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - /* Following is temporary. It is coupled with debugging - * helpers in reqsk_put() & reqsk_free() - */ - atomic_set(&ireq->ireq_refcnt, 0); } return req; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 1ef0c926ce9d..da5823e5e5a7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct sock *child; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) + if (child) { + atomic_set(&req->rsk_refcnt, 1); inet_csk_reqsk_queue_add(sk, req, child); - else + } else { reqsk_free(req); - + } return child; } -- cgit v1.2.3