diff options
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- | net/mptcp/protocol.c | 194 |
1 files changed, 92 insertions, 102 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 48e649fe2360..6ea0a1da8068 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -92,7 +92,6 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio; WRITE_ONCE(msk->first, ssock->sk); - WRITE_ONCE(msk->subflow, ssock); subflow = mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); @@ -102,6 +101,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) /* This is the first subflow, always with id 0 */ subflow->local_id_valid = 1; mptcp_sock_graft(msk->first, sk->sk_socket); + iput(SOCK_INODE(ssock)); return 0; } @@ -109,7 +109,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) /* If the MPC handshake is not started, returns the first subflow, * eventually allocating it. */ -struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) +struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; int ret; @@ -117,10 +117,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) return ERR_PTR(-EINVAL); - if (!msk->subflow) { - if (msk->first) - return ERR_PTR(-EINVAL); - + if (!msk->first) { ret = __mptcp_socket_create(msk); if (ret) return ERR_PTR(ret); @@ -128,7 +125,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) mptcp_sockopt_sync(msk, msk->first); } - return msk->subflow; + return msk->first; } static void mptcp_drop(struct sock *sk, struct sk_buff *skb) @@ -1643,7 +1640,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; struct sock *ssk; int ret; @@ -1654,9 +1650,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * fastopen attempt, no need to check for additional subflow status. */ if (msg->msg_flags & MSG_FASTOPEN) { - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) - return PTR_ERR(ssock); + ssk = __mptcp_nmpc_sk(msk); + if (IS_ERR(ssk)) + return PTR_ERR(ssk); } if (!msk->first) return -EINVAL; @@ -2242,14 +2238,6 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) return min_stale_count > 1 ? backup : NULL; } -static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) -{ - if (msk->subflow) { - iput(SOCK_INODE(msk->subflow)); - WRITE_ONCE(msk->subflow, NULL); - } -} - bool __mptcp_retransmit_pending_data(struct sock *sk) { struct mptcp_data_frag *cur, *rtx_head; @@ -2328,7 +2316,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out_release; } - dispose_it = !msk->subflow || ssk != msk->subflow->sk; + dispose_it = msk->free_first || ssk != msk->first; if (dispose_it) list_del(&subflow->node); @@ -2349,7 +2337,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, * disconnect should never fail */ WARN_ON_ONCE(tcp_disconnect(ssk, 0)); - msk->subflow->state = SS_UNCONNECTED; mptcp_subflow_ctx_reset(subflow); release_sock(ssk); @@ -2662,7 +2649,7 @@ unlock: sock_put(sk); } -static int __mptcp_init_sock(struct sock *sk) +static void __mptcp_init_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -2689,8 +2676,6 @@ static int __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); - - return 0; } static void mptcp_ca_reset(struct sock *sk) @@ -2708,11 +2693,8 @@ static void mptcp_ca_reset(struct sock *sk) static int mptcp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - int ret; - ret = __mptcp_init_sock(sk); - if (ret) - return ret; + __mptcp_init_sock(sk); if (!mptcp_is_enabled(net)) return -ENOPROTOOPT; @@ -3110,7 +3092,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; - WRITE_ONCE(msk->subflow, NULL); msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) @@ -3174,25 +3155,17 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } -static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, +static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, bool kern) { - struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *listener; struct sock *newsk; - listener = READ_ONCE(msk->subflow); - if (WARN_ON_ONCE(!listener)) { - *err = -EINVAL; - return NULL; - } - - pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk)); - newsk = inet_csk_accept(listener->sk, flags, err, kern); + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + newsk = inet_csk_accept(ssk, flags, err, kern); if (!newsk) return NULL; - pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk)); + pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; @@ -3209,9 +3182,9 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, } newsk = new_mptcp_sock; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { - MPTCP_INC_STATS(sock_net(sk), + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } @@ -3252,10 +3225,8 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); + /* allow the following to close even the initial subflow */ + msk->free_first = 1; mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } @@ -3405,14 +3376,12 @@ static void mptcp_unhash(struct sock *sk) static int mptcp_get_port(struct sock *sk, unsigned short snum) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; - ssock = msk->subflow; - pr_debug("msk=%p, subflow=%p", msk, ssock); - if (WARN_ON_ONCE(!ssock)) + pr_debug("msk=%p, ssk=%p", msk, msk->first); + if (WARN_ON_ONCE(!msk->first)) return -EINVAL; - return inet_csk_get_port(ssock->sk, snum); + return inet_csk_get_port(msk->first, snum); } void mptcp_finish_connect(struct sock *ssk) @@ -3587,25 +3556,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; int err = -EINVAL; + struct sock *ssk; - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) - return PTR_ERR(ssock); + ssk = __mptcp_nmpc_sk(msk); + if (IS_ERR(ssk)) + return PTR_ERR(ssk); - mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_SYN_SENT); - subflow = mptcp_subflow_ctx(ssock->sk); + subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of * TCP option space. */ - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) + if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); + if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } if (likely(!__mptcp_check_fallback(msk))) @@ -3614,25 +3582,42 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* if reaching here via the fastopen/sendmsg path, the caller already * acquired the subflow socket lock, too. */ - if (msk->fastopening) - err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1); - else - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK); - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; + if (!msk->fastopening) + lock_sock(ssk); + + /* the following mirrors closely a very small chunk of code from + * __inet_stream_connect() + */ + if (ssk->sk_state != TCP_CLOSE) + goto out; + + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) { + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len); + if (err) + goto out; + } + + err = ssk->sk_prot->connect(ssk, uaddr, addr_len); + if (err < 0) + goto out; + + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect; + +out: + if (!msk->fastopening) + release_sock(ssk); /* on successful connect, the msk state will be moved to established by * subflow_finish_connect() */ - if (unlikely(err && err != -EINPROGRESS)) { - inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); + if (unlikely(err)) { + /* avoid leaving a dangling token in an unconnected socket */ + mptcp_token_destroy(msk); + inet_sk_state_store(sk, TCP_CLOSE); return err; } - mptcp_copy_inaddrs(sk, ssock->sk); - - /* silence EINPROGRESS and let the caller inet_stream_connect - * handle the connection in progress - */ + mptcp_copy_inaddrs(sk, ssk); return 0; } @@ -3673,22 +3658,27 @@ static struct proto mptcp_prot = { static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct socket *ssock; - int err; + struct sock *ssk, *sk = sock->sk; + int err = -EINVAL; - lock_sock(sock->sk); - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + lock_sock(sk); + ssk = __mptcp_nmpc_sk(msk); + if (IS_ERR(ssk)) { + err = PTR_ERR(ssk); goto unlock; } - err = READ_ONCE(ssock->ops)->bind(ssock, uaddr, addr_len); + if (sk->sk_family == AF_INET) + err = inet_bind_sk(ssk, uaddr, addr_len); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (sk->sk_family == AF_INET6) + err = inet6_bind_sk(ssk, uaddr, addr_len); +#endif if (!err) - mptcp_copy_inaddrs(sock->sk, ssock->sk); + mptcp_copy_inaddrs(sk, ssk); unlock: - release_sock(sock->sk); + release_sock(sk); return err; } @@ -3696,7 +3686,7 @@ static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *sk = sock->sk; - struct socket *ssock; + struct sock *ssk; int err; pr_debug("msk=%p", msk); @@ -3707,22 +3697,24 @@ static int mptcp_listen(struct socket *sock, int backlog) if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) goto unlock; - ssock = __mptcp_nmpc_socket(msk); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssk = __mptcp_nmpc_sk(msk); + if (IS_ERR(ssk)) { + err = PTR_ERR(ssk); goto unlock; } - mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); - err = READ_ONCE(ssock->ops)->listen(ssock, backlog); - inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); + lock_sock(ssk); + err = __inet_listen_sk(ssk, backlog); + release_sock(ssk); + inet_sk_state_store(sk, inet_sk_state_load(ssk)); + if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - mptcp_copy_inaddrs(sk, ssock->sk); - mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); + mptcp_copy_inaddrs(sk, ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); } unlock: @@ -3734,8 +3726,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct socket *ssock; - struct sock *newsk; + struct sock *ssk, *newsk; int err; pr_debug("msk=%p", msk); @@ -3743,11 +3734,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. */ - ssock = READ_ONCE(msk->subflow); - if (!ssock) + ssk = READ_ONCE(msk->first); + if (!ssk) return -EINVAL; - newsk = mptcp_accept(sock->sk, flags, &err, kern); + newsk = mptcp_accept(ssk, flags, &err, kern); if (!newsk) return err; @@ -3774,11 +3765,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* Do late cleanup for the first subflow as necessary. Also * deal with bad peers not doing a complete shutdown. */ - if (msk->first && - unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { + if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); - if (unlikely(list_empty(&msk->conn_list))) + if (unlikely(list_is_singular(&msk->conn_list))) inet_sk_state_store(newsk, TCP_CLOSE); } } @@ -3817,12 +3807,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, state = inet_sk_state_load(sk); pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); if (state == TCP_LISTEN) { - struct socket *ssock = READ_ONCE(msk->subflow); + struct sock *ssk = READ_ONCE(msk->first); - if (WARN_ON_ONCE(!ssock || !ssock->sk)) + if (WARN_ON_ONCE(!ssk)) return 0; - return inet_csk_listen_poll(ssock->sk); + return inet_csk_listen_poll(ssk); } shutdown = READ_ONCE(sk->sk_shutdown); |