summary | refs | log | tree | commit | diff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- net/mptcp/protocol.c 194
1 files changed, 92 insertions, 102 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 48e649fe2360..6ea0a1da8068 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -92,7 +92,6 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
- WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -102,6 +101,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
+ iput(SOCK_INODE(ssock));
return 0;
}
@@ -109,7 +109,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* If the MPC handshake is not started, returns the first subflow,
* eventually allocating it.
*/
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
int ret;
@@ -117,10 +117,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
return ERR_PTR(-EINVAL);
- if (!msk->subflow) {
- if (msk->first)
- return ERR_PTR(-EINVAL);
-
+ if (!msk->first) {
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
@@ -128,7 +125,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
mptcp_sockopt_sync(msk, msk->first);
}
- return msk->subflow;
+ return msk->first;
}
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
@@ -1643,7 +1640,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
struct sock *ssk;
int ret;
@@ -1654,9 +1650,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* fastopen attempt, no need to check for additional subflow status.
*/
if (msg->msg_flags & MSG_FASTOPEN) {
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
}
if (!msk->first)
return -EINVAL;
@@ -2242,14 +2238,6 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
return min_stale_count > 1 ? backup : NULL;
}
-static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
-{
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- WRITE_ONCE(msk->subflow, NULL);
- }
-}
-
bool __mptcp_retransmit_pending_data(struct sock *sk)
{
struct mptcp_data_frag *cur, *rtx_head;
@@ -2328,7 +2316,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out_release;
}
- dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ dispose_it = msk->free_first || ssk != msk->first;
if (dispose_it)
list_del(&subflow->node);
@@ -2349,7 +2337,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* disconnect should never fail
*/
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
- msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2662,7 +2649,7 @@ unlock:
sock_put(sk);
}
-static int __mptcp_init_sock(struct sock *sk)
+static void __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2689,8 +2676,6 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
-
- return 0;
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2708,11 +2693,8 @@ static void mptcp_ca_reset(struct sock *sk)
static int mptcp_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
- int ret;
- ret = __mptcp_init_sock(sk);
- if (ret)
- return ret;
+ __mptcp_init_sock(sk);
if (!mptcp_is_enabled(net))
return -ENOPROTOOPT;
@@ -3110,7 +3092,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3174,25 +3155,17 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
-static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
bool kern)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *listener;
struct sock *newsk;
- listener = READ_ONCE(msk->subflow);
- if (WARN_ON_ONCE(!listener)) {
- *err = -EINVAL;
- return NULL;
- }
-
- pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
- newsk = inet_csk_accept(listener->sk, flags, err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, err, kern);
if (!newsk)
return NULL;
- pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -3209,9 +3182,9 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
}
newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
- MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
@@ -3252,10 +3225,8 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- /* clears msk->subflow, allowing the following to close
- * even the initial subflow
- */
- mptcp_dispose_initial_subflow(msk);
+ /* allow the following to close even the initial subflow */
+ msk->free_first = 1;
mptcp_destroy_common(msk, 0);
sk_sockets_allocated_dec(sk);
}
@@ -3405,14 +3376,12 @@ static void mptcp_unhash(struct sock *sk)
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
- ssock = msk->subflow;
- pr_debug("msk=%p, subflow=%p", msk, ssock);
- if (WARN_ON_ONCE(!ssock))
+ pr_debug("msk=%p, ssk=%p", msk, msk->first);
+ if (WARN_ON_ONCE(!msk->first))
return -EINVAL;
- return inet_csk_get_port(ssock->sk, snum);
+ return inet_csk_get_port(msk->first, snum);
}
void mptcp_finish_connect(struct sock *ssk)
@@ -3587,25 +3556,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int err = -EINVAL;
+ struct sock *ssk;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
+ subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
if (likely(!__mptcp_check_fallback(msk)))
@@ -3614,25 +3582,42 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
- else
- err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
- inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+ if (!msk->fastopening)
+ lock_sock(ssk);
+
+ /* the following mirrors closely a very small chunk of code from
+ * __inet_stream_connect()
+ */
+ if (ssk->sk_state != TCP_CLOSE)
+ goto out;
+
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+ err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
+ err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+ if (err < 0)
+ goto out;
+
+ inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
+
+out:
+ if (!msk->fastopening)
+ release_sock(ssk);
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
- if (unlikely(err && err != -EINPROGRESS)) {
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (unlikely(err)) {
+ /* avoid leaving a dangling token in an unconnected socket */
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
return err;
}
- mptcp_copy_inaddrs(sk, ssock->sk);
-
- /* silence EINPROGRESS and let the caller inet_stream_connect
- * handle the connection in progress
- */
+ mptcp_copy_inaddrs(sk, ssk);
return 0;
}
@@ -3673,22 +3658,27 @@ static struct proto mptcp_prot = {
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- int err;
+ struct sock *ssk, *sk = sock->sk;
+ int err = -EINVAL;
- lock_sock(sock->sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ lock_sock(sk);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- err = READ_ONCE(ssock->ops)->bind(ssock, uaddr, addr_len);
+ if (sk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, uaddr, addr_len);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (sk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, uaddr, addr_len);
+#endif
if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
@@ -3696,7 +3686,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *sk = sock->sk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
pr_debug("msk=%p", msk);
@@ -3707,22 +3697,24 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto unlock;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
- err = READ_ONCE(ssock->ops)->listen(ssock, backlog);
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ release_sock(ssk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- mptcp_copy_inaddrs(sk, ssock->sk);
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+ mptcp_copy_inaddrs(sk, ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
}
unlock:
@@ -3734,8 +3726,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *ssk, *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3743,11 +3734,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = READ_ONCE(msk->subflow);
- if (!ssock)
+ ssk = READ_ONCE(msk->first);
+ if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ newsk = mptcp_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
@@ -3774,11 +3765,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
- if (msk->first &&
- unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
- if (unlikely(list_empty(&msk->conn_list)))
+ if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
}
@@ -3817,12 +3807,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- struct socket *ssock = READ_ONCE(msk->subflow);
+ struct sock *ssk = READ_ONCE(msk->first);
- if (WARN_ON_ONCE(!ssock || !ssock->sk))
+ if (WARN_ON_ONCE(!ssk))
return 0;
- return inet_csk_listen_poll(ssock->sk);
+ return inet_csk_listen_poll(ssk);
}
shutdown = READ_ONCE(sk->sk_shutdown);