diff options
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- | net/mptcp/protocol.c | 124 |
1 files changed, 87 insertions, 37 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 760404b15cd0..f599ad44ed24 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -662,9 +662,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, skb = skb_peek(&ssk->sk_receive_queue); if (!skb) { - /* if no data is found, a racing workqueue/recvmsg - * already processed the new data, stop here or we - * can enter an infinite loop + /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), + * a different CPU can have already processed the pending + * data, stop here or we can enter an infinite loop */ if (!moved) done = true; @@ -672,9 +672,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } if (__mptcp_check_fallback(msk)) { - /* if we are running under the workqueue, TCP could have - * collapsed skbs between dummy map creation and now - * be sure to adjust the size + /* Under fallback skbs have no MPTCP extension and TCP could + * collapse them between the dummy map creation and the + * current dequeue. Be sure to adjust the map size. */ map_remaining = skb->len; subflow->map_data_len = skb->len; @@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } else if (ret) { release_sock(ssk); - goto out; + goto do_error; } release_sock(ssk); } @@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { ret = sk_stream_wait_connect(sk, &timeo); if (ret) - goto out; + goto do_error; } + ret = -EPIPE; + if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))) + goto do_error; + pfrag = sk_page_frag(sk); while (msg_data_left(msg)) { @@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) bool dfrag_collapsed; size_t psize, offset; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { - ret = -EPIPE; - goto out; - } - /* reuse tail pfrag, if possible, or carve a new one from the * page allocator */ @@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { ret = -EFAULT; - goto out; + goto do_error; } /* data successfully copied into the write queue */ @@ -1796,7 +1795,7 @@ wait_for_memory: __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) - goto out; + goto do_error; } if (copied) @@ -1804,7 +1803,14 @@ wait_for_memory: out: release_sock(sk); - return copied ? : ret; + return copied; + +do_error: + if (copied) + goto out; + + copied = sk_stream_error(sk, msg->msg_flags, ret); + goto out; } static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, @@ -2307,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); - if (flags & MPTCP_CF_FASTCLOSE) + if (flags & MPTCP_CF_FASTCLOSE) { + /* be sure to force the tcp_disconnect() path, + * to generate the egress reset + */ + ssk->sk_lingertime = 0; + sock_set_flag(ssk, SOCK_LINGER); subflow->send_fastclose = 1; + } need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { @@ -2441,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) unlock_sock_fast(tcp_sk, slow); } + /* Mirror the tcp_reset() error propagation */ + switch (sk->sk_state) { + case TCP_SYN_SENT: + sk->sk_err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->sk_err = EPIPE; + break; + case TCP_CLOSE: + return; + default: + sk->sk_err = ECONNRESET; + } + inet_sk_state_store(sk, TCP_CLOSE); sk->sk_shutdown = SHUTDOWN_MASK; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); - mptcp_close_wake_up(sk); + /* the calling mptcp_worker will properly destroy the socket */ + if (sock_flag(sk, SOCK_DEAD)) + return; + + sk->sk_state_change(sk); + sk_error_report(sk); } static void __mptcp_retrans(struct sock *sk) @@ -2552,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_reset_timeout(msk, 0); } +static void mptcp_do_fastclose(struct sock *sk) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow_safe(msk, subflow, tmp) + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), + subflow, MPTCP_CF_FASTCLOSE); +} + static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -2580,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work) * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state */ - if (sock_flag(sk, SOCK_DEAD) && - (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { - inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_destroy_sock(sk); - goto unlock; + if (sock_flag(sk, SOCK_DEAD)) { + if (mptcp_check_close_timeout(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } + if (sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; + } } if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2825,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk) sock_put(sk); } +static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +{ + /* Concurrent splices from sk_receive_queue into receive_queue will + * always show at least one non-empty queue when checked in this order. + */ + if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && + skb_queue_empty_lockless(&msk->receive_queue)) + return 0; + + return EPOLLIN | EPOLLRDNORM; +} + bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; @@ -2838,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_close_state(sk)) + if (mptcp_check_readable(msk)) { + /* the msk has read data, do the MPTCP equivalent of TCP reset */ + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); + } sk_stream_wait_close(sk, timeout); @@ -3656,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, return err; } -static __poll_t mptcp_check_readable(struct mptcp_sock *msk) -{ - /* Concurrent splices from sk_receive_queue into receive_queue will - * always show at least one non-empty queue when checked in this order. - */ - if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && - skb_queue_empty_lockless(&msk->receive_queue)) - return 0; - - return EPOLLIN | EPOLLRDNORM; -} - static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -3718,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); if (sk->sk_err) mask |= EPOLLERR; |