diff options
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- | net/mptcp/protocol.c | 265 |
1 files changed, 151 insertions, 114 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c5d5e68940ea..4bde960e19dc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,7 +11,6 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> -#include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -20,7 +19,6 @@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> -#include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> @@ -1061,6 +1059,12 @@ out: } } +static void __mptcp_clean_una_wakeup(struct sock *sk) +{ + __mptcp_clean_una(sk); + mptcp_write_space(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1189,6 +1193,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size, */ while (skbs->qlen > 1) { skb = __skb_dequeue_tail(skbs); + *total_ts -= skb->truesize; __kfree_skb(skb); } return skbs->qlen > 0; @@ -1444,7 +1449,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk, release_sock(ssk); } -static void mptcp_push_pending(struct sock *sk, unsigned int flags) +static void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1696,14 +1701,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) wait_for_memory: mptcp_set_nospace(sk); - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; } if (copied) - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); out: release_sock(sk); @@ -2115,6 +2120,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) return backup; } +static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) +{ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2126,6 +2139,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + struct mptcp_sock *msk = mptcp_sk(sk); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2154,6 +2169,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, release_sock(ssk); sock_put(ssk); + + if (ssk == msk->last_snd) + msk->last_snd = NULL; + + if (ssk == msk->ack_hint) + msk->ack_hint = NULL; + + if (ssk == msk->first) + msk->first = NULL; + + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2238,14 +2265,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) mptcp_close_wake_up(sk); } -static void mptcp_worker(struct work_struct *work) +static void __mptcp_retrans(struct sock *sk) { - struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); - struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; size_t copied = 0; - int state, ret; + struct sock *ssk; + int ret; + + __mptcp_clean_una_wakeup(sk); + dfrag = mptcp_rtx_head(sk); + if (!dfrag) + return; + + ssk = mptcp_subflow_get_retrans(msk); + if (!ssk) + goto reset_timer; + + lock_sock(ssk); + + /* limit retransmission to the bytes already sent on some subflows */ + info.sent = 0; + info.limit = dfrag->already_sent; + while (info.sent < dfrag->already_sent) { + if (!mptcp_alloc_tx_skb(sk, ssk)) + break; + + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) + break; + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); + copied += ret; + info.sent += ret; + } + if (copied) + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); + + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + +reset_timer: + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); +} + +static void mptcp_worker(struct work_struct *work) +{ + struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); + struct sock *sk = &msk->sk.icsk_inet.sk; + int state; lock_sock(sk); state = sk->sk_state; @@ -2280,45 +2351,8 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) - goto unlock; - - __mptcp_clean_una(sk); - dfrag = mptcp_rtx_head(sk); - if (!dfrag) - goto unlock; - - ssk = mptcp_subflow_get_retrans(msk); - if (!ssk) - goto reset_unlock; - - lock_sock(ssk); - - /* limit retransmission to the bytes already sent on some subflows */ - info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { - if (!mptcp_alloc_tx_skb(sk, ssk)) - break; - - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); - if (ret <= 0) - break; - - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); - copied += ret; - info.sent += ret; - } - if (copied) - tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, - info.size_goal); - - mptcp_set_timeout(sk, ssk); - release_sock(ssk); - -reset_unlock: - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) + __mptcp_retrans(sk); unlock: release_sock(sk); @@ -2523,12 +2557,6 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - /* dispose the ancillatory tcp socket, if any */ - if (msk->subflow) { - iput(SOCK_INODE(msk->subflow)); - msk->subflow = NULL; - } - /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -2553,6 +2581,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } @@ -2847,6 +2876,48 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, return ret; } +static bool mptcp_unsupported(int level, int optname) +{ + if (level == SOL_IP) { + switch (optname) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + if (level == SOL_IPV6) { + switch (optname) { + case IPV6_ADDRFORM: + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; + } + return false; +} + static int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -2855,6 +2926,9 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, pr_debug("msk=%p", msk); + if (mptcp_unsupported(level, optname)) + return -ENOPROTOOPT; + if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); @@ -2934,13 +3008,14 @@ static void mptcp_release_cb(struct sock *sk) { unsigned long flags, nflags; - /* push_pending may touch wmem_reserved, do it before the later - * cleanup - */ - if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) - __mptcp_clean_una(sk); - if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) { - /* mptcp_push_pending() acquires the subflow socket lock + for (;;) { + flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_PUSH_PENDING); + if (!flags) + break; + + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope * 2) must avoid ABBA deadlock with msk socket spinlock: the RX @@ -2949,13 +3024,21 @@ static void mptcp_release_cb(struct sock *sk) */ spin_unlock_bh(&sk->sk_lock.slock); - mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_PUSH_PENDING)) + __mptcp_push_pending(sk, 0); + + cond_resched(); spin_lock_bh(&sk->sk_lock.slock); } + + if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) + __mptcp_clean_una_wakeup(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); - /* clear any wmem reservation and errors */ + /* push_pending may touch wmem_reserved, ensure we do the cleanup + * later + */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); @@ -3285,6 +3368,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, * we can notify it, if needed. + * + * Even if remote has reset the initial subflow by now + * the refcnt is still at least one. */ subflow = mptcp_subflow_ctx(msk->first); list_add(&subflow->node, &msk->conn_list); @@ -3376,34 +3462,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = sock->sk; - struct mptcp_sock *msk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - } - - release_sock(sk); - - return inet_release(sock); -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = mptcp_release, + .release = inet_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@ -3495,35 +3557,10 @@ void __init mptcp_proto_init(void) } #if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp6_release(struct socket *sock) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk; - struct sock *sk = sock->sk; - - if (!sk) - return 0; - - lock_sock(sk); - - msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - ip_mc_drop_socket(ssk); - ipv6_sock_mc_close(ssk); - ipv6_sock_ac_close(ssk); - } - - release_sock(sk); - return inet6_release(sock); -} - static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = mptcp6_release, + .release = inet6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, |