diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/ctrl.c | 4 | ||||
-rw-r--r-- | net/mptcp/options.c | 28 | ||||
-rw-r--r-- | net/mptcp/pm.c | 8 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 26 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 8 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 61 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 66 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 28 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 67 |
9 files changed, 201 insertions, 95 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index b0dd008e2114..dd595d9b5e50 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -405,9 +405,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); - } else { - subflow->mpc_drop = 0; } + } else if (ssk->sk_state == TCP_SYN_SENT) { + subflow->mpc_drop = 0; } } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index a62bc874bf1e..a97505b78671 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; - mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } @@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("DSS\n"); ptr++; - /* we must clear 'mpc_map' be able to detect MP_CAPABLE - * map vs DSS map in mptcp_incoming_options(), and reconstruct - * map info accordingly - */ - mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; @@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb, const unsigned char *ptr; int length; - /* initialize option status */ - mp_opt->suboptions = 0; + /* Ensure that casting the whole status to u32 is efficient and safe */ + BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), + sizeof(u32))); + *(u32 *)&mp_opt->status = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -607,7 +604,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; - WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -655,6 +651,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; + struct mptcp_addr_info addr; bool echo; int len; @@ -663,7 +660,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || - !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, + !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr, &echo, &drop_other_suboptions)) return false; @@ -676,7 +673,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * else if (opts->suboptions & OPTION_MPTCP_DSS) return false; - len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); + len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) return false; @@ -693,6 +690,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = 0; *size -= opt_size; } + opts->addr = addr; opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); @@ -981,8 +979,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->mp_join) goto reset; subflow->mp_capable = 0; + if (!mptcp_try_fallback(ssk)) + goto reset; pr_fallback(msk); - mptcp_do_fallback(ssk); return false; } @@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } - return; + goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { @@ -1313,6 +1312,9 @@ raise_win: th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } + +update_wspace: + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 620264c75dc2..2c8815daf5b0 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -303,8 +303,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu\n", fail_seq); - if (!READ_ONCE(msk->allow_infinite_fallback)) + /* After accepting the fail, we can't create any other subflows */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); return; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map\n"); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 45a2b5f05d38..2a085ec5bfd0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -968,7 +968,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, - bool needs_id) + bool needs_id, bool replace) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -1008,6 +1008,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, if (entry->addr.id) goto out; + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path. + */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + pernet->addrs--; entry->addr.id = cur->addr.id; list_del_rcu(&cur->list); @@ -1160,7 +1171,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) kfree(entry); @@ -1432,7 +1443,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info)); + !mptcp_pm_has_addr_attr_id(attr, info), + true); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; @@ -1513,11 +1525,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, if (mptcp_pm_is_userspace(msk)) goto next; - if (list_empty(&msk->conn_list)) { - mptcp_pm_remove_anno_addr(msk, addr, false); - goto next; - } - lock_sock(sk); remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && @@ -2049,7 +2056,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index e35178f5205f..bb76295d04c5 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -589,11 +589,9 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto set_flags_err; - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); - if (ret < 0) - goto set_flags_err; - } + ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); + if (ret < 0) + goto set_flags_err; if (loc.addr.family == AF_UNSPEC || rem.addr.family == AF_UNSPEC) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4b9d850ce85a..d865d08a0c5e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, int delta; if (MPTCP_SKB_CB(from)->offset || + ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; @@ -622,10 +623,9 @@ static bool mptcp_check_data_fin(struct sock *sk) static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) { - if (READ_ONCE(msk->allow_infinite_fallback)) { + if (mptcp_try_fallback(ssk)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONFALLBACK); - mptcp_do_fallback(ssk); } else { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); mptcp_subflow_reset(ssk); @@ -877,7 +877,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) { mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); } @@ -888,6 +888,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_state != TCP_ESTABLISHED) return false; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); + /* attach to msk socket only after we are sure we will deal with it * at close time */ @@ -896,7 +904,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); - mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); __mptcp_propagate_sndbuf(sk, ssk); return true; @@ -1235,10 +1242,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mpext->infinite_map = 1; mpext->data_len = 0; + if (!mptcp_try_fallback(ssk)) { + mptcp_subflow_reset(ssk); + return; + } + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - mptcp_do_fallback(ssk); } #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) @@ -1766,8 +1777,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * see mptcp_disconnect(). * Attempt it again outside the problematic scope. */ - if (!mptcp_disconnect(sk, 0)) + if (!mptcp_disconnect(sk, 0)) { + sk->sk_disconnects++; sk->sk_socket->state = SS_UNCONNECTED; + } } inet_clear_bit(DEFER_CONNECT, sk); @@ -2640,9 +2653,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { + struct mptcp_sendmsg_info info = { .data_lock_held = true, }; struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; - struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; struct sock *ssk; int ret, err; @@ -2687,6 +2700,18 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + + /* + * make the whole retrans decision, xmit, disallow + * fallback atomic + */ + spin_lock_bh(&msk->fallback_lock); + if (__mptcp_check_fallback(msk)) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + return; + } + while (info.sent < info.limit) { ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) @@ -2700,8 +2725,9 @@ static void __mptcp_retrans(struct sock *sk) len = max(copied, len); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; } + spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); } @@ -2830,7 +2856,8 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - WRITE_ONCE(msk->allow_infinite_fallback, true); + msk->allow_infinite_fallback = true; + msk->allow_subflows = true; msk->recovery = false; msk->subflow_id = 1; msk->last_data_sent = tcp_jiffies32; @@ -2838,6 +2865,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); + spin_lock_init(&msk->fallback_lock); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3221,7 +3249,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + + /* The first subflow is already in TCP_CLOSE status, the following + * can't overlap with a fallback anymore + */ + spin_lock_bh(&msk->fallback_lock); + msk->allow_subflows = true; + msk->allow_infinite_fallback = true; WRITE_ONCE(msk->flags, 0); + spin_unlock_bh(&msk->fallback_lock); + msk->cb_flags = 0; msk->recovery = false; WRITE_ONCE(msk->can_ack, false); @@ -3634,7 +3671,13 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); mptcp_propagate_sndbuf(parent, ssk); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a93e661ef5c4..6f191b125978 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -149,22 +149,24 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 suboptions; + struct_group(status, + u16 suboptions; + u16 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, + __unused:2; + ); + u8 join_id; u32 token; u32 nonce; - u16 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - reset_reason:4, - reset_transient:1, - echo:1, - backup:1, - deny_join_id0:1, - __unused:2; - u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; @@ -340,10 +342,16 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; + bool allow_subflows; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + + spinlock_t fallback_lock; /* protects fallback, + * allow_infinite_fallback and + * allow_join + */ }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -760,10 +768,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) static inline bool mptcp_epollin_ready(const struct sock *sk) { + u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); + + if (!data_avail) + return false; + /* mptcp doesn't have to deal with small skbs in the receive queue, - * at it can always coalesce them + * as it can always coalesce them */ - return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); @@ -1181,13 +1194,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk) return __mptcp_check_fallback(msk); } -static inline void __mptcp_do_fallback(struct mptcp_sock *msk) +static inline bool __mptcp_try_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)\n", msk); - return; + return true; } + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + + msk->allow_subflows = false; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); + spin_unlock_bh(&msk->fallback_lock); + return true; } static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) @@ -1199,14 +1221,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) TCPF_SYN_RECV | TCPF_LISTEN)); } -static inline void mptcp_do_fallback(struct sock *ssk) +static inline bool mptcp_try_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); - __mptcp_do_fallback(msk); + if (!__mptcp_try_fallback(msk)) + return false; if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; @@ -1218,6 +1241,7 @@ static inline void mptcp_do_fallback(struct sock *ssk) tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } + return true; } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) @@ -1227,7 +1251,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, { pr_fallback(msk); subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); + WARN_ON_ONCE(!__mptcp_try_fallback(msk)); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 505445a9598f..3caa0a9d3b38 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1419,6 +1419,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); + case IP_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); case IP_BIND_ADDRESS_NO_PORT: return mptcp_put_int_option(msk, optval, optlen, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); @@ -1430,6 +1436,26 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, return -EOPNOTSUPP; } +static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = (void *)msk; + + switch (optname) { + case IPV6_V6ONLY: + return mptcp_put_int_option(msk, optval, optlen, + sk->sk_ipv6only); + case IPV6_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); + case IPV6_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); + } + + return -EOPNOTSUPP; +} + static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { @@ -1469,6 +1495,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_IP) return mptcp_getsockopt_v4(msk, optname, optval, option); + if (level == SOL_IPV6) + return mptcp_getsockopt_v6(msk, optname, optval, option); if (level == SOL_TCP) return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); if (level == SOL_MPTCP) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 860903e06422..a05f201d194c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -543,9 +543,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { + if (!mptcp_try_fallback(sk)) + goto do_reset; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - mptcp_do_fallback(sk); pr_fallback(msk); goto fallback; } @@ -754,8 +756,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req = mptcp_subflow_rsk(req); msk = subflow_req->msk; - if (!msk) - return false; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), @@ -853,12 +853,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) || - !subflow_hmac_valid(req, &mp_opt) || - !mptcp_can_accept_new_subflow(subflow_req->msk)) { - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK)) fallback = true; - } } create_child: @@ -908,6 +904,17 @@ create_child: goto dispose_child; } + if (!subflow_hmac_valid(req, &mp_opt)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + + if (!mptcp_can_accept_new_subflow(owner)) { + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; @@ -1140,7 +1147,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 0) { pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); - subflow->map_data_len = 0; return MAPPING_INVALID; } @@ -1284,32 +1290,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) -{ - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - if (subflow->mp_join) - return false; - else if (READ_ONCE(msk->csum_enabled)) - return !subflow->valid_csum_seen; - else - return READ_ONCE(msk->allow_infinite_fallback); -} - -static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; + /* we are really failing, prevent any later subflow join */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); + /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) - return; + return false; /* since the close timeout take precedence on the fail one, * no need to start the latter when the first is already set */ if (sock_flag((struct sock *)msk, SOCK_DEAD)) - return; + return true; /* we don't need extreme accuracy here, use a zero fail_tout as special * value meaning no fail timeout at all; @@ -1321,6 +1324,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) tcp_send_ack(ssk); mptcp_reset_tout_timer(msk, subflow->fail_tout); + return true; } static bool subflow_check_data_avail(struct sock *ssk) @@ -1381,17 +1385,16 @@ fallback: (subflow->mp_join || subflow->valid_csum_seen)) { subflow->send_mp_fail = 1; - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_subflow_fail(msk, ssk)) { subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; goto reset; } - mptcp_subflow_fail(msk, ssk); WRITE_ONCE(subflow->data_avail, true); return true; } - if (!subflow_can_fallback(subflow) && subflow->map_data_len) { + if (!mptcp_try_fallback(ssk)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1407,8 +1410,6 @@ reset: WRITE_ONCE(subflow->data_avail, false); return false; } - - mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1673,7 +1674,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); - WRITE_ONCE(msk->allow_infinite_fallback, false); mptcp_stop_tout_timer(sk); return 0; @@ -1768,10 +1768,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ - __netns_tracker_free(net, &sf->sk->ns_tracker, false); - sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; @@ -1853,7 +1850,7 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_do_fallback(sk); + WARN_ON_ONCE(!mptcp_try_fallback(sk)); pr_fallback(msk); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk, subflow, NULL); |