summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/ctrl.c4
-rw-r--r--net/mptcp/options.c28
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/pm_netlink.c26
-rw-r--r--net/mptcp/pm_userspace.c8
-rw-r--r--net/mptcp/protocol.c61
-rw-r--r--net/mptcp/protocol.h66
-rw-r--r--net/mptcp/sockopt.c28
-rw-r--r--net/mptcp/subflow.c67
9 files changed, 201 insertions, 95 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index b0dd008e2114..dd595d9b5e50 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -405,9 +405,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
subflow->mpc_drop = 1;
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
- } else {
- subflow->mpc_drop = 0;
}
+ } else if (ssk->sk_state == TCP_SYN_SENT) {
+ subflow->mpc_drop = 0;
}
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index a62bc874bf1e..a97505b78671 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
- mp_opt->use_ack = 0;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
@@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("DSS\n");
ptr++;
- /* we must clear 'mpc_map' be able to detect MP_CAPABLE
- * map vs DSS map in mptcp_incoming_options(), and reconstruct
- * map info accordingly
- */
- mp_opt->mpc_map = 0;
flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb,
const unsigned char *ptr;
int length;
- /* initialize option status */
- mp_opt->suboptions = 0;
+ /* Ensure that casting the whole status to u32 is efficient and safe */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status),
+ sizeof(u32)));
+ *(u32 *)&mp_opt->status = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -607,7 +604,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
}
opts->ext_copy.use_ack = 1;
opts->suboptions = OPTION_MPTCP_DSS;
- WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if (dss_size == 0)
@@ -655,6 +651,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
bool drop_other_suboptions = false;
unsigned int opt_size = *size;
+ struct mptcp_addr_info addr;
bool echo;
int len;
@@ -663,7 +660,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
*/
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
- !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
+ !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr,
&echo, &drop_other_suboptions))
return false;
@@ -676,7 +673,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
else if (opts->suboptions & OPTION_MPTCP_DSS)
return false;
- len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
+ len = mptcp_add_addr_len(addr.family, echo, !!addr.port);
if (remaining < len)
return false;
@@ -693,6 +690,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->ahmac = 0;
*size -= opt_size;
}
+ opts->addr = addr;
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
@@ -981,8 +979,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
+ if (!mptcp_try_fallback(ssk))
+ goto reset;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
return false;
}
@@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
}
- return;
+ goto update_wspace;
}
if (rcv_wnd_new != rcv_wnd_old) {
@@ -1313,6 +1312,9 @@ raise_win:
th->window = htons(new_win);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
}
+
+update_wspace:
+ WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2..2c8815daf5b0 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -303,8 +303,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
pr_debug("fail_seq=%llu\n", fail_seq);
- if (!READ_ONCE(msk->allow_infinite_fallback))
+ /* After accepting the fail, we can't create any other subflows */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
return;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map\n");
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 45a2b5f05d38..2a085ec5bfd0 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -968,7 +968,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry,
- bool needs_id)
+ bool needs_id, bool replace)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -1008,6 +1008,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
if (entry->addr.id)
goto out;
+ /* allow callers that only need to look up the local
+ * addr's id to skip replacement. This allows them to
+ * avoid calling synchronize_rcu in the packet recv
+ * path.
+ */
+ if (!replace) {
+ kfree(entry);
+ ret = cur->addr.id;
+ goto out;
+ }
+
pernet->addrs--;
entry->addr.id = cur->addr.id;
list_del_rcu(&cur->list);
@@ -1160,7 +1171,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
if (ret < 0)
kfree(entry);
@@ -1432,7 +1443,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
}
}
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
- !mptcp_pm_has_addr_attr_id(attr, info));
+ !mptcp_pm_has_addr_attr_id(attr, info),
+ true);
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1513,11 +1525,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
if (mptcp_pm_is_userspace(msk))
goto next;
- if (list_empty(&msk->conn_list)) {
- mptcp_pm_remove_anno_addr(msk, addr, false);
- goto next;
- }
-
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
@@ -2049,7 +2056,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
+ MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
spin_unlock_bh(&pernet->lock);
GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index e35178f5205f..bb76295d04c5 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -589,11 +589,9 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto set_flags_err;
- if (attr_rem) {
- ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
- if (ret < 0)
- goto set_flags_err;
- }
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
+ if (ret < 0)
+ goto set_flags_err;
if (loc.addr.family == AF_UNSPEC ||
rem.addr.family == AF_UNSPEC) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4b9d850ce85a..d865d08a0c5e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
int delta;
if (MPTCP_SKB_CB(from)->offset ||
+ ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -622,10 +623,9 @@ static bool mptcp_check_data_fin(struct sock *sk)
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
{
- if (READ_ONCE(msk->allow_infinite_fallback)) {
+ if (mptcp_try_fallback(ssk)) {
MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_DSSCORRUPTIONFALLBACK);
- mptcp_do_fallback(ssk);
} else {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
mptcp_subflow_reset(ssk);
@@ -877,7 +877,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
{
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
@@ -888,6 +888,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_state != TCP_ESTABLISHED)
return false;
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
+
/* attach to msk socket only after we are sure we will deal with it
* at close time
*/
@@ -896,7 +904,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
- mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
__mptcp_propagate_sndbuf(sk, ssk);
return true;
@@ -1235,10 +1242,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mpext->infinite_map = 1;
mpext->data_len = 0;
+ if (!mptcp_try_fallback(ssk)) {
+ mptcp_subflow_reset(ssk);
+ return;
+ }
+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
}
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -1766,8 +1777,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* see mptcp_disconnect().
* Attempt it again outside the problematic scope.
*/
- if (!mptcp_disconnect(sk, 0))
+ if (!mptcp_disconnect(sk, 0)) {
+ sk->sk_disconnects++;
sk->sk_socket->state = SS_UNCONNECTED;
+ }
}
inet_clear_bit(DEFER_CONNECT, sk);
@@ -2640,9 +2653,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
+ struct mptcp_sendmsg_info info = { .data_lock_held = true, };
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
- struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
struct sock *ssk;
int ret, err;
@@ -2687,6 +2700,18 @@ static void __mptcp_retrans(struct sock *sk)
info.sent = 0;
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
dfrag->already_sent;
+
+ /*
+ * make the whole retrans decision, xmit, disallow
+ * fallback atomic
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ if (__mptcp_check_fallback(msk)) {
+ spin_unlock_bh(&msk->fallback_lock);
+ release_sock(ssk);
+ return;
+ }
+
while (info.sent < info.limit) {
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
@@ -2700,8 +2725,9 @@ static void __mptcp_retrans(struct sock *sk)
len = max(copied, len);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
}
+ spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
}
@@ -2830,7 +2856,8 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
- WRITE_ONCE(msk->allow_infinite_fallback, true);
+ msk->allow_infinite_fallback = true;
+ msk->allow_subflows = true;
msk->recovery = false;
msk->subflow_id = 1;
msk->last_data_sent = tcp_jiffies32;
@@ -2838,6 +2865,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
+ spin_lock_init(&msk->fallback_lock);
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3221,7 +3249,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+
+ /* The first subflow is already in TCP_CLOSE status, the following
+ * can't overlap with a fallback anymore
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ msk->allow_subflows = true;
+ msk->allow_infinite_fallback = true;
WRITE_ONCE(msk->flags, 0);
+ spin_unlock_bh(&msk->fallback_lock);
+
msk->cb_flags = 0;
msk->recovery = false;
WRITE_ONCE(msk->can_ack, false);
@@ -3634,7 +3671,13 @@ bool mptcp_finish_join(struct sock *ssk)
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
mptcp_propagate_sndbuf(parent, ssk);
return true;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a93e661ef5c4..6f191b125978 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -149,22 +149,24 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u16 suboptions;
+ struct_group(status,
+ u16 suboptions;
+ u16 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
+ __unused:2;
+ );
+ u8 join_id;
u32 token;
u32 nonce;
- u16 use_map:1,
- dsn64:1,
- data_fin:1,
- use_ack:1,
- ack64:1,
- mpc_map:1,
- reset_reason:4,
- reset_transient:1,
- echo:1,
- backup:1,
- deny_join_id0:1,
- __unused:2;
- u8 join_id;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
@@ -340,10 +342,16 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
u8 scaling_ratio;
+ bool allow_subflows;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+
+ spinlock_t fallback_lock; /* protects fallback,
+ * allow_infinite_fallback and
+ * allow_join
+ */
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -760,10 +768,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
static inline bool mptcp_epollin_ready(const struct sock *sk)
{
+ u64 data_avail = mptcp_data_avail(mptcp_sk(sk));
+
+ if (!data_avail)
+ return false;
+
/* mptcp doesn't have to deal with small skbs in the receive queue,
- * at it can always coalesce them
+ * as it can always coalesce them
*/
- return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ return (data_avail >= sk->sk_rcvlowat) ||
(mem_cgroup_sockets_enabled && sk->sk_memcg &&
mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
READ_ONCE(tcp_memory_pressure);
@@ -1181,13 +1194,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
return __mptcp_check_fallback(msk);
}
-static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+static inline bool __mptcp_try_fallback(struct mptcp_sock *msk)
{
if (__mptcp_check_fallback(msk)) {
pr_debug("TCP fallback already done (msk=%p)\n", msk);
- return;
+ return true;
}
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+
+ msk->allow_subflows = false;
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+ spin_unlock_bh(&msk->fallback_lock);
+ return true;
}
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
@@ -1199,14 +1221,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
TCPF_SYN_RECV | TCPF_LISTEN));
}
-static inline void mptcp_do_fallback(struct sock *ssk)
+static inline bool mptcp_try_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
struct mptcp_sock *msk;
msk = mptcp_sk(sk);
- __mptcp_do_fallback(msk);
+ if (!__mptcp_try_fallback(msk))
+ return false;
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
gfp_t saved_allocation = ssk->sk_allocation;
@@ -1218,6 +1241,7 @@ static inline void mptcp_do_fallback(struct sock *ssk)
tcp_shutdown(ssk, SEND_SHUTDOWN);
ssk->sk_allocation = saved_allocation;
}
+ return true;
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
@@ -1227,7 +1251,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
{
pr_fallback(msk);
subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
+ WARN_ON_ONCE(!__mptcp_try_fallback(msk));
}
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 505445a9598f..3caa0a9d3b38 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1419,6 +1419,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_FREEBIND:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(FREEBIND, sk));
+ case IP_TRANSPARENT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(TRANSPARENT, sk));
case IP_BIND_ADDRESS_NO_PORT:
return mptcp_put_int_option(msk, optval, optlen,
inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
@@ -1430,6 +1436,26 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = (void *)msk;
+
+ switch (optname) {
+ case IPV6_V6ONLY:
+ return mptcp_put_int_option(msk, optval, optlen,
+ sk->sk_ipv6only);
+ case IPV6_TRANSPARENT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(TRANSPARENT, sk));
+ case IPV6_FREEBIND:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(FREEBIND, sk));
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
@@ -1469,6 +1495,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
if (level == SOL_IP)
return mptcp_getsockopt_v4(msk, optname, optval, option);
+ if (level == SOL_IPV6)
+ return mptcp_getsockopt_v6(msk, optname, optval, option);
if (level == SOL_TCP)
return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
if (level == SOL_MPTCP)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 860903e06422..a05f201d194c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -543,9 +543,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
+ if (!mptcp_try_fallback(sk))
+ goto do_reset;
+
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
pr_fallback(msk);
goto fallback;
}
@@ -754,8 +756,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req = mptcp_subflow_rsk(req);
msk = subflow_req->msk;
- if (!msk)
- return false;
subflow_generate_hmac(READ_ONCE(msk->remote_key),
READ_ONCE(msk->local_key),
@@ -853,12 +853,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
- !subflow_hmac_valid(req, &mp_opt) ||
- !mptcp_can_accept_new_subflow(subflow_req->msk)) {
- SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK))
fallback = true;
- }
}
create_child:
@@ -908,6 +904,17 @@ create_child:
goto dispose_child;
}
+ if (!subflow_hmac_valid(req, &mp_opt)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ goto dispose_child;
+ }
+
+ if (!mptcp_can_accept_new_subflow(owner)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ goto dispose_child;
+ }
+
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
@@ -1140,7 +1147,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (data_len == 0) {
pr_debug("infinite mapping received\n");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
- subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1284,32 +1290,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
mptcp_schedule_work(sk);
}
-static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
-{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
- if (subflow->mp_join)
- return false;
- else if (READ_ONCE(msk->csum_enabled))
- return !subflow->valid_csum_seen;
- else
- return READ_ONCE(msk->allow_infinite_fallback);
-}
-
-static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
+ /* we are really failing, prevent any later subflow join */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
+
/* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
- return;
+ return false;
/* since the close timeout take precedence on the fail one,
* no need to start the latter when the first is already set
*/
if (sock_flag((struct sock *)msk, SOCK_DEAD))
- return;
+ return true;
/* we don't need extreme accuracy here, use a zero fail_tout as special
* value meaning no fail timeout at all;
@@ -1321,6 +1324,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
tcp_send_ack(ssk);
mptcp_reset_tout_timer(msk, subflow->fail_tout);
+ return true;
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1381,17 +1385,16 @@ fallback:
(subflow->mp_join || subflow->valid_csum_seen)) {
subflow->send_mp_fail = 1;
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_subflow_fail(msk, ssk)) {
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
goto reset;
}
- mptcp_subflow_fail(msk, ssk);
WRITE_ONCE(subflow->data_avail, true);
return true;
}
- if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ if (!mptcp_try_fallback(ssk)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1407,8 +1410,6 @@ reset:
WRITE_ONCE(subflow->data_avail, false);
return false;
}
-
- mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1673,7 +1674,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0;
@@ -1768,10 +1768,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
* needs it.
* Update ns_tracker to current stack trace and refcounted tracker.
*/
- __netns_tracker_free(net, &sf->sk->ns_tracker, false);
- sf->sk->sk_net_refcnt = 1;
- get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sf->sk);
err = tcp_set_ulp(sf->sk, "mptcp");
if (err)
goto err_free;
@@ -1853,7 +1850,7 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_do_fallback(sk);
+ WARN_ON_ONCE(!mptcp_try_fallback(sk));
pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk, subflow, NULL);