summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/mptcp.h3
-rw-r--r--include/trace/events/mptcp.h6
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h1
-rw-r--r--net/mptcp/options.c8
-rw-r--r--net/mptcp/pm.c6
-rw-r--r--net/mptcp/protocol.c21
-rw-r--r--net/mptcp/protocol.h13
-rw-r--r--net/mptcp/subflow.c57
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh36
10 files changed, 121 insertions, 31 deletions
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 0a3b0fb04a3b..8b1afd6f5cc4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -35,7 +35,8 @@ struct mptcp_ext {
frozen:1,
reset_transient:1;
u8 reset_reason:4,
- csum_reqd:1;
+ csum_reqd:1,
+ infinite_map:1;
};
#define MPTCP_RM_IDS_MAX 8
diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index f8e28e686c65..563e48617374 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -84,6 +84,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__field(u8, reset_transient)
__field(u8, reset_reason)
__field(u8, csum_reqd)
+ __field(u8, infinite_map)
),
TP_fast_assign(
@@ -102,9 +103,10 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->reset_transient = mpext->reset_transient;
__entry->reset_reason = mpext->reset_reason;
__entry->csum_reqd = mpext->csum_reqd;
+ __entry->infinite_map = mpext->infinite_map;
),
- TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
+ TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u infinite_map=%u",
__entry->data_ack, __entry->data_seq,
__entry->subflow_seq, __entry->data_len,
__entry->csum, __entry->use_map,
@@ -112,7 +114,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
__entry->use_ack, __entry->ack64,
__entry->mpc_map, __entry->frozen,
__entry->reset_transient, __entry->reset_reason,
- __entry->csum_reqd)
+ __entry->csum_reqd, __entry->infinite_map)
);
DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag,
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index e55d3dfbee0c..d93a8c9996fd 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+ SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 00576179a619..529d07af9e14 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
+ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 325383646f5c..88f4ebbd6515 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
- if (unlikely(__mptcp_check_fallback(msk)))
+ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
@@ -1340,8 +1340,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
+ /* data_len == 0 is reserved for the infinite mapping,
+ * the checksum will also be set to 0.
+ */
put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
+ (mpext->data_len ? mptcp_make_csum(mpext) : 0),
+ ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 8aa0cdb7ad46..5c36870d3420 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -285,7 +285,13 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
pr_debug("fail_seq=%llu", fail_seq);
+
+ if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
+ subflow->send_infinite_map = 1;
}
/* path manager helpers */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0492aa9308c7..4581c570ef68 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1229,6 +1229,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
+static void mptcp_update_infinite_map(struct mptcp_sock *msk,
+ struct sock *ssk,
+ struct mptcp_ext *mpext)
+{
+ if (!mpext)
+ return;
+
+ mpext->infinite_map = 1;
+ mpext->data_len = 0;
+
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
+ mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1360,6 +1376,8 @@ alloc_skb:
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
+ if (mptcp_subflow_ctx(ssk)->send_infinite_map)
+ mptcp_update_infinite_map(msk, ssk, mpext);
trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
@@ -2465,6 +2483,7 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
}
release_sock(ssk);
@@ -2539,6 +2558,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -3275,6 +3295,7 @@ err_prohibited:
}
subflow->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aca1fb56523f..61d600693ffd 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -263,6 +263,7 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ bool allow_infinite_fallback;
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
@@ -440,6 +441,7 @@ struct mptcp_subflow_context {
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
+ send_infinite_map : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -876,6 +878,17 @@ static inline void mptcp_do_fallback(struct sock *sk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
+{
+ struct mptcp_ext *mpext;
+
+ mpext = skb ? mptcp_get_ext(skb) : NULL;
+ if (mpext && mpext->infinite_map)
+ return true;
+
+ return false;
+}
+
static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index aba260f547da..30ffb00661bb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1006,7 +1006,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1203,35 +1205,39 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
- while ((skb = skb_peek(&ssk->sk_receive_queue)))
- sk_eat_skb(ssk, skb);
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (mptcp_has_another_subflow(ssk) ||
+ !READ_ONCE(msk->allow_infinite_fallback)) {
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ }
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return true;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return true;
- }
- if (subflow->mp_join || subflow->fully_established) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return false;
+ if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1483,6 +1489,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
return err;
failed_unlink:
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..9eb4d889a24a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1106,6 +1106,38 @@ chk_rst_nr()
echo "$extra_msg"
}
+chk_infi_nr()
+{
+ local infi_tx=$1
+ local infi_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-${nr_blank}s %s" " " "itx"
+ count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$infi_tx" ]; then
+ echo "[fail] got $count infinite map[s] TX expected $infi_tx"
+ fail_test
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - infirx"
+ count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$infi_rx" ]; then
+ echo "[fail] got $count infinite map[s] RX expected $infi_rx"
+ fail_test
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ [ "${dump_stats}" = 1 ] && dump_stats
+}
+
chk_join_nr()
{
local syn_nr=$1
@@ -1115,7 +1147,8 @@ chk_join_nr()
local csum_ns2=${5:-0}
local fail_nr=${6:-0}
local rst_nr=${7:-0}
- local corrupted_pkts=${8:-0}
+ local infi_nr=${8:-0}
+ local corrupted_pkts=${9:-0}
local count
local dump_stats
local with_cookie
@@ -1170,6 +1203,7 @@ chk_join_nr()
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
+ chk_infi_nr $infi_nr $infi_nr
fi
}