summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/ctrl.c52
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h1
-rw-r--r--net/mptcp/mptcp_diag.c1
-rw-r--r--net/mptcp/options.c154
-rw-r--r--net/mptcp/pm_netlink.c15
-rw-r--r--net/mptcp/protocol.c57
-rw-r--r--net/mptcp/protocol.h25
-rw-r--r--net/mptcp/sockopt.c149
-rw-r--r--net/mptcp/subflow.c128
-rw-r--r--net/mptcp/token.c9
11 files changed, 443 insertions, 149 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 96ba616f59bf..6c2639bb9c19 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -4,7 +4,9 @@
* Copyright (c) 2019, Tessares SA.
*/
+#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
+#endif
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -15,10 +17,13 @@
static int mptcp_pernet_id;
struct mptcp_pernet {
+#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_hdr;
+#endif
- int mptcp_enabled;
+ u8 mptcp_enabled;
unsigned int add_addr_timeout;
+ u8 checksum_enabled;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -36,15 +41,30 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
return mptcp_get_pernet(net)->add_addr_timeout;
}
+int mptcp_is_checksum_enabled(struct net *net)
+{
+ return mptcp_get_pernet(net)->checksum_enabled;
+}
+
+static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
+{
+ pernet->mptcp_enabled = 1;
+ pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->checksum_enabled = 0;
+}
+
+#ifdef CONFIG_SYSCTL
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
/* users with CAP_NET_ADMIN or root (not and) can change this
* value, same as other sysctl or the 'net' tree.
*/
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "add_addr_timeout",
@@ -52,15 +72,17 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "checksum_enabled",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
{}
};
-static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
-{
- pernet->mptcp_enabled = 1;
- pernet->add_addr_timeout = TCP_RTO_MAX;
-}
-
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
struct ctl_table_header *hdr;
@@ -75,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
+ table[2].data = &pernet->checksum_enabled;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
@@ -100,6 +123,17 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
kfree(table);
}
+#else
+
+static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
+{
+ return 0;
+}
+
+static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
+
+#endif /* CONFIG_SYSCTL */
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index eb2dc6dbe212..e7e60bc1fb96 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index f0da4f060fe1..92e56c0cfbdd 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f16d9b5ee978..8f88ddeab6a2 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9b263f27ce9b..25189595ed1d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
}
- if (opsize != expected_opsize)
+
+ /* Cfr RFC 8684 Section 3.3.0:
+ * If a checksum is present but its use had
+ * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+ * close the subflow with a RST, as it is not behaving as negotiated.
+ * If a checksum is not present when its use has been negotiated, the
+ * receiver MUST close the subflow with a RST, as it is considered
+ * broken
+ * We parse even option with mismatching csum presence, so that
+ * later in subflow_data_ready we can trigger the reset.
+ */
+ if (opsize != expected_opsize &&
+ (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+ opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break;
/* try to be gentle vs future versions on the initial syn */
@@ -66,16 +79,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0."
- *
- * Section 3.3.0:
- * "If a checksum is not present when its use has been
- * negotiated, the receiver MUST close the subflow with a RST as
- * it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ mp_opt->csum_reqd = 1;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -86,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
}
- if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+ if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used
@@ -98,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum_reqd = 1;
+ ptr += 2;
+ }
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key,
- mp_opt->rcvr_key, mp_opt->data_len);
+ mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
case MPTCPOPT_MP_JOIN:
@@ -171,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
}
- /* RFC 6824, Section 3.3:
- * If a checksum is present, but its use had
- * not been negotiated in the MP_CAPABLE handshake,
- * the checksum field MUST be ignored.
+ /* Always parse any csum presence combination, we will enforce
+ * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
*/
if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
@@ -209,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->csum_reqd = 1;
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
}
break;
@@ -323,9 +338,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
@@ -341,6 +359,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
+ mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -382,6 +401,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
+ opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -437,8 +457,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext;
unsigned int data_len;
+ u8 len;
/* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than
@@ -467,16 +489,26 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data
*/
- if (data_len > 0)
- *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
- else
+ if (data_len > 0) {
+ len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+ if (opts->csum_reqd) {
+ /* we need to propagate more info to csum the pseudo hdr */
+ opts->ext_copy.data_seq = mpext->data_seq;
+ opts->ext_copy.subflow_seq = mpext->subflow_seq;
+ opts->ext_copy.csum = mpext->csum;
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+ *size = ALIGN(len, 4);
+ } else {
*size = TCPOLEN_MPTCP_MPC_ACK;
+ }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key,
@@ -537,18 +569,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
- unsigned int map_size;
+ unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+ if (mpext) {
+ if (opts->csum_reqd)
+ map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size;
- dss_size = map_size;
- if (mpext)
opts->ext_copy = *mpext;
+ }
+ remaining -= map_size;
+ dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
@@ -791,6 +826,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
+ opts->csum_reqd = subflow_req->csum_reqd;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -1009,7 +1045,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return;
}
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
@@ -1101,6 +1137,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+ mpext->csum_reqd = mp_opt.csum_reqd;
+
+ if (mpext->csum_reqd)
+ mpext->csum = mp_opt.csum;
}
}
@@ -1120,25 +1160,50 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ /* cfr RFC 8684 3.3.1.:
+ * the data sequence number used in the pseudo-header is
+ * always the 64-bit value, irrespective of what length is used in the
+ * DSS option itself.
+ */
+ header.data_seq = cpu_to_be64(mpext->data_seq);
+ header.subflow_seq = htonl(mpext->subflow_seq);
+ header.data_len = htons(mpext->data_len);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ return (__force u16)csum_fold(csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
- u8 len;
+ u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+ if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
- else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+ } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- else if (opts->ext_copy.data_len)
+ } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
- else
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ } else {
len = TCPOLEN_MPTCP_MPC_ACK;
+ }
+
+ if (opts->csum_reqd)
+ flag |= MPTCP_CAP_CHECKSUM_REQD;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
- MPTCP_CAP_HMAC_SHA256);
+ flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions))
@@ -1154,8 +1219,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len)
goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ mptcp_make_csum(&opts->ext_copy), ptr);
+ } else {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
ptr += 1;
}
@@ -1307,6 +1377,9 @@ mp_capable_done:
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1326,8 +1399,13 @@ mp_capable_done:
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
}
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2469e06a3a9d..09722598994d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -971,8 +971,14 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
- if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "flags must have signal when using port");
+ return -EINVAL;
+ }
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+ }
return 0;
}
@@ -1913,10 +1919,13 @@ static int __net_init pm_nl_init_net(struct net *net)
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
- __reset_counters(pernet);
pernet->next_id = 1;
- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_lock_init(&pernet->lock);
+
+ /* No need to initialize other pernet fields, the struct is zeroed at
+ * allocation time.
+ */
+
return 0;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 632350018fb6..b5f2f504b85b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -39,10 +39,15 @@ struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
+ u8 has_rxtstamp:1;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+enum {
+ MPTCP_CMSG_TS = BIT(0),
+};
+
static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk);
@@ -272,6 +277,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
struct sk_buff *tail;
+ bool has_rxtstamp;
__skb_unlink(skb, &ssk->sk_receive_queue);
@@ -289,6 +295,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
sk->sk_forward_alloc += amount;
}
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
/* the skb map_seq accounts for the skb offset:
* mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq
* value
@@ -296,6 +304,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow);
MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
MPTCP_SKB_CB(skb)->offset = offset;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
@@ -1298,6 +1307,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
}
+/* note: this always recompute the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
+{
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ __wsum csum = ~csum_unfold(mpext->csum);
+ int offset = skb->len - added;
+
+ mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1392,10 +1413,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1;
- ret = 0;
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk);
+ return 0;
}
out:
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
}
@@ -1770,7 +1795,9 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
- size_t len, int flags)
+ size_t len, int flags,
+ struct scm_timestamping_internal *tss,
+ int *cmsg_flags)
{
struct sk_buff *skb, *tmp;
int copied = 0;
@@ -1790,6 +1817,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
}
}
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, tss);
+ *cmsg_flags |= MPTCP_CMSG_TS;
+ }
+
copied += count;
if (count < data_len) {
@@ -1980,7 +2012,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int copied = 0;
+ struct scm_timestamping_internal tss;
+ int copied = 0, cmsg_flags = 0;
int target;
long timeo;
@@ -2002,7 +2035,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags);
+ bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2083,6 +2116,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
set_bit(MPTCP_DATA_READY, &msk->flags);
}
out_err:
+ if (cmsg_flags && copied >= 0) {
+ if (cmsg_flags & MPTCP_CMSG_TS)
+ tcp_recv_timestamp(msg, sk, &tss);
+ }
+
pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
msk, test_bit(MPTCP_DATA_READY, &msk->flags),
skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
@@ -2339,8 +2377,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+ while (info.sent < info.limit) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
@@ -2352,9 +2390,11 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret;
info.sent += ret;
}
- if (copied)
+ if (copied) {
+ dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ }
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
@@ -2433,6 +2473,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@ -2773,6 +2814,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
+ if (mp_opt->csum_reqd)
+ WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 385796f0ef19..160d716ebc2b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -68,6 +68,8 @@
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
+
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_HMAC_LEN 20
@@ -124,6 +126,7 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ __sum16 csum;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
@@ -133,6 +136,7 @@ struct mptcp_options_received {
rm_addr : 1,
mp_prio : 1,
echo : 1,
+ csum_reqd : 1,
backup : 1;
u32 token;
u32 nonce;
@@ -234,6 +238,7 @@ struct mptcp_sock {
bool snd_data_fin_enable;
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
+ bool csum_enabled;
spinlock_t join_list_lock;
struct sock *ack_hint;
struct work_struct work;
@@ -335,11 +340,19 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
+struct csum_pseudo_header {
+ __be64 data_seq;
+ __be32 subflow_seq;
+ __be16 data_len;
+ __sum16 csum;
+};
+
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
- backup : 1;
+ backup : 1,
+ csum_reqd : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@@ -386,6 +399,8 @@ struct mptcp_subflow_context {
u32 map_subflow_seq;
u32 ssn_offset;
u32 map_data_len;
+ __wsum map_data_csum;
+ u32 map_csum_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
@@ -395,6 +410,8 @@ struct mptcp_subflow_context {
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
+ map_csum_reqd : 1,
+ map_data_fin : 1,
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
@@ -524,6 +541,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
+int mptcp_is_checksum_enabled(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
@@ -575,7 +593,8 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
@@ -626,6 +645,8 @@ static inline void mptcp_write_space(struct sock *sk)
void mptcp_destroy_common(struct mptcp_sock *msk);
+#define MPTCP_TOKEN_MAX_RETRIES 4
+
void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a79798189599..092d1f635d27 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -140,6 +140,43 @@ static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
}
+static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
+{
+ sockptr_t optval = KERNEL_SOCKPTR(&val);
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+ optval, sizeof(val));
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
+
+ switch (optname) {
+ case SO_TIMESTAMP_OLD:
+ case SO_TIMESTAMP_NEW:
+ case SO_TIMESTAMPNS_OLD:
+ case SO_TIMESTAMPNS_NEW:
+ sock_set_timestamp(sk, optname, !!val);
+ break;
+ case SO_TIMESTAMPING_NEW:
+ case SO_TIMESTAMPING_OLD:
+ sock_set_timestamping(sk, optname, val);
+ break;
+ }
+
+ unlock_sock_fast(ssk, slow);
+ }
+
+ release_sock(sk);
+ return 0;
+}
+
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -164,6 +201,13 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
case SO_INCOMING_CPU:
mptcp_so_incoming_cpu(msk, val);
return 0;
+ case SO_TIMESTAMP_OLD:
+ case SO_TIMESTAMP_NEW:
+ case SO_TIMESTAMPNS_OLD:
+ case SO_TIMESTAMPNS_NEW:
+ case SO_TIMESTAMPING_OLD:
+ case SO_TIMESTAMPING_NEW:
+ return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
}
return -ENOPROTOOPT;
@@ -251,9 +295,23 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_MARK:
case SO_INCOMING_CPU:
case SO_DEBUG:
+ case SO_TIMESTAMP_OLD:
+ case SO_TIMESTAMP_NEW:
+ case SO_TIMESTAMPNS_OLD:
+ case SO_TIMESTAMPNS_NEW:
+ case SO_TIMESTAMPING_OLD:
+ case SO_TIMESTAMPING_NEW:
return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
case SO_LINGER:
return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
+ case SO_RCVLOWAT:
+ case SO_RCVTIMEO_OLD:
+ case SO_RCVTIMEO_NEW:
+ case SO_BUSY_POLL:
+ case SO_PREFER_BUSY_POLL:
+ case SO_BUSY_POLL_BUDGET:
+ /* No need to copy: only relevant for msk */
+ return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
case SO_NO_CHECK:
case SO_DONTROUTE:
case SO_BROADCAST:
@@ -267,7 +325,24 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
return 0;
}
- return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
+ /* SO_OOBINLINE is not supported, let's avoid the related mess
+ * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
+ * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
+ * we must be careful with subflows
+ *
+ * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
+ * explicitly the sk_protocol field
+ *
+ * SO_PEEK_OFF is unsupported, as it is for plain TCP
+ * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
+ * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
+ * but likely needs careful design
+ *
+ * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
+ * SO_TXTIME is currently unsupported
+ */
+
+ return -EOPNOTSUPP;
}
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
@@ -299,72 +374,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
static bool mptcp_supported_sockopt(int level, int optname)
{
- if (level == SOL_SOCKET) {
- switch (optname) {
- case SO_DEBUG:
- case SO_REUSEPORT:
- case SO_REUSEADDR:
-
- /* the following ones need a better implementation,
- * but are quite common we want to preserve them
- */
- case SO_BINDTODEVICE:
- case SO_SNDBUF:
- case SO_SNDBUFFORCE:
- case SO_RCVBUF:
- case SO_RCVBUFFORCE:
- case SO_KEEPALIVE:
- case SO_PRIORITY:
- case SO_LINGER:
- case SO_TIMESTAMP_OLD:
- case SO_TIMESTAMP_NEW:
- case SO_TIMESTAMPNS_OLD:
- case SO_TIMESTAMPNS_NEW:
- case SO_TIMESTAMPING_OLD:
- case SO_TIMESTAMPING_NEW:
- case SO_RCVLOWAT:
- case SO_RCVTIMEO_OLD:
- case SO_RCVTIMEO_NEW:
- case SO_SNDTIMEO_OLD:
- case SO_SNDTIMEO_NEW:
- case SO_MARK:
- case SO_INCOMING_CPU:
- case SO_BINDTOIFINDEX:
- case SO_BUSY_POLL:
- case SO_PREFER_BUSY_POLL:
- case SO_BUSY_POLL_BUDGET:
-
- /* next ones are no-op for plain TCP */
- case SO_NO_CHECK:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_BSDCOMPAT:
- case SO_PASSCRED:
- case SO_PASSSEC:
- case SO_RXQ_OVFL:
- case SO_WIFI_STATUS:
- case SO_NOFCS:
- case SO_SELECT_ERR_QUEUE:
- return true;
- }
-
- /* SO_OOBINLINE is not supported, let's avoid the related mess */
- /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
- * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
- * we must be careful with subflows
- */
- /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
- * explicitly the sk_protocol field
- */
- /* SO_PEEK_OFF is unsupported, as it is for plain TCP */
- /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */
- /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
- * but likely needs careful design
- */
- /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */
- /* SO_TXTIME is currently unsupported */
- return false;
- }
if (level == SOL_IP) {
switch (optname) {
/* should work fine */
@@ -574,12 +583,12 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
pr_debug("msk=%p", msk);
- if (!mptcp_supported_sockopt(level, optname))
- return -ENOPROTOOPT;
-
if (level == SOL_SOCKET)
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
+ if (!mptcp_supported_sockopt(level, optname))
+ return -ENOPROTOOPT;
+
/* @@ the meaning of setsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
* MPTCP-level socket to configure the subflows until the subflow
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be1de4084196..8976ff586b87 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -150,7 +151,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -162,7 +163,7 @@ static int subflow_check_req(struct request_sock *req,
}
if (mp_opt.mp_capable && listener->request_mptcp) {
- int err, retries = 4;
+ int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
@@ -247,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
return -EINVAL;
@@ -394,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) {
MPTCP_INC_STATS(sock_net(sk),
@@ -404,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
+ if (mp_opt.csum_reqd)
+ WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
@@ -430,15 +433,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto do_reset;
}
+ if (!mptcp_finish_join(sk))
+ goto do_reset;
+
subflow_generate_hmac(subflow->local_key, subflow->remote_key,
subflow->local_nonce,
subflow->remote_nonce,
hmac);
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (!mptcp_finish_join(sk))
- goto do_reset;
-
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
@@ -638,7 +641,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
@@ -648,7 +651,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -824,10 +827,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
return true;
}
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+ bool csum_reqd)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+ __wsum csum;
+ int len;
+
+ if (!csum_reqd)
+ return MAPPING_OK;
+
+ /* mapping already validated on previous traversal */
+ if (subflow->map_csum_len == subflow->map_data_len)
+ return MAPPING_OK;
+
+ /* traverse the receive queue, ensuring it contains a full
+ * DSS mapping and accumulating the related csum.
+ * Preserve the accoumlate csum across multiple calls, to compute
+ * the csum only once
+ */
+ delta = subflow->map_data_len - subflow->map_csum_len;
+ for (;;) {
+ seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+
+ /* if the current skb has not been accounted yet, csum its contents
+ * up to the amount covered by the current DSS
+ */
+ if (offset < skb->len) {
+ __wsum csum;
+
+ len = min(skb->len - offset, delta);
+ csum = skb_checksum(skb, offset, len, 0);
+ subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+ subflow->map_csum_len);
+
+ delta -= len;
+ subflow->map_csum_len += len;
+ }
+ if (delta == 0)
+ break;
+
+ if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+ /* if this subflow is closed, the partial mapping
+ * will be never completed; flush the pending skbs, so
+ * that subflow_sched_work_if_closed() can kick in
+ */
+ if (unlikely(ssk->sk_state == TCP_CLOSE))
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+
+ /* not enough data to validate the csum */
+ return MAPPING_EMPTY;
+ }
+
+ /* the DSS mapping for next skbs will be validated later,
+ * when a get_mapping_status call will process such skb
+ */
+ skb = skb->next;
+ }
+
+ /* note that 'map_data_len' accounts only for the carried data, does
+ * not include the eventual seq increment due to the data fin,
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+ header.data_seq = cpu_to_be64(subflow->map_seq);
+ header.subflow_seq = htonl(subflow->map_subflow_seq);
+ header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+ if (unlikely(csum_fold(csum))) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
+ return MAPPING_OK;
+}
+
static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -920,9 +1005,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == data_len) {
+ subflow->map_data_len == data_len &&
+ subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+ goto validate_csum;
}
/* If this skb data are fully covered by the current mapping,
@@ -934,17 +1020,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
/* will validate the next map after consuming the current one */
- return MAPPING_OK;
+ goto validate_csum;
}
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len;
subflow->map_valid = 1;
+ subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+ subflow->map_csum_reqd = mpext->csum_reqd;
+ subflow->map_csum_len = 0;
+ subflow->map_data_csum = csum_unfold(mpext->csum);
+
+ /* Cfr RFC 8684 Section 3.3.0 */
+ if (unlikely(subflow->map_csum_reqd != csum_reqd))
+ return MAPPING_INVALID;
+
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq,
- subflow->map_data_len);
+ subflow->map_data_len, subflow->map_csum_reqd,
+ subflow->map_data_csum);
validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure
@@ -954,7 +1050,9 @@ validate_seq:
return MAPPING_INVALID;
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+
+validate_csum:
+ return validate_data_csum(ssk, skb, csum_reqd);
}
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 8f0270a780ce..a98e554b034f 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -33,7 +33,6 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define TOKEN_MAX_RETRIES 4
#define TOKEN_MAX_CHAIN_LEN 4
struct token_bucket {
@@ -153,12 +152,9 @@ int mptcp_token_new_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- int retries = TOKEN_MAX_RETRIES;
+ int retries = MPTCP_TOKEN_MAX_RETRIES;
struct token_bucket *bucket;
- pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
-
again:
mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
&subflow->idsn);
@@ -172,6 +168,9 @@ again:
goto again;
}
+ pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
+ sk, subflow->local_key, subflow->token, subflow->idsn);
+
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;