summaryrefslogtreecommitdiff
path: root/net/mptcp/options.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/options.c')
-rw-r--r--net/mptcp/options.c196
1 files changed, 144 insertions, 52 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9b263f27ce9b..b5850afea343 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
}
- if (opsize != expected_opsize)
+
+ /* Cfr RFC 8684 Section 3.3.0:
+ * If a checksum is present but its use had
+ * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+ * close the subflow with a RST, as it is not behaving as negotiated.
+ * If a checksum is not present when its use has been negotiated, the
+ * receiver MUST close the subflow with a RST, as it is considered
+ * broken
+ * We parse even option with mismatching csum presence, so that
+ * later in subflow_data_ready we can trigger the reset.
+ */
+ if (opsize != expected_opsize &&
+ (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+ opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break;
/* try to be gentle vs future versions on the initial syn */
@@ -66,16 +79,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0."
- *
- * Section 3.3.0:
- * "If a checksum is not present when its use has been
- * negotiated, the receiver MUST close the subflow with a RST as
- * it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ mp_opt->csum_reqd = 1;
+
+ if (flags & MPTCP_CAP_DENY_JOIN_ID0)
+ mp_opt->deny_join_id0 = 1;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -86,7 +95,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
}
- if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+ if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used
@@ -98,9 +107,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum_reqd = 1;
+ ptr += 2;
+ }
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key,
- mp_opt->rcvr_key, mp_opt->data_len);
+ mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
case MPTCPOPT_MP_JOIN:
@@ -171,10 +185,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
}
- /* RFC 6824, Section 3.3:
- * If a checksum is present, but its use had
- * not been negotiated in the MP_CAPABLE handshake,
- * the checksum field MUST be ignored.
+ /* Always parse any csum presence combination, we will enforce
+ * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
*/
if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
@@ -209,9 +221,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->csum_reqd = 1;
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
}
break;
@@ -323,9 +341,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
@@ -341,6 +362,8 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
+ mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
+ mp_opt->deny_join_id0 = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -382,6 +405,8 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
+ opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -437,8 +462,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext;
unsigned int data_len;
+ u8 len;
/* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than
@@ -467,16 +494,27 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data
*/
- if (data_len > 0)
- *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
- else
+ if (data_len > 0) {
+ len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+ if (opts->csum_reqd) {
+ /* we need to propagate more info to csum the pseudo hdr */
+ opts->ext_copy.data_seq = mpext->data_seq;
+ opts->ext_copy.subflow_seq = mpext->subflow_seq;
+ opts->ext_copy.csum = mpext->csum;
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+ *size = ALIGN(len, 4);
+ } else {
*size = TCPOLEN_MPTCP_MPC_ACK;
+ }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key,
@@ -537,18 +575,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
- unsigned int map_size;
+ unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+ if (mpext) {
+ if (opts->csum_reqd)
+ map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size;
- dss_size = map_size;
- if (mpext)
opts->ext_copy = *mpext;
+ }
+ remaining -= map_size;
+ dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
@@ -791,6 +832,8 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
+ opts->csum_reqd = subflow_req->csum_reqd;
+ opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -869,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
+ if (mp_opt->deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
@@ -896,19 +942,20 @@ reset:
return false;
}
-static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
+u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
{
- u32 old_ack32, cur_ack32;
-
- if (use_64bit)
- return cur_ack;
-
- old_ack32 = (u32)old_ack;
- cur_ack32 = (u32)cur_ack;
- cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
- if (unlikely(before(cur_ack32, old_ack32)))
- return cur_ack + (1LL << 32);
- return cur_ack;
+ u32 old_seq32, cur_seq32;
+
+ old_seq32 = (u32)old_seq;
+ cur_seq32 = (u32)cur_seq;
+ cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32;
+ if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32)))
+ return cur_seq + (1LL << 32);
+
+ /* reverse wrap could happen, too */
+ if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32)))
+ return cur_seq - (1LL << 32);
+ return cur_seq;
}
static void ack_update_msk(struct mptcp_sock *msk,
@@ -926,7 +973,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
* more dangerous than missing an ack
*/
old_snd_una = msk->snd_una;
- new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
+ new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
/* ACK for data not even sent yet? Ignore. */
if (after64(new_snd_una, snd_nxt))
@@ -963,7 +1010,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us
return false;
WRITE_ONCE(msk->rcv_data_fin_seq,
- expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
+ mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
WRITE_ONCE(msk->rcv_data_fin, 1);
return true;
@@ -1009,7 +1056,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return;
}
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
@@ -1101,6 +1148,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+ mpext->csum_reqd = mp_opt.csum_reqd;
+
+ if (mpext->csum_reqd)
+ mpext->csum = mp_opt.csum;
}
}
@@ -1120,25 +1171,53 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ /* cfr RFC 8684 3.3.1.:
+ * the data sequence number used in the pseudo-header is
+ * always the 64-bit value, irrespective of what length is used in the
+ * DSS option itself.
+ */
+ header.data_seq = cpu_to_be64(mpext->data_seq);
+ header.subflow_seq = htonl(mpext->subflow_seq);
+ header.data_len = htons(mpext->data_len);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ return (__force u16)csum_fold(csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
- u8 len;
+ u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+ if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
- else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+ } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- else if (opts->ext_copy.data_len)
+ } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
- else
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ } else {
len = TCPOLEN_MPTCP_MPC_ACK;
+ }
+
+ if (opts->csum_reqd)
+ flag |= MPTCP_CAP_CHECKSUM_REQD;
+
+ if (!opts->allow_join_id0)
+ flag |= MPTCP_CAP_DENY_JOIN_ID0;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
- MPTCP_CAP_HMAC_SHA256);
+ flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions))
@@ -1154,8 +1233,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len)
goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ mptcp_make_csum(&opts->ext_copy), ptr);
+ } else {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
ptr += 1;
}
@@ -1307,6 +1391,9 @@ mp_capable_done:
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1326,8 +1413,13 @@ mp_capable_done:
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
}
}