diff options
Diffstat (limited to 'net/mptcp/options.c')
-rw-r--r-- | net/mptcp/options.c | 198 |
1 files changed, 146 insertions, 52 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 6b825fb3fa83..b5850afea343 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb, else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cfr RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken + * We parse even option with mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break; /* try to be gentle vs future versions on the initial syn */ @@ -66,16 +79,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1; + + if (flags & MPTCP_CAP_DENY_JOIN_ID0) + mp_opt->deny_join_id0 = 1; mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@ -86,7 +95,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@ -98,9 +107,14 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: @@ -171,10 +185,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; } - /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@ -209,9 +221,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); } break; @@ -323,9 +341,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, } } -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@ -341,6 +362,8 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); + mp_opt->deny_join_id0 = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -356,6 +379,8 @@ void mptcp_get_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; @@ -380,6 +405,8 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@ -435,8 +462,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len; /* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@ -465,16 +494,27 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@ -535,18 +575,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool ret = false; u64 ack_seq; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; - map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; - remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + } + remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@ -789,6 +832,8 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; + opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@ -867,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return false; } + if (mp_opt->deny_join_id0) + WRITE_ONCE(msk->pm.remote_deny_join_id0, true); + if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); mptcp_subflow_fully_established(subflow, mp_opt); @@ -894,19 +942,20 @@ reset: return false; } -static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit) +u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) { - u32 old_ack32, cur_ack32; - - if (use_64bit) - return cur_ack; - - old_ack32 = (u32)old_ack; - cur_ack32 = (u32)cur_ack; - cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32; - if (unlikely(before(cur_ack32, old_ack32))) - return cur_ack + (1LL << 32); - return cur_ack; + u32 old_seq32, cur_seq32; + + old_seq32 = (u32)old_seq; + cur_seq32 = (u32)cur_seq; + cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32; + if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32))) + return cur_seq + (1LL << 32); + + /* reverse wrap could happen, too */ + if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32))) + return cur_seq - (1LL << 32); + return cur_seq; } static void ack_update_msk(struct mptcp_sock *msk, @@ -924,7 +973,7 @@ static void ack_update_msk(struct mptcp_sock *msk, * more dangerous than missing an ack */ old_snd_una = msk->snd_una; - new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64); + new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); /* ACK for data not even sent yet? Ignore. */ if (after64(new_snd_una, snd_nxt)) @@ -961,7 +1010,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us return false; WRITE_ONCE(msk->rcv_data_fin_seq, - expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); + mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; @@ -1007,7 +1056,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return; } - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; @@ -1099,6 +1148,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if (mpext->csum_reqd) + mpext->csum = mp_opt.csum; } } @@ -1118,25 +1171,53 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256; - if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } + + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD; + + if (!opts->allow_join_id0) + flag |= MPTCP_CAP_DENY_JOIN_ID0; *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) @@ -1152,8 +1233,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (!opts->ext_copy.data_len) goto mp_capable_done; - put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; } @@ -1305,6 +1391,9 @@ mp_capable_done: flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@ -1324,8 +1413,13 @@ mp_capable_done: ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } } |