summaryrefslogtreecommitdiff
path: root/net/mptcp/options.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 00:22:29 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-16 00:22:29 +0300
commitd635a69dd4981cc51f90293f5f64268620ed1565 (patch)
tree5e0a758b402ea7d624c25c3a343545dd29e80f31 /net/mptcp/options.c
parentac73e3dc8acd0a3be292755db30388c3580f5674 (diff)
parentefd5a1584537698220578227e6467638307c2a0b (diff)
downloadlinux-d635a69dd4981cc51f90293f5f64268620ed1565.tar.xz
Merge tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - support "prefer busy polling" NAPI operation mode, where we defer softirq for some time expecting applications to periodically busy poll - AF_XDP: improve efficiency by more batching and hindering the adjacency cache prefetcher - af_packet: make packet_fanout.arr size configurable up to 64K - tcp: optimize TCP zero copy receive in presence of partial or unaligned reads making zero copy a performance win for much smaller messages - XDP: add bulk APIs for returning / freeing frames - sched: support fragmenting IP packets as they come out of conntrack - net: allow virtual netdevs to forward UDP L4 and fraglist GSO skbs BPF: - BPF switch from crude rlimit-based to memcg-based memory accounting - BPF type format information for kernel modules and related tracing enhancements - BPF implement task local storage for BPF LSM - allow the FENTRY/FEXIT/RAW_TP tracing programs to use bpf_sk_storage Protocols: - mptcp: improve multiple xmit streams support, memory accounting and many smaller improvements - TLS: support CHACHA20-POLY1305 cipher - seg6: add support for SRv6 End.DT4/DT6 behavior - sctp: Implement RFC 6951: UDP Encapsulation of SCTP - ppp_generic: add ability to bridge channels directly - bridge: Connectivity Fault Management (CFM) support as is defined in IEEE 802.1Q section 12.14. Drivers: - mlx5: make use of the new auxiliary bus to organize the driver internals - mlx5: more accurate port TX timestamping support - mlxsw: - improve the efficiency of offloaded next hop updates by using the new nexthop object API - support blackhole nexthops - support IEEE 802.1ad (Q-in-Q) bridging - rtw88: major bluetooth co-existance improvements - iwlwifi: support new 6 GHz frequency band - ath11k: Fast Initial Link Setup (FILS) - mt7915: dual band concurrent (DBDC) support - net: ipa: add basic support for IPA v4.5 Refactor: - a few pieces of in_interrupt() cleanup work from Sebastian Andrzej Siewior - phy: add support for shared interrupts; get rid of multiple driver APIs and have the drivers write a full IRQ handler, slight growth of driver code should be compensated by the simpler API which also allows shared IRQs - add common code for handling netdev per-cpu counters - move TX packet re-allocation from Ethernet switch tag drivers to a central place - improve efficiency and rename nla_strlcpy - number of W=1 warning cleanups as we now catch those in a patchwork build bot Old code removal: - wan: delete the DLCI / SDLA drivers - wimax: move to staging - wifi: remove old WDS wifi bridging support" * tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1922 commits) net: hns3: fix expression that is currently always true net: fix proc_fs init handling in af_packet and tls nfc: pn533: convert comma to semicolon af_vsock: Assign the vsock transport considering the vsock address flags af_vsock: Set VMADDR_FLAG_TO_HOST flag on the receive path vsock_addr: Check for supported flag values vm_sockets: Add VMADDR_FLAG_TO_HOST vsock flag vm_sockets: Add flags field in the vsock address data structure net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled tcp: Add logic to check for SYN w/ data in tcp_simple_retransmit net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context nfc: s3fwrn5: Release the nfc firmware net: vxget: clean up sparse warnings mlxsw: spectrum_router: Use eXtended mezzanine to offload IPv4 router mlxsw: spectrum: Set KVH XLT cache mode for Spectrum2/3 mlxsw: spectrum_router_xm: Introduce basic XM cache flushing mlxsw: reg: Add Router LPM Cache Enable Register mlxsw: reg: Add Router LPM Cache ML Delete Register mlxsw: spectrum_router_xm: Implement L-value tracking for M-index mlxsw: reg: Add XM Router M Table Register ...
Diffstat (limited to 'net/mptcp/options.c')
-rw-r--r--net/mptcp/options.c218
1 files changed, 163 insertions, 55 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 90cd52df99a6..c5328f407aab 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -242,7 +242,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->add_addr = 1;
mp_opt->addr_id = *ptr++;
- pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo);
if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
ptr += 4;
@@ -267,6 +266,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->ahmac = get_unaligned_be64(ptr);
ptr += 8;
}
+ pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
+ (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
+ mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
break;
case MPTCPOPT_RM_ADDR:
@@ -280,6 +282,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
break;
+ case MPTCPOPT_MP_FASTCLOSE:
+ if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
+ break;
+
+ ptr += 2;
+ mp_opt->rcvr_key = get_unaligned_be64(ptr);
+ ptr += 8;
+ mp_opt->fastclose = 1;
+ break;
+
default:
break;
}
@@ -297,6 +309,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->mp_join = 0;
mp_opt->add_addr = 0;
mp_opt->ahmac = 0;
+ mp_opt->fastclose = 0;
mp_opt->port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
@@ -492,7 +505,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
mpext = skb ? mptcp_get_ext(skb) : NULL;
- snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable);
+ snd_data_fin_enable = mptcp_data_fin_enabled(msk);
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
@@ -528,6 +541,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
+ WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if (dss_size == 0)
@@ -573,27 +587,43 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
}
#endif
-static bool mptcp_established_options_add_addr(struct sock *sk,
+static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ bool drop_other_suboptions = false;
+ unsigned int opt_size = *size;
struct mptcp_addr_info saddr;
bool echo;
+ bool port;
int len;
+ if ((mptcp_pm_should_add_signal_ipv6(msk) ||
+ mptcp_pm_should_add_signal_port(msk)) &&
+ skb && skb_is_tcp_pure_ack(skb)) {
+ pr_debug("drop other suboptions");
+ opts->suboptions = 0;
+ remaining += opt_size;
+ drop_other_suboptions = true;
+ }
+
if (!mptcp_pm_should_add_signal(msk) ||
- !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo)))
+ !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
return false;
- len = mptcp_add_addr_len(saddr.family, echo);
+ len = mptcp_add_addr_len(saddr.family, echo, port);
if (remaining < len)
return false;
*size = len;
+ if (drop_other_suboptions)
+ *size -= opt_size;
opts->addr_id = saddr.id;
+ if (port)
+ opts->port = ntohs(saddr.port);
if (saddr.family == AF_INET) {
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
opts->addr = saddr.addr;
@@ -616,7 +646,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk,
}
}
#endif
- pr_debug("addr_id=%d, ahmac=%llu, echo=%d", opts->addr_id, opts->ahmac, echo);
+ pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
+ opts->addr_id, opts->ahmac, echo, opts->port);
return true;
}
@@ -678,7 +709,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
*size += opt_size;
remaining -= opt_size;
- if (mptcp_established_options_add_addr(sk, &opt_size, remaining, opts)) {
+ if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
ret = true;
@@ -759,6 +790,11 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
goto fully_established;
}
+ if (mp_opt->add_addr) {
+ WRITE_ONCE(msk->fully_established, true);
+ return true;
+ }
+
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP. Fallback scenarios requires a reset for
* MP_JOIN subflows.
@@ -777,7 +813,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
- if (likely(subflow->pm_notified))
+ /* if the subflow is not already linked into the conn_list, we can't
+ * notify the PM: this subflow is still on the listener queue
+ * and the PM possibly acquiring the subflow lock could race with
+ * the listener close
+ */
+ if (likely(subflow->pm_notified) || list_empty(&subflow->node))
return true;
subflow->pm_notified = 1;
@@ -809,31 +850,39 @@ static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
return cur_ack;
}
-static void update_una(struct mptcp_sock *msk,
- struct mptcp_options_received *mp_opt)
+static void ack_update_msk(struct mptcp_sock *msk,
+ struct sock *ssk,
+ struct mptcp_options_received *mp_opt)
{
- u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una);
- u64 write_seq = READ_ONCE(msk->write_seq);
+ u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
+ struct sock *sk = (struct sock *)msk;
+ u64 old_snd_una;
+
+ mptcp_data_lock(sk);
/* avoid ack expansion on update conflict, to reduce the risk of
* wrongly expanding to a future ack sequence number, which is way
* more dangerous than missing an ack
*/
+ old_snd_una = msk->snd_una;
new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
/* ACK for data not even sent yet? Ignore. */
- if (after64(new_snd_una, write_seq))
+ if (after64(new_snd_una, snd_nxt))
new_snd_una = old_snd_una;
- while (after64(new_snd_una, old_snd_una)) {
- snd_una = old_snd_una;
- old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
- new_snd_una);
- if (old_snd_una == snd_una) {
- mptcp_data_acked((struct sock *)msk);
- break;
- }
+ new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
+
+ if (after64(new_wnd_end, msk->wnd_end)) {
+ msk->wnd_end = new_wnd_end;
+ __mptcp_wnd_updated(sk, ssk);
+ }
+
+ if (after64(new_snd_una, old_snd_una)) {
+ msk->snd_una = new_snd_una;
+ __mptcp_data_acked(sk);
}
+ mptcp_data_unlock(sk);
}
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -886,13 +935,30 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
- if (__mptcp_check_fallback(msk))
+ if (__mptcp_check_fallback(msk)) {
+ /* Keep it simple and unconditionally trigger send data cleanup and
+ * pending queue spooling. We will need to acquire the data lock
+ * for more accurate checks, and once the lock is acquired, such
+ * helpers are cheap.
+ */
+ mptcp_data_lock(subflow->conn);
+ if (mptcp_send_head(subflow->conn))
+ __mptcp_wnd_updated(subflow->conn, sk);
+ __mptcp_data_acked(subflow->conn);
+ mptcp_data_unlock(subflow->conn);
return;
+ }
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
+ if (mp_opt.fastclose &&
+ msk->local_key == mp_opt.rcvr_key) {
+ WRITE_ONCE(msk->rcv_fastclose, true);
+ mptcp_schedule_work((struct sock *)msk);
+ }
+
if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
struct mptcp_addr_info addr;
@@ -930,7 +996,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
* monodirectional flows will stuck
*/
if (mp_opt.use_ack)
- update_una(msk, &mp_opt);
+ ack_update_msk(msk, sk, &mp_opt);
/* Zero-data-length packets are dropped by the caller and not
* propagated to the MPTCP layer, so the skb extension does not
@@ -975,7 +1041,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
}
-void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
+static void mptcp_set_rwin(const struct tcp_sock *tp)
+{
+ const struct sock *ssk = (const struct sock *)tp;
+ const struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+
+ ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+
+ if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
+ WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+}
+
+void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+ struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
@@ -1014,44 +1097,66 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
}
mp_capable_done:
- if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
- if (opts->ahmac)
- *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
- TCPOLEN_MPTCP_ADD_ADDR, 0,
- opts->addr_id);
- else
- *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
- TCPOLEN_MPTCP_ADD_ADDR_BASE,
- MPTCP_ADDR_ECHO,
- opts->addr_id);
- memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
- ptr += 1;
+ if ((OPTION_MPTCP_ADD_ADDR
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ | OPTION_MPTCP_ADD_ADDR6
+#endif
+ ) & opts->suboptions) {
+ u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
+ u8 echo = MPTCP_ADDR_ECHO;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
+ len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
+#endif
+
+ if (opts->port)
+ len += TCPOLEN_MPTCP_PORT_LEN;
+
if (opts->ahmac) {
- put_unaligned_be64(opts->ahmac, ptr);
- ptr += 2;
+ len += sizeof(opts->ahmac);
+ echo = 0;
}
- }
+ *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
+ len, echo, opts->addr_id);
+ if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+ memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
+ ptr += 1;
+ }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
- if (opts->ahmac)
- *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
- TCPOLEN_MPTCP_ADD_ADDR6, 0,
- opts->addr_id);
- else
- *ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
- TCPOLEN_MPTCP_ADD_ADDR6_BASE,
- MPTCP_ADDR_ECHO,
- opts->addr_id);
- memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
- ptr += 4;
- if (opts->ahmac) {
- put_unaligned_be64(opts->ahmac, ptr);
- ptr += 2;
+ else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
+ memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
+ ptr += 4;
}
- }
#endif
+ if (!opts->port) {
+ if (opts->ahmac) {
+ put_unaligned_be64(opts->ahmac, ptr);
+ ptr += 2;
+ }
+ } else {
+ if (opts->ahmac) {
+ u8 *bptr = (u8 *)ptr;
+
+ put_unaligned_be16(opts->port, bptr);
+ bptr += 2;
+ put_unaligned_be64(opts->ahmac, bptr);
+ bptr += 8;
+ put_unaligned_be16(TCPOPT_NOP << 8 |
+ TCPOPT_NOP, bptr);
+
+ ptr += 3;
+ } else {
+ put_unaligned_be32(opts->port << 16 |
+ TCPOPT_NOP << 8 |
+ TCPOPT_NOP, ptr);
+ ptr += 1;
+ }
+ }
+ }
+
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
TCPOLEN_MPTCP_RM_ADDR_BASE,
@@ -1132,4 +1237,7 @@ mp_capable_done:
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
}
+
+ if (tp)
+ mptcp_set_rwin(tp);
}