diff options
author | David S. Miller <davem@davemloft.net> | 2021-08-24 11:28:29 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-08-24 11:28:29 +0300 |
commit | 0384dd9d2d807b7d1470ce0abd549b8855037f99 (patch) | |
tree | 7fec529fa48ee86e9dcab388fe75377ff878130f | |
parent | faf482ca196a5b16007190529b3b2dd32ab3f761 (diff) | |
parent | 33c563ad28e3bf614c82450fbf83a7c3c203db87 (diff) | |
download | linux-0384dd9d2d807b7d1470ce0abd549b8855037f99.tar.xz |
Merge branch 'mptcp-refactor'
Mat Martineau says:
====================
mptcp: Refactor ADD_ADDR/RM_ADDR handling
This patch set changes the way MPTCP ADD_ADDR and RM_ADDR options are
handled to improve the reliability of sending and updating address
advertisements. The information used to populate outgoing advertisement
option headers is now stored separately to avoid rare cases where a more
recent request would overwrite something that had not been sent
yet. While the peers would recover from this, it's better to avoid the
problem in the first place.
Patch 1 moves an advertisement option check under a lock so the changes
made in the next several patches will not introduce a race.
Patches 2-4 make sure ADD_ADDR, ADD_ADDR echo, and RM_ADDR options use
separate flags and data.
Patch 5 removes some now-redundant flags.
Patch 6 adds a selftest that confirms the advertisement reliability
improvements.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/mptcp/options.c | 28 | ||||
-rw-r--r-- | net/mptcp/pm.c | 58 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 10 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 24 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 |
5 files changed, 83 insertions, 52 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bebb759f470e..4c37f4b215ee 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -667,29 +667,29 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * bool port; int len; - if ((mptcp_pm_should_add_signal_ipv6(msk) || - mptcp_pm_should_add_signal_port(msk) || - mptcp_pm_should_add_signal_echo(msk)) && - skb && skb_is_tcp_pure_ack(skb)) { - pr_debug("drop other suboptions"); - opts->suboptions = 0; - opts->ext_copy.use_ack = 0; - opts->ext_copy.use_map = 0; - remaining += opt_size; - drop_other_suboptions = true; - } - + /* add addr will strip the existing options, be sure to avoid breaking + * MPC/MPJ handshakes + */ if (!mptcp_pm_should_add_signal(msk) || - !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port))) + (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || + !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, + &echo, &port, &drop_other_suboptions)) return false; + if (drop_other_suboptions) + remaining += opt_size; len = mptcp_add_addr_len(opts->addr.family, echo, port); if (remaining < len) return false; *size = len; - if (drop_other_suboptions) + if (drop_other_suboptions) { + pr_debug("drop other suboptions"); + opts->suboptions = 0; + opts->ext_copy.use_ack = 0; + opts->ext_copy.use_map = 0; *size -= opt_size; + } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { opts->ahmac = add_addr_generate_hmac(msk->local_key, diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 0ed3e565f8f8..da0c4c925350 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -20,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, { u8 add_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, local_id=%d", msk, addr->id); + pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); lockdep_assert_held(&msk->pm.lock); - if (add_addr) { - pr_warn("addr_signal error, add_addr=%d", add_addr); + if (add_addr & + (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { + pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); return -EINVAL; } - msk->pm.local = *addr; - add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); - if (echo) + if (echo) { + msk->pm.remote = *addr; add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); - if (addr->family == AF_INET6) - add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - if (addr->port) - add_addr |= BIT(MPTCP_ADD_ADDR_PORT); + } else { + msk->pm.local = *addr; + add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); + } WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; } @@ -251,10 +251,14 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) /* path manager helpers */ -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo, bool *port) +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, + unsigned int opt_size, unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo, + bool *port, bool *drop_other_suboptions) { int ret = false; + u8 add_addr; + u8 family; spin_lock_bh(&msk->pm.lock); @@ -262,14 +266,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_add_signal(msk)) goto out_unlock; + /* always drop every other options for pure ack ADD_ADDR; this is a + * plain dup-ack from TCP perspective. The other MPTCP-relevant info, + * if any, will be carried by the 'original' TCP ack + */ + if (skb && skb_is_tcp_pure_ack(skb)) { + remaining += opt_size; + *drop_other_suboptions = true; + } + *echo = mptcp_pm_should_add_signal_echo(msk); - *port = mptcp_pm_should_add_signal_port(msk); + *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); - if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port)) + family = *echo ? msk->pm.remote.family : msk->pm.local.family; + if (remaining < mptcp_add_addr_len(family, *echo, *port)) goto out_unlock; - *saddr = msk->pm.local; - WRITE_ONCE(msk->pm.addr_signal, 0); + if (*echo) { + *addr = msk->pm.remote; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); + } else { + *addr = msk->pm.local; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); + } + WRITE_ONCE(msk->pm.addr_signal, add_addr); ret = true; out_unlock: @@ -281,6 +301,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list) { int ret = false, len; + u8 rm_addr; spin_lock_bh(&msk->pm.lock); @@ -288,16 +309,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_rm_signal(msk)) goto out_unlock; + rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); if (len < 0) { - WRITE_ONCE(msk->pm.addr_signal, 0); + WRITE_ONCE(msk->pm.addr_signal, rm_addr); goto out_unlock; } if (remaining < len) goto out_unlock; *rm_list = msk->pm.rm_list_tx; - WRITE_ONCE(msk->pm.addr_signal, 0); + WRITE_ONCE(msk->pm.addr_signal, rm_addr); ret = true; out_unlock: diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 480f43ec1bfb..1e4289c507ff 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -317,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!entry->addr.id) return; - if (mptcp_pm_should_add_signal(msk)) { + if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; } spin_lock_bh(&msk->pm.lock); - if (!mptcp_pm_should_add_signal(msk)) { + if (!mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); @@ -647,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) bool slow; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for %s%s%s", - mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", - mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", - mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); + pr_debug("send ack for %s", + mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); slow = lock_sock_fast(ssk); tcp_send_ack(ssk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index bc1bfd7ac9c1..7cd3d5979bcd 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -178,8 +178,6 @@ enum mptcp_pm_status { enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, - MPTCP_ADD_ADDR_IPV6, - MPTCP_ADD_ADDR_PORT, MPTCP_RM_ADDR_SIGNAL, }; @@ -748,22 +746,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); + return READ_ONCE(msk->pm.addr_signal) & + (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO)); } -static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) +static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); -} - -static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk) -{ - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } -static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk) +static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) @@ -794,8 +788,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo, bool *port); +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, + unsigned int opt_size, unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo, + bool *port, bool *drop_other_suboptions); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 8c7117e2c337..7b3e6cc56935 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1016,6 +1016,21 @@ signal_address_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal invalid addresses" 1 1 1 chk_add_nr 3 3 + + # signal addresses race test + reset + ip netns exec $ns1 ./pm_nl_ctl limits 4 4 + ip netns exec $ns2 ./pm_nl_ctl limits 4 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_add_nr 4 4 } link_failure_tests() |