summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-05-07 04:16:49 +0300
committerJakub Kicinski <kuba@kernel.org>2026-05-07 04:16:50 +0300
commit2b1f48cc0f31abd1115a8c4b74b6425aba46eae4 (patch)
tree5aa3af72025501df785192808a303899c3f9db99
parentc8f7244c8cccaaed4e6c9fe4b8a07e101d0423e5 (diff)
parent53705ddfa18408f8e1f064331b6387509fa19f7f (diff)
downloadlinux-2b1f48cc0f31abd1115a8c4b74b6425aba46eae4.tar.xz
Merge branch 'mptcp-pm-misc-fixes-for-v7-1-rc3'
Matthieu Baerts says: ==================== mptcp: pm: misc. fixes for v7.1-rc3 Here are various fixes, mainly related to ADD_ADDRs: - Patch 1: save ADD_ADDR for rtx with ID0 when needed. A fix for v6.1. - Patch 2: remove unneeded exception for ID 0. A fix for v5.10. - Patches 3-5: fix potential data-race and leaks during ADD_ADDR rtx. A fix for v5.10. - Patch 6: resched blocked ADD_ADDR rtx after a more appropriate timeout, not after 15 seconds. A fix for v5.10. - Patch 7: skip inactive subflows when looking at the max RTO. A fix for v6.18. - Patch 8: avoid iterating over all subflows when there is no need to. A fix for v6.18. - Patch 9: skip closed subflows when looking at sending MP_PRIO. A fix for v5.17. - Patch 10: properly catch errors when using check_output() in the selftests. A fix for v6.9. - Patch 11: skip the 'unknown' flag test when 'ip mptcp' is used. A fix for v6.10. ==================== Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-0-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/mptcp/pm.c62
-rw-r--r--net/mptcp/pm_kernel.c13
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh16
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh20
4 files changed, 73 insertions, 38 deletions
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 57a456690406..3c152bf66cd5 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
u8 retrans_times;
+ bool timer_done;
struct timer_list add_timer;
struct mptcp_sock *sock;
struct rcu_head rcu;
@@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
+ if (!__mptcp_subflow_active(subflow))
+ continue;
+
mptcp_local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
@@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk)
const struct net *net = sock_net((struct sock *)msk);
unsigned int rto = mptcp_get_add_addr_timeout(net);
struct mptcp_subflow_context *subflow;
- unsigned int max = 0;
+ unsigned int max = 0, max_stale = 0;
+
+ if (!rto)
+ return 0;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct inet_connection_sock *icsk = inet_csk(ssk);
- if (icsk->icsk_rto > max)
+ if (!__mptcp_subflow_active(subflow))
+ continue;
+
+ if (unlikely(subflow->stale)) {
+ if (icsk->icsk_rto > max_stale)
+ max_stale = icsk->icsk_rto;
+ } else if (icsk->icsk_rto > max) {
max = icsk->icsk_rto;
+ }
}
- if (max && max < rto)
- rto = max;
+ if (max)
+ return min(max, rto);
+
+ if (max_stale)
+ return min(max_stale, rto);
return rto;
}
@@ -327,26 +344,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
add_timer);
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
- unsigned int timeout;
+ unsigned int timeout = 0;
pr_debug("msk=%p\n", msk);
- if (!msk)
- return;
-
- if (inet_sk_state_load(sk) == TCP_CLOSE)
- return;
-
- if (!entry->addr.id)
- return;
+ bh_lock_sock(sk);
+ if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
+ goto out;
- if (mptcp_pm_should_add_signal_addr(msk)) {
- sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+ if (sock_owned_by_user(sk)) {
+ /* Try again later. */
+ timeout = HZ / 20;
goto out;
}
timeout = mptcp_adjust_add_addr_timeout(msk);
- if (!timeout)
+ if (!timeout || mptcp_pm_should_add_signal_addr(msk))
goto out;
spin_lock_bh(&msk->pm.lock);
@@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
}
if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
- sk_reset_timer(sk, timer,
- jiffies + (timeout << entry->retrans_times));
+ timeout <<= entry->retrans_times;
+ else
+ timeout = 0;
spin_unlock_bh(&msk->pm.lock);
@@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
mptcp_pm_subflow_established(msk);
out:
- __sock_put(sk);
+ if (timeout)
+ sk_reset_timer(sk, timer, jiffies + timeout);
+ else
+ /* if sock_put calls sk_free: avoid waiting for this timer */
+ entry->timer_done = true;
+ bh_unlock_sock(sk);
+ sock_put(sk);
}
struct mptcp_pm_add_entry *
@@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
reset_timer:
+ add_entry->timer_done = false;
timeout = mptcp_adjust_add_addr_timeout(msk);
if (timeout)
sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
@@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
list_for_each_entry_safe(entry, tmp, &free_list, list) {
- sk_stop_timer_sync(sk, &entry->add_timer);
+ if (!entry->timer_done)
+ sk_stop_timer_sync(sk, &entry->add_timer);
kfree_rcu(entry, rcu);
}
}
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index c9f1e5af3cd3..fc818b63752e 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < endp_signal_max) {
+ u8 endp_id;
+
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
@@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (!select_signal_address(pernet, msk, &local))
goto subflow;
+ /* Special case for ID0: set the correct ID */
+ endp_id = local.addr.id;
+ if (endp_id == msk->mpc_endpoint_id)
+ local.addr.id = 0;
+
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
return;
- __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(endp_id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
- /* Special case for ID0: set the correct ID */
- if (local.addr.id == msk->mpc_endpoint_id)
- local.addr.id = 0;
-
mptcp_pm_announce_addr(msk, &local.addr, false);
mptcp_pm_addr_send_ack(msk);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 5fea7e7df628..989a5975dcea 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() {
wait_local_port_listen "${@}" "tcp"
}
+# $1: error file, $2: cmd, $3: expected msg, [$4: expected error]
mptcp_lib_check_output() {
local err="${1}"
local cmd="${2}"
local expected="${3}"
+ local exp_error="${4:-0}"
local cmd_ret=0
local out
- if ! out=$(${cmd} 2>"${err}"); then
- cmd_ret=${?}
- fi
+ out=$(${cmd} 2>"${err}") || cmd_ret=1
- if [ ${cmd_ret} -ne 0 ]; then
- mptcp_lib_pr_fail "command execution '${cmd}' stderr"
- cat "${err}"
+ if [ "${cmd_ret}" != "${exp_error}" ]; then
+ mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:"
+ if [ "${exp_error}" = 0 ]; then
+ cat "${err}"
+ else
+ echo "${out}"
+ fi
return 2
elif [ "${out}" = "${expected}" ]; then
return 0
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 123d9d7a0278..04594dfc22b1 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -122,10 +122,12 @@ check()
local cmd="$1"
local expected="$2"
local msg="$3"
+ local exp_error="$4"
local rc=0
mptcp_lib_print_title "$msg"
- mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" ||
+ rc=${?}
if [ ${rc} -eq 2 ]; then
mptcp_lib_result_fail "${msg} # error ${rc}"
ret=${KSFT_FAIL}
@@ -158,13 +160,13 @@ check "show_endpoints" \
"3,10.0.1.3,signal backup")" "dump addrs"
del_endpoint 2
-check "get_endpoint 2" "" "simple del addr"
+check "get_endpoint 2" "" "simple del addr" 1
check "show_endpoints" \
"$(format_endpoints "1,10.0.1.1" \
"3,10.0.1.3,signal backup")" "dump addrs after del"
add_endpoint 10.0.1.3 2>/dev/null
-check "get_endpoint 4" "" "duplicate addr"
+check "get_endpoint 4" "" "duplicate addr" 1
add_endpoint 10.0.1.4 flags signal
check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
@@ -173,7 +175,7 @@ for i in $(seq 5 9); do
add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
-check "get_endpoint 10" "" "above hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit" 1
del_endpoint 9
for i in $(seq 10 255); do
@@ -192,9 +194,13 @@ check "show_endpoints" \
flush_endpoint
check "show_endpoints" "" "flush addrs"
-add_endpoint 10.0.1.1 flags unknown
-check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
-flush_endpoint
+# "unknown" flag is only supported by pm_nl_ctl
+if ! mptcp_lib_is_ip_mptcp; then
+ add_endpoint 10.0.1.1 flags unknown
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \
+ "ignore unknown flags"
+ flush_endpoint
+fi
set_limits 9 1 2>/dev/null
check "get_limits" "${default_limits}" "rcv addrs above hard limit"