summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-06-11 02:47:45 +0300
committerDavid S. Miller <davem@davemloft.net>2021-06-11 02:47:45 +0300
commit232e3683b4ee529a0643fa45b3f0f6c06590aca2 (patch)
treecda31a1aa2fb6132768b9b4c1fdf9c8255a76dcb
parent22488e45501eca74653b502b194eb0eb25d2ad00 (diff)
parent499ada5073361c631f2a3c4a8aed44d53b6f82ec (diff)
downloadlinux-232e3683b4ee529a0643fa45b3f0f6c06590aca2.tar.xz
Merge branch 'mptcp-fixes'
Mat Martineau says: ==================== mptcp: More v5.13 fixes Here's another batch of MPTCP fixes for v5.13. Patch 1 cleans up memory accounting between the MPTCP-level socket and the subflows to more reliably transfer forward allocated memory under pressure. Patch 2 wakes up socket readers more reliably. Patch 3 changes a WARN_ONCE to a pr_debug. Patch 4 changes the selftests to only use syncookies in test cases where they do not cause spurious failures. Patch 5 modifies socket error reporting to avoid a possible soft lockup. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/mptcp/protocol.c52
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/subflow.c108
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh11
4 files changed, 88 insertions, 84 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5edc686faff1..632350018fb6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
/* try to fetch required memory from subflow */
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- if (ssk->sk_forward_alloc < skb->truesize)
- goto drop;
- __sk_mem_reclaim(ssk, skb->truesize);
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+ int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+ if (ssk->sk_forward_alloc < amount)
goto drop;
+
+ ssk->sk_forward_alloc -= amount;
+ sk->sk_forward_alloc += amount;
}
/* the skb map_seq accounts for the skb offset:
@@ -668,18 +670,22 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
if (inet_sk_state_load(sk) == TCP_CLOSE)
- return;
-
- mptcp_data_lock(sk);
+ return false;
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
__mptcp_ofo_queue(msk);
+ if (unlikely(ssk->sk_err)) {
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+ else
+ set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ }
/* If the moves have caught up with the DATA_FIN sequence number
* it's time to ack the DATA_FIN and change socket state, but
@@ -688,7 +694,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
*/
if (mptcp_pending_data_fin(sk, NULL))
mptcp_schedule_work(sk);
- mptcp_data_unlock(sk);
+ return moved > 0;
}
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -696,7 +702,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(sk);
int sk_rbuf, ssk_rbuf;
- bool wake;
/* The peer can send data while we are shutting down this
* subflow at msk destruction time, but we must avoid enqueuing
@@ -705,28 +710,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
if (unlikely(subflow->disposable))
return;
- /* move_skbs_to_msk below can legitly clear the data_avail flag,
- * but we will need later to properly woke the reader, cache its
- * value
- */
- wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
- if (wake)
- set_bit(MPTCP_DATA_READY, &msk->flags);
-
ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
if (unlikely(ssk_rbuf > sk_rbuf))
sk_rbuf = ssk_rbuf;
- /* over limit? can't append more skbs to msk */
+ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
- goto wake;
-
- move_skbs_to_msk(msk, ssk);
+ return;
-wake:
- if (wake)
+ /* Wake-up the reader only for in-sequence data */
+ mptcp_data_lock(sk);
+ if (move_skbs_to_msk(msk, ssk)) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
+ }
+ mptcp_data_unlock(sk);
}
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -858,7 +857,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
sock_owned_by_me(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->data_avail)
+ if (READ_ONCE(subflow->data_avail))
return mptcp_subflow_tcp_sock(subflow);
}
@@ -1955,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
tcp_cleanup_rbuf(ssk, moved);
+
+ if (unlikely(ssk->sk_err))
+ __mptcp_error_report(sk);
unlock_sock_fast(ssk, slowpath);
} while (!done);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0c6f99c67345..385796f0ef19 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
enum mptcp_data_avail {
MPTCP_SUBFLOW_NODATA,
MPTCP_SUBFLOW_DATA_AVAIL,
- MPTCP_SUBFLOW_OOO_DATA
};
struct mptcp_delegated_action {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ef3d037f984a..be1de4084196 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
- WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
- ssn, subflow->map_subflow_seq, subflow->map_data_len);
+ pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+ ssn, subflow->map_subflow_seq, subflow->map_data_len);
}
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
/* Mapping covers data later in the subflow stream,
* currently unsupported.
*/
- warn_bad_map(subflow, ssn);
+ dbg_bad_map(subflow, ssn);
return false;
}
if (unlikely(!before(ssn, subflow->map_subflow_seq +
subflow->map_data_len))) {
/* Mapping does covers past subflow data, invalid */
- warn_bad_map(subflow, ssn + skb->len);
+ dbg_bad_map(subflow, ssn);
return false;
}
return true;
@@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
if (subflow->data_avail)
return true;
@@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
ack_seq);
- if (ack_seq == old_ack) {
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- break;
- } else if (after64(ack_seq, old_ack)) {
- subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
- break;
+ if (unlikely(before64(ack_seq, old_ack))) {
+ mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ continue;
}
- /* only accept in-sequence mapping. Old values are spurious
- * retransmission
- */
- mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ break;
}
return true;
@@ -1065,12 +1060,11 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
ssk->sk_err = EBADMSG;
- ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
return false;
}
@@ -1080,7 +1074,7 @@ fallback:
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
return true;
}
@@ -1092,7 +1086,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1120,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = tcp_full_space(sk);
}
-static void subflow_data_ready(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- u16 state = 1 << inet_sk_state_load(sk);
- struct sock *parent = subflow->conn;
- struct mptcp_sock *msk;
-
- msk = mptcp_sk(parent);
- if (state & TCPF_LISTEN) {
- /* MPJ subflow are removed from accept queue before reaching here,
- * avoid stray wakeups
- */
- if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
- return;
-
- set_bit(MPTCP_DATA_READY, &msk->flags);
- parent->sk_data_ready(parent);
- return;
- }
-
- WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
- !subflow->mp_join && !(state & TCPF_CLOSE));
-
- if (mptcp_subflow_data_available(sk))
- mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
- struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
- mptcp_propagate_sndbuf(sk, ssk);
- mptcp_write_space(sk);
-}
-
void __mptcp_error_report(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1195,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk)
mptcp_data_unlock(sk);
}
+static void subflow_data_ready(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ u16 state = 1 << inet_sk_state_load(sk);
+ struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
+
+ msk = mptcp_sk(parent);
+ if (state & TCPF_LISTEN) {
+ /* MPJ subflow are removed from accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ parent->sk_data_ready(parent);
+ return;
+ }
+
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join && !(state & TCPF_CLOSE));
+
+ if (mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_propagate_sndbuf(sk, ssk);
+ mptcp_write_space(sk);
+}
+
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
@@ -1505,6 +1501,8 @@ static void subflow_state_change(struct sock *sk)
*/
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9ca5f1ba461e..2b495dc8d78e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
-
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
exit $ret
fi
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+ # mptcp syncookie support.
+ if [ $sender = $ns1 ]; then
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+ else
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
+ fi
+
run_tests "$ns2" $sender 10.0.1.2
run_tests "$ns2" $sender dead:beef:1::2
run_tests "$ns2" $sender 10.0.2.1