Merge branch 'tcp-make-tcp_ack-faster'

Eric Dumazet says:

====================
tcp: make tcp_ack() faster

Move tcp_rack_update_reo_wnd() and tcp_rack_advance() to tcp_input.c
to allow their (auto)inlining.

No functional change in this series.
====================

Link: https://patch.msgid.link/20260127032147.3498272-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2026-01-29 06:31:53 +0300
committer: Jakub Kicinski <kuba@kernel.org> 2026-01-29 06:31:53 +0300
commit: c0f38f31367962635cf0e80cc7098aa68bfbf1ae (patch)
tree: 8f3a5edf410b972bd49b4d8da765cbbc328c1fb1
parent: 239f09e258b906deced5c2a7c1ac8aed301b558b (diff)
parent: d5fb143dbe8d3050c9abcd390d65928e2a3e646e (diff)
download: linux-c0f38f31367962635cf0e80cc7098aa68bfbf1ae.tar.xz
3 files changed, 77 insertions, 80 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index efff433de9a4..f1cf9e6730c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2514,10 +2514,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
 extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
 				u32 reo_wnd);
 extern bool tcp_rack_mark_lost(struct sock *sk);
-extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-			     u64 xmit_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
-extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
 
 /* tcp_plb.c */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9e91ddbc6253..a2a872382fc0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1558,6 +1558,38 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
+/* Record the most recently (re)sent time among the (s)acked packets
+ * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
+ * draft-cheng-tcpm-rack-00.txt
+ */
+static void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
+			     u32 end_seq, u64 xmit_time)
+{
+	u32 rtt_us;
+
+	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
+	if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
+		/* If the sacked packet was retransmitted, it's ambiguous
+		 * whether the retransmission or the original (or the prior
+		 * retransmission) was sacked.
+		 *
+		 * If the original is lost, there is no ambiguity. Otherwise
+		 * we assume the original can be delayed up to aRTT + min_rtt.
+		 * the aRTT term is bounded by the fast recovery or timeout,
+		 * so it's at least one RTT (i.e., retransmission is at least
+		 * an RTT later).
+		 */
+		return;
+	}
+	tp->rack.advanced = 1;
+	tp->rack.rtt_us = rtt_us;
+	if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
+			       end_seq, tp->rack.end_seq)) {
+		tp->rack.mstamp = xmit_time;
+		tp->rack.end_seq = end_seq;
+	}
+}
+
 /* Mark the given newly-SACKed range as such, adjusting counters and hints. */
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
@@ -4149,6 +4181,49 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered,
 	return delivered;
 }
 
+/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
+ *
+ * If a DSACK is received that seems like it may have been due to reordering
+ * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
+ * by srtt), since there is possibility that spurious retransmission was
+ * due to reordering delay longer than reo_wnd.
+ *
+ * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
+ * no. of successful recoveries (accounts for full DSACK-based loss
+ * recovery undo). After that, reset it to default (min_rtt/4).
+ *
+ * At max, reo_wnd is incremented only once per rtt. So that the new
+ * DSACK on which we are reacting, is due to the spurious retx (approx)
+ * after the reo_wnd has been updated last time.
+ *
+ * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
+ * absolute value to account for change in rtt.
+ */
+static void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+	     TCP_RACK_STATIC_REO_WND) ||
+	    !rs->prior_delivered)
+		return;
+
+	/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
+	if (before(rs->prior_delivered, tp->rack.last_delivered))
+		tp->rack.dsack_seen = 0;
+
+	/* Adjust the reo_wnd if update is pending */
+	if (tp->rack.dsack_seen) {
+		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
+					       tp->rack.reo_wnd_steps + 1);
+		tp->rack.dsack_seen = 0;
+		tp->rack.last_delivered = tp->delivered;
+		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
+	} else if (!tp->rack.reo_wnd_persist) {
+		tp->rack.reo_wnd_steps = 1;
+	}
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
@@ -4283,7 +4358,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	tcp_in_ack_event(sk, flag);
 
-	if (tp->tlp_high_seq)
+	if (unlikely(tp->tlp_high_seq))
 		tcp_process_tlp_ack(sk, ack, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
@@ -4333,7 +4408,7 @@ no_queue:
 	 */
 	tcp_ack_probe(sk);
 
-	if (tp->tlp_high_seq)
+	if (unlikely(tp->tlp_high_seq))
 		tcp_process_tlp_ack(sk, ack, flag);
 	return 1;
 
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index c52fd3254b6e..139646751073 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -111,38 +111,6 @@ bool tcp_rack_mark_lost(struct sock *sk)
 	return !!timeout;
 }
 
-/* Record the most recently (re)sent time among the (s)acked packets
- * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
- * draft-cheng-tcpm-rack-00.txt
- */
-void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-		      u64 xmit_time)
-{
-	u32 rtt_us;
-
-	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
-	if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
-		/* If the sacked packet was retransmitted, it's ambiguous
-		 * whether the retransmission or the original (or the prior
-		 * retransmission) was sacked.
-		 *
-		 * If the original is lost, there is no ambiguity. Otherwise
-		 * we assume the original can be delayed up to aRTT + min_rtt.
-		 * the aRTT term is bounded by the fast recovery or timeout,
-		 * so it's at least one RTT (i.e., retransmission is at least
-		 * an RTT later).
-		 */
-		return;
-	}
-	tp->rack.advanced = 1;
-	tp->rack.rtt_us = rtt_us;
-	if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
-			       end_seq, tp->rack.end_seq)) {
-		tp->rack.mstamp = xmit_time;
-		tp->rack.end_seq = end_seq;
-	}
-}
-
 /* We have waited long enough to accommodate reordering. Mark the expired
  * packets lost and retransmit them.
  */
@@ -166,49 +134,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
 		tcp_rearm_rto(sk);
 }
 
-/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
- *
- * If a DSACK is received that seems like it may have been due to reordering
- * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
- * by srtt), since there is possibility that spurious retransmission was
- * due to reordering delay longer than reo_wnd.
- *
- * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
- * no. of successful recoveries (accounts for full DSACK-based loss
- * recovery undo). After that, reset it to default (min_rtt/4).
- *
- * At max, reo_wnd is incremented only once per rtt. So that the new
- * DSACK on which we are reacting, is due to the spurious retx (approx)
- * after the reo_wnd has been updated last time.
- *
- * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
- * absolute value to account for change in rtt.
- */
-void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
-	     TCP_RACK_STATIC_REO_WND) ||
-	    !rs->prior_delivered)
-		return;
-
-	/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
-	if (before(rs->prior_delivered, tp->rack.last_delivered))
-		tp->rack.dsack_seen = 0;
-
-	/* Adjust the reo_wnd if update is pending */
-	if (tp->rack.dsack_seen) {
-		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
-					       tp->rack.reo_wnd_steps + 1);
-		tp->rack.dsack_seen = 0;
-		tp->rack.last_delivered = tp->delivered;
-		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
-	} else if (!tp->rack.reo_wnd_persist) {
-		tp->rack.reo_wnd_steps = 1;
-	}
-}
-
 /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
  * the next unacked packet upon receiving
  * a) three or more DUPACKs to start the fast recovery
author	Jakub Kicinski <kuba@kernel.org>	2026-01-29 06:31:53 +0300
committer	Jakub Kicinski <kuba@kernel.org>	2026-01-29 06:31:53 +0300
commit	c0f38f31367962635cf0e80cc7098aa68bfbf1ae (patch)
tree	8f3a5edf410b972bd49b4d8da765cbbc328c1fb1
parent	239f09e258b906deced5c2a7c1ac8aed301b558b (diff)
parent	d5fb143dbe8d3050c9abcd390d65928e2a3e646e (diff)
download	linux-c0f38f31367962635cf0e80cc7098aa68bfbf1ae.tar.xz