diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-01-29 06:31:53 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-01-29 06:31:53 +0300 |
| commit | c0f38f31367962635cf0e80cc7098aa68bfbf1ae (patch) | |
| tree | 8f3a5edf410b972bd49b4d8da765cbbc328c1fb1 | |
| parent | 239f09e258b906deced5c2a7c1ac8aed301b558b (diff) | |
| parent | d5fb143dbe8d3050c9abcd390d65928e2a3e646e (diff) | |
| download | linux-c0f38f31367962635cf0e80cc7098aa68bfbf1ae.tar.xz | |
Merge branch 'tcp-make-tcp_ack-faster'
Eric Dumazet says:
====================
tcp: make tcp_ack() faster
Move tcp_rack_update_reo_wnd() and tcp_rack_advance() to tcp_input.c
to allow their (auto)inlining.
No functional change in this series.
====================
Link: https://patch.msgid.link/20260127032147.3498272-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | include/net/tcp.h | 3 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 79 | ||||
| -rw-r--r-- | net/ipv4/tcp_recovery.c | 75 |
3 files changed, 77 insertions, 80 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index efff433de9a4..f1cf9e6730c8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2514,10 +2514,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); extern bool tcp_rack_mark_lost(struct sock *sk); -extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); -extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); /* tcp_plb.c */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9e91ddbc6253..a2a872382fc0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1558,6 +1558,38 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } +/* Record the most recently (re)sent time among the (s)acked packets + * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from + * draft-cheng-tcpm-rack-00.txt + */ +static void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, + u32 end_seq, u64 xmit_time) +{ + u32 rtt_us; + + rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); + if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { + /* If the sacked packet was retransmitted, it's ambiguous + * whether the retransmission or the original (or the prior + * retransmission) was sacked. + * + * If the original is lost, there is no ambiguity. Otherwise + * we assume the original can be delayed up to aRTT + min_rtt. + * the aRTT term is bounded by the fast recovery or timeout, + * so it's at least one RTT (i.e., retransmission is at least + * an RTT later). + */ + return; + } + tp->rack.advanced = 1; + tp->rack.rtt_us = rtt_us; + if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { + tp->rack.mstamp = xmit_time; + tp->rack.end_seq = end_seq; + } +} + /* Mark the given newly-SACKed range as such, adjusting counters and hints. */ static u8 tcp_sacktag_one(struct sock *sk, struct tcp_sacktag_state *state, u8 sacked, @@ -4149,6 +4181,49 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, return delivered; } +/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. + * + * If a DSACK is received that seems like it may have been due to reordering + * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded + * by srtt), since there is possibility that spurious retransmission was + * due to reordering delay longer than reo_wnd. + * + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) + * no. of successful recoveries (accounts for full DSACK-based loss + * recovery undo). After that, reset it to default (min_rtt/4). + * + * At max, reo_wnd is incremented only once per rtt. So that the new + * DSACK on which we are reacting, is due to the spurious retx (approx) + * after the reo_wnd has been updated last time. + * + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than + * absolute value to account for change in rtt. + */ +static void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || + !rs->prior_delivered) + return; + + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ + if (before(rs->prior_delivered, tp->rack.last_delivered)) + tp->rack.dsack_seen = 0; + + /* Adjust the reo_wnd if update is pending */ + if (tp->rack.dsack_seen) { + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, + tp->rack.reo_wnd_steps + 1); + tp->rack.dsack_seen = 0; + tp->rack.last_delivered = tp->delivered; + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; + } else if (!tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_steps = 1; + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -4283,7 +4358,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, flag); - if (tp->tlp_high_seq) + if (unlikely(tp->tlp_high_seq)) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { @@ -4333,7 +4408,7 @@ no_queue: */ tcp_ack_probe(sk); - if (tp->tlp_high_seq) + if (unlikely(tp->tlp_high_seq)) tcp_process_tlp_ack(sk, ack, flag); return 1; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index c52fd3254b6e..139646751073 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -111,38 +111,6 @@ bool tcp_rack_mark_lost(struct sock *sk) return !!timeout; } -/* Record the most recently (re)sent time among the (s)acked packets - * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from - * draft-cheng-tcpm-rack-00.txt - */ -void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - u64 xmit_time) -{ - u32 rtt_us; - - rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); - if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { - /* If the sacked packet was retransmitted, it's ambiguous - * whether the retransmission or the original (or the prior - * retransmission) was sacked. - * - * If the original is lost, there is no ambiguity. Otherwise - * we assume the original can be delayed up to aRTT + min_rtt. - * the aRTT term is bounded by the fast recovery or timeout, - * so it's at least one RTT (i.e., retransmission is at least - * an RTT later). - */ - return; - } - tp->rack.advanced = 1; - tp->rack.rtt_us = rtt_us; - if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) { - tp->rack.mstamp = xmit_time; - tp->rack.end_seq = end_seq; - } -} - /* We have waited long enough to accommodate reordering. Mark the expired * packets lost and retransmit them. */ @@ -166,49 +134,6 @@ void tcp_rack_reo_timeout(struct sock *sk) tcp_rearm_rto(sk); } -/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. - * - * If a DSACK is received that seems like it may have been due to reordering - * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded - * by srtt), since there is possibility that spurious retransmission was - * due to reordering delay longer than reo_wnd. - * - * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) - * no. of successful recoveries (accounts for full DSACK-based loss - * recovery undo). After that, reset it to default (min_rtt/4). - * - * At max, reo_wnd is incremented only once per rtt. So that the new - * DSACK on which we are reacting, is due to the spurious retx (approx) - * after the reo_wnd has been updated last time. - * - * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than - * absolute value to account for change in rtt. - */ -void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & - TCP_RACK_STATIC_REO_WND) || - !rs->prior_delivered) - return; - - /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ - if (before(rs->prior_delivered, tp->rack.last_delivered)) - tp->rack.dsack_seen = 0; - - /* Adjust the reo_wnd if update is pending */ - if (tp->rack.dsack_seen) { - tp->rack.reo_wnd_steps = min_t(u32, 0xFF, - tp->rack.reo_wnd_steps + 1); - tp->rack.dsack_seen = 0; - tp->rack.last_delivered = tp->delivered; - tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; - } else if (!tp->rack.reo_wnd_persist) { - tp->rack.reo_wnd_steps = 1; - } -} - /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits * the next unacked packet upon receiving * a) three or more DUPACKs to start the fast recovery |
