summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--net/ipv4/tcp_output.c47
-rw-r--r--net/ipv4/tcp_timer.c83
2 files changed, 54 insertions, 76 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 730bc44dbad9..6527f61f59ff 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (sk->sk_pacing_status != SK_PACING_NONE) {
unsigned long rate = sk->sk_pacing_rate;
@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk);
-
+ prior_wstamp = tp->tcp_wstamp_ns;
+ tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
if (clone_it) {
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
return -ENOBUFS;
}
- prior_wstamp = tp->tcp_wstamp_ns;
- tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
-
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
-
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+ /* To avoid taking spuriously low RTT samples based on a timestamp
+ * for a transmit that never happened, always mark EVER_RETRANS
+ */
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
TCP_SKB_CB(skb)->seq, segs, err);
if (likely(!err)) {
- TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
@@ -2963,13 +2963,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out += tcp_skb_pcount(skb);
-
- /* Save stamp of the first retransmit. */
- if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_skb_timestamp(skb);
-
}
+ /* Save stamp of the first (attempted) retransmit. */
+ if (!tp->retrans_stamp)
+ tp->retrans_stamp = tcp_skb_timestamp(skb);
+
if (tp->undo_retrans < 0)
tp->undo_retrans = 0;
tp->undo_retrans += tcp_skb_pcount(skb);
@@ -3750,7 +3749,7 @@ void tcp_send_probe0(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
- unsigned long probe_max;
+ unsigned long timeout;
int err;
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
@@ -3762,26 +3761,18 @@ void tcp_send_probe0(struct sock *sk)
return;
}
+ icsk->icsk_probes_out++;
if (err <= 0) {
if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
icsk->icsk_backoff++;
- icsk->icsk_probes_out++;
- probe_max = TCP_RTO_MAX;
+ timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
/* If packet was not sent due to local congestion,
- * do not backoff and do not remember icsk_probes_out.
- * Let local senders to fight for local resources.
- *
- * Use accumulated backoff yet.
+ * Let senders fight for local resources conservatively.
*/
- if (!icsk->icsk_probes_out)
- icsk->icsk_probes_out = 1;
- probe_max = TCP_RESOURCE_PROBE_INTERVAL;
- }
- tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- tcp_probe0_when(sk, probe_max),
- TCP_RTO_MAX,
- NULL);
+ timeout = TCP_RESOURCE_PROBE_INTERVAL;
+ }
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
}
int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71a29e9c0620..d7399a89469d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,28 +22,14 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-static u32 tcp_retransmit_stamp(const struct sock *sk)
-{
- u32 start_ts = tcp_sk(sk)->retrans_stamp;
-
- if (unlikely(!start_ts)) {
- struct sk_buff *head = tcp_rtx_queue_head(sk);
-
- if (!head)
- return 0;
- start_ts = tcp_skb_timestamp(head);
- }
- return start_ts;
-}
-
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
s32 remaining;
- start_ts = tcp_retransmit_stamp(sk);
- if (!icsk->icsk_user_timeout || !start_ts)
+ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (!icsk->icsk_user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
remaining = icsk->icsk_user_timeout - elapsed;
@@ -173,7 +159,20 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
-
+static unsigned int tcp_model_timeout(struct sock *sk,
+ unsigned int boundary,
+ unsigned int rto_base)
+{
+ unsigned int linear_backoff_thresh, timeout;
+
+ linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
+ if (boundary <= linear_backoff_thresh)
+ timeout = ((2 << boundary) - 1) * rto_base;
+ else
+ timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+ return jiffies_to_msecs(timeout);
+}
/**
* retransmits_timed_out() - returns true if this connection has timed out
* @sk: The current socket
@@ -191,26 +190,15 @@ static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
unsigned int timeout)
{
- const unsigned int rto_base = TCP_RTO_MIN;
- unsigned int linear_backoff_thresh, start_ts;
+ unsigned int start_ts;
if (!inet_csk(sk)->icsk_retransmits)
return false;
- start_ts = tcp_retransmit_stamp(sk);
- if (!start_ts)
- return false;
-
- if (likely(timeout == 0)) {
- linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
+ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (likely(timeout == 0))
+ timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
- if (boundary <= linear_backoff_thresh)
- timeout = ((2 << boundary) - 1) * rto_base;
- else
- timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
- (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
- timeout = jiffies_to_msecs(timeout);
- }
return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
@@ -345,7 +333,6 @@ static void tcp_probe_timer(struct sock *sk)
struct sk_buff *skb = tcp_send_head(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
- u32 start_ts;
if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
@@ -360,12 +347,13 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- start_ts = tcp_skb_timestamp(skb);
- if (!start_ts)
- skb->skb_mstamp_ns = tp->tcp_clock_cache;
- else if (icsk->icsk_user_timeout &&
- (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
- goto abort;
+ if (icsk->icsk_user_timeout) {
+ u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
+ tcp_probe0_base(sk));
+
+ if (elapsed >= icsk->icsk_user_timeout)
+ goto abort;
+ }
max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
@@ -395,6 +383,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
int max_retries = icsk->icsk_syn_retries ? :
sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+ struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req;
req = tcp_sk(sk)->fastopen_rsk;
@@ -412,6 +401,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
inet_rtx_syn_ack(sk, req);
req->num_timeout++;
icsk->icsk_retransmits++;
+ if (!tp->retrans_stamp)
+ tp->retrans_stamp = tcp_time_stamp(tp);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}
@@ -443,10 +434,8 @@ void tcp_retransmit_timer(struct sock *sk)
*/
return;
}
- if (!tp->packets_out)
- goto out;
-
- WARN_ON(tcp_rtx_queue_empty(sk));
+ if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk)))
+ return;
tp->tlp_high_seq = 0;
@@ -511,14 +500,13 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
+ icsk->icsk_retransmits++;
if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
- * do not backoff.
+ * Let senders fight for local resources conservatively.
*/
- if (!icsk->icsk_retransmits)
- icsk->icsk_retransmits = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
+ TCP_RESOURCE_PROBE_INTERVAL,
TCP_RTO_MAX);
goto out;
}
@@ -539,7 +527,6 @@ void tcp_retransmit_timer(struct sock *sk)
* the 120 second clamps though!
*/
icsk->icsk_backoff++;
- icsk->icsk_retransmits++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is