summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c85
1 files changed, 55 insertions, 30 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..730bc44dbad9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
@@ -233,16 +233,14 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
- (*rcv_wscale) = 0;
+ *rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
- space >>= 1;
- (*rcv_wscale)++;
- }
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+ 0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -596,7 +594,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -732,7 +731,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -1904,24 +1904,27 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* This algorithm is from John Heffner.
*/
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
- bool *is_cwnd_limited, u32 max_segs)
+ bool *is_cwnd_limited,
+ bool *is_rwnd_limited,
+ u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 age, send_win, cong_win, limit, in_flight;
+ u32 send_win, cong_win, limit, in_flight;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *head;
int win_divisor;
-
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- goto send_now;
+ s64 delta;
if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
/* Avoid bursty behavior by allowing defer
- * only if the last write was recent.
+ * only if the last write was recent (1 ms).
+ * Note that tp->tcp_wstamp_ns can be in the future if we have
+ * packets waiting in a qdisc or device for EDT delivery.
*/
- if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
+ delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
+ if (delta > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1968,15 +1971,33 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
+ delta = tp->tcp_clock_cache - head->tstamp;
/* If next ACK is likely to come too late (half srtt), do not defer */
- if (age < (tp->srtt_us >> 4))
+ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
goto send_now;
- /* Ok, it looks like it is advisable to defer. */
+ /* Ok, it looks like it is advisable to defer.
+ * Three cases are tracked :
+ * 1) We are cwnd-limited
+ * 2) We are rwnd-limited
+ * 3) We are application limited.
+ */
+ if (cong_win < send_win) {
+ if (cong_win <= skb->len) {
+ *is_cwnd_limited = true;
+ return true;
+ }
+ } else {
+ if (send_win <= skb->len) {
+ *is_rwnd_limited = true;
+ return true;
+ }
+ }
- if (cong_win < send_win && cong_win <= skb->len)
- *is_cwnd_limited = true;
+ /* If this packet won't get more data, do not wait. */
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
+ TCP_SKB_CB(skb)->eor)
+ goto send_now;
return true;
@@ -2212,8 +2233,9 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
sk->sk_pacing_rate >> sk->sk_pacing_shift);
- limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
@@ -2356,7 +2378,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
} else {
if (!push_one &&
tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
- max_segs))
+ &is_rwnd_limited, max_segs))
break;
}
@@ -2494,15 +2516,18 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
+ if (unlikely(!skb)) {
+ WARN_ONCE(tp->packets_out,
+ "invalid inflight: %u state %u cwnd %u mss %d\n",
+ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ inet_csk(sk)->icsk_pending = 0;
+ return;
+ }
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
goto rearm_timer;
- /* Retransmit last segment. */
- if (WARN_ON(!skb))
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2920,7 +2945,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
return err;
}