Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  135
1 file changed, 76 insertions(+), 59 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d93a5a89c569..d176e7888a20 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -419,6 +419,20 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
return false;
}
+static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
+{
+ tp->delivered_ce += ecn_count;
+}
+
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tcp_count_delivered_ce(tp, delivered);
+}
+
/* Buffer size and advertised window tuning.
*
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
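These two helpers are hoisted above their former location (removed further down in this patch) so the ACK-processing code can account delivered packets in one place. A minimal sketch of the call pattern, mirroring the call site added later in this diff:

    /* Every newly (S)ACKed packet bumps tp->delivered; the same amount
     * also lands in tp->delivered_ce when the ACK echoed an ECN mark,
     * so delivered_ce / delivered approximates the CE-mark rate. */
    tcp_count_delivered(tp, sack_state.sack_delivered, flag & FLAG_ECE);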
@@ -651,10 +665,12 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt_us;
- long m = sample;
+ u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us;
+ long m = sample << 3;
- if (new_sample != 0) {
+ if (old_sample == 0 || m < old_sample) {
+ new_sample = m;
+ } else {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
* with chatty applications or bulk transfer apps which
@@ -665,17 +681,9 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
* else with timestamps disabled convergence takes too
* long.
*/
- if (!win_dep) {
- m -= (new_sample >> 3);
- new_sample += m;
- } else {
- m <<= 3;
- if (m < new_sample)
- new_sample = m;
- }
- } else {
- /* No previous measure. */
- new_sample = m << 3;
+ if (win_dep)
+ return;
+ new_sample = old_sample - (old_sample >> 3) + sample;
}
tp->rcv_rtt_est.rtt_us = new_sample;
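The rewritten estimator keeps rcv_rtt_est.rtt_us in 8x fixed point: a new minimum is adopted outright, and only the non-win_dep path smooths with a gain-1/8 EWMA. A user-space sketch of the same arithmetic (hypothetical helper, for illustration only):

    /* est holds 8 * RTT, so est >> 3 recovers the smoothed estimate. */
    static unsigned int rcv_rtt_ewma(unsigned int est, unsigned int sample,
                                     int win_dep)
    {
        if (est == 0 || (sample << 3) < est)
            return sample << 3;     /* adopt the new minimum outright */
        if (win_dep)
            return est;             /* win_dep: minimum-tracking only */
        /* gain-1/8 EWMA: rtt += (sample - rtt) / 8, scaled by 8 */
        return est - (est >> 3) + sample;
    }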
@@ -699,7 +707,7 @@ new_measure:
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
{
u32 delta, delta_us;
@@ -709,7 +717,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
if (!delta)
- delta = 1;
+ delta = min_delta;
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
return delta_us;
}
@@ -727,9 +735,9 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
- s32 delta = tcp_rtt_tsopt_us(tp);
+ s32 delta = tcp_rtt_tsopt_us(tp, 0);
- if (delta >= 0)
+ if (delta > 0)
tcp_rcv_rtt_update(tp, delta, 0);
}
}
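Passing min_delta = 0 here, combined with the switch from delta >= 0 to delta > 0, means a sample whose echoed timestamp lands in the same tick is discarded rather than rounded up; tcp_ack_update_rtt() (further down) keeps the old one-tick floor by passing 1. A sketch of the effect, assuming the kernel's 1 ms timestamp tick (TCP_TS_HZ = 1000):

    /* tsecr echoed within the same timestamp tick => raw delta == 0 */
    s32 d_rcv = tcp_rtt_tsopt_us(tp, 0); /* 0: caller drops the sample */
    s32 d_ack = tcp_rtt_tsopt_us(tp, 1); /* 1000 us: old 1-tick floor  */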
@@ -741,8 +749,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 copied;
- int time;
+ int time, inq, copied;
trace_tcp_rcv_space_adjust(sk);
@@ -753,6 +760,9 @@ void tcp_rcv_space_adjust(struct sock *sk)
/* Number of bytes copied to user in last RTT */
copied = tp->copied_seq - tp->rcvq_space.seq;
+ /* Number of bytes in receive queue. */
+ inq = tp->rcv_nxt - tp->copied_seq;
+ copied -= inq;
if (copied <= tp->rcvq_space.space)
goto new_measure;
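Subtracting the unread backlog makes the measurement track what the application actually consumed during the last RTT rather than what the sender managed to deliver. A worked example with made-up numbers:

    copied = tp->copied_seq - tp->rcvq_space.seq;  /* 102400 copied    */
    inq    = tp->rcv_nxt - tp->copied_seq;         /*  20480 unread    */
    copied -= inq;                                 /*  81920 effective */
    /* autotuning now sizes the buffer for an ~80 KB/RTT reader */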
@@ -1154,15 +1164,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-/* Updates the delivered and delivered_ce counts */
-static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
- bool ece_ack)
-{
- tp->delivered += delivered;
- if (ece_ack)
- tp->delivered_ce += delivered;
-}
-
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -2481,20 +2482,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
const struct sock *sk = (const struct sock *)tp;
- if (tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
- return true; /* got echoed TS before first retransmission */
+ /* Received an echoed timestamp before the first retransmission? */
+ if (tp->retrans_stamp)
+ return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+
+ /* We set tp->retrans_stamp upon the first retransmission of a loss
+ * recovery episode, so normally if tp->retrans_stamp is 0 then no
+ * retransmission has happened yet (likely due to TSQ, which can cause
+ * fast retransmits to be delayed). So if snd_una advanced while
+ * tp->retrans_stamp is 0, then apparently a packet was merely delayed,
+ * not lost. But there are exceptions where we retransmit but then
+ * clear tp->retrans_stamp, so we check for those exceptions.
+ */
+
+ /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen()
+ * clears tp->retrans_stamp when snd_una == high_seq.
+ */
+ if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq))
+ return false;
- /* Check if nothing was retransmitted (retrans_stamp==0), which may
- * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
- * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
- * retrans_stamp even if we had retransmitted the SYN.
+ /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp
+ * when FLAG_SYN_ACKED is set, even if the SYN was
+ * retransmitted.
*/
- if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
- sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
- return true; /* nothing was retransmitted */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return false;
- return false;
+ return true; /* tp->retrans_stamp is zero; no retransmit yet */
}
/* Undo procedures. */
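Condensed, the rewritten predicate reads as a three-way decision (the same logic as the hunk above, restated without the interleaved comments):

    if (tp->retrans_stamp)                  /* a retransmit is recorded */
        return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
    if (!tcp_is_sack(tp) &&                 /* exception (1): non-SACK  */
        !before(tp->snd_una, tp->high_seq)) /* reopen cleared the stamp */
        return false;
    if (sk->sk_state == TCP_SYN_SENT)       /* exception (2): SYN_SENT  */
        return false;
    return true;                            /* nothing retransmitted    */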
@@ -3221,7 +3235,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+ seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -3862,12 +3876,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
}
}
-static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
+static void tcp_in_ack_event(struct sock *sk, int flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->in_ack_event)
- icsk->icsk_ca_ops->in_ack_event(sk, flags);
+ if (icsk->icsk_ca_ops->in_ack_event) {
+ u32 ack_ev_flags = 0;
+
+ if (flag & FLAG_WIN_UPDATE)
+ ack_ev_flags |= CA_ACK_WIN_UPDATE;
+ if (flag & FLAG_SLOWPATH) {
+ ack_ev_flags |= CA_ACK_SLOWPATH;
+ if (flag & FLAG_ECE)
+ ack_ev_flags |= CA_ACK_ECE;
+ }
+
+ icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
+ }
}
/* Congestion control has updated the cwnd already. So if we're in
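With this change the flag-to-event translation lives in one place, and tcp_ack() emits the event once per ACK (including the no_queue path added below) instead of separately on the fast and slow paths. A hypothetical congestion-control callback would observe the same events as before; in this sketch only the in_ack_event hook and the CA_ACK_* flags are real names:

    static void demo_in_ack_event(struct sock *sk, u32 flags)
    {
        if (flags & CA_ACK_SLOWPATH) {
            if (flags & CA_ACK_ECE)
                ; /* the receiver echoed an ECN mark */
        }
        if (flags & CA_ACK_WIN_UPDATE)
            ; /* the advertised window advanced */
    }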
@@ -3984,12 +4009,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
- tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
- u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
@@ -4001,19 +4022,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+ if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
- ack_ev_flags |= CA_ACK_ECE;
- }
if (sack_state.sack_delivered)
tcp_count_delivered(tp, sack_state.sack_delivered,
flag & FLAG_ECE);
-
- if (flag & FLAG_WIN_UPDATE)
- ack_ev_flags |= CA_ACK_WIN_UPDATE;
-
- tcp_in_ack_event(sk, ack_ev_flags);
}
/* This is a deviation from RFC3168 since it states that:
@@ -4040,6 +4054,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
+ tcp_in_ack_event(sk, flag);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -4071,6 +4087,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ tcp_in_ack_event(sk, flag);
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
@@ -6833,6 +6850,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
+ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
if (req) {
tcp_rcv_synrecv_state_fastopen(sk);
} else {
@@ -6858,9 +6878,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- if (tp->rx_opt.tstamp_ok)
- tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
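Moving this block earlier in tcp_rcv_state_process() means tp->advmss already reflects the timestamp option by the time the code paths below run. The arithmetic itself is a fixed aligned deduction, sketched with a hypothetical Ethernet-derived MSS:

    u16 advmss = 1460;                 /* hypothetical negotiated MSS   */
    advmss -= TCPOLEN_TSTAMP_ALIGNED;  /* 1460 - 12 = 1448 payload bytes */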