| author | Jani Nikula <jani.nikula@intel.com> | 2025-06-09 12:40:46 +0300 |
|---|---|---|
| committer | Jani Nikula <jani.nikula@intel.com> | 2025-06-09 12:40:46 +0300 |
| commit | 34c55367af96f62e89221444f04487440ebc6487 (patch) | |
| tree | fdb36ba67d7dea09455b55037e26043b7e051ef9 /net/ipv4/tcp_input.c | |
| parent | 7247efca0dcbc8ac6147db9200ed1549c0662465 (diff) | |
| parent | 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 (diff) | |
| download | linux-34c55367af96f62e89221444f04487440ebc6487.tar.xz | |
Merge drm/drm-next into drm-intel-next
Sync to v6.16-rc1, among other things to get the fixed size GENMASK_U*()
and BIT_U*() macros.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
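The fixed size GENMASK_U*() and BIT_U*() helpers mentioned above yield masks with an explicit fixed-width unsigned type instead of unsigned long. The snippet below is a hypothetical userspace approximation for illustration only; the real macros live in include/linux/bits.h and carry additional compile-time argument checks.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins (NOT the kernel definitions): a fixed-width
 * GENMASK_U32(h, l) sets bits l..h in a u32, BIT_U8(n) sets bit n in a u8.
 */
#define GENMASK_U32(h, l) \
	((uint32_t)(((~UINT32_C(0)) << (l)) & ((~UINT32_C(0)) >> (31 - (h)))))
#define BIT_U8(n) ((uint8_t)(1u << (n)))

int main(void)
{
	printf("GENMASK_U32(15, 8) = 0x%08" PRIx32 "\n", GENMASK_U32(15, 8)); /* 0x0000ff00 */
	printf("BIT_U8(3)          = 0x%02x\n", (unsigned int)BIT_U8(3));     /* 0x08 */
	return 0;
}
```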
Diffstat (limited to 'net/ipv4/tcp_input.c')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | net/ipv4/tcp_input.c | 110 |

1 file changed, 53 insertions(+), 57 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a35018e2d0ba..8ec92dec321a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -664,10 +664,12 @@ EXPORT_IPV6_MOD(tcp_initialize_rcv_mss);
  */
 static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 {
-	u32 new_sample = tp->rcv_rtt_est.rtt_us;
-	long m = sample;
+	u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us;
+	long m = sample << 3;
 
-	if (new_sample != 0) {
+	if (old_sample == 0 || m < old_sample) {
+		new_sample = m;
+	} else {
 		/* If we sample in larger samples in the non-timestamp
 		 * case, we could grossly overestimate the RTT especially
 		 * with chatty applications or bulk transfer apps which
@@ -678,17 +680,12 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		 * else with timestamps disabled convergence takes too
 		 * long.
 		 */
-		if (!win_dep) {
-			m -= (new_sample >> 3);
-			new_sample += m;
-		} else {
-			m <<= 3;
-			if (m < new_sample)
-				new_sample = m;
-		}
-	} else {
-		/* No previous measure. */
-		new_sample = m << 3;
+		if (win_dep)
+			return;
+		/* Do not use this sample if receive queue is not empty. */
+		if (tp->rcv_nxt != tp->copied_seq)
+			return;
+		new_sample = old_sample - (old_sample >> 3) + sample;
 	}
 
 	tp->rcv_rtt_est.rtt_us = new_sample;
@@ -712,7 +709,7 @@ new_measure:
 	tp->rcv_rtt_est.time = tp->tcp_mstamp;
 }
 
-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
 {
 	u32 delta, delta_us;
 
@@ -722,7 +719,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
 
 	if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
 		if (!delta)
-			delta = 1;
+			delta = min_delta;
 		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 		return delta_us;
 	}
@@ -740,13 +737,39 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 
 	if (TCP_SKB_CB(skb)->end_seq -
 	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
-		s32 delta = tcp_rtt_tsopt_us(tp);
+		s32 delta = tcp_rtt_tsopt_us(tp, 0);
 
-		if (delta >= 0)
+		if (delta > 0)
 			tcp_rcv_rtt_update(tp, delta, 0);
 	}
 }
 
+static void tcp_rcvbuf_grow(struct sock *sk)
+{
+	const struct net *net = sock_net(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	int rcvwin, rcvbuf, cap;
+
+	if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
+	    (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+		return;
+
+	/* slow start: allow the sender to double its rate. */
+	rcvwin = tp->rcvq_space.space << 1;
+
+	if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
+		rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt;
+
+	cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+	rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap);
+	if (rcvbuf > sk->sk_rcvbuf) {
+		WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+		/* Make the window clamp follow along.  */
+		WRITE_ONCE(tp->window_clamp,
+			   tcp_win_from_space(sk, rcvbuf));
+	}
+}
 /*
  * This function should be called every time data is copied to user space.
  * It calculates the appropriate TCP receive buffer space.
@@ -754,8 +777,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 copied;
-	int time;
+	int time, inq, copied;
 
 	trace_tcp_rcv_space_adjust(sk);
 
@@ -766,45 +788,18 @@ void tcp_rcv_space_adjust(struct sock *sk)
 
 	/* Number of bytes copied to user in last RTT */
 	copied = tp->copied_seq - tp->rcvq_space.seq;
+	/* Number of bytes in receive queue. */
+	inq = tp->rcv_nxt - tp->copied_seq;
+	copied -= inq;
 	if (copied <= tp->rcvq_space.space)
 		goto new_measure;
 
-	/* A bit of theory :
-	 * copied = bytes received in previous RTT, our base window
-	 * To cope with packet losses, we need a 2x factor
-	 * To cope with slow start, and sender growing its cwin by 100 %
-	 * every RTT, we need a 4x factor, because the ACK we are sending
-	 * now is for the next RTT, not the current one :
-	 * <prev RTT . ><current RTT .. ><next RTT .... >
-	 */
-
-	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
-	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-		u64 rcvwin, grow;
-		int rcvbuf;
-
-		/* minimal window to cope with packet losses, assuming
-		 * steady state. Add some cushion because of small variations.
-		 */
-		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
+	trace_tcp_rcvbuf_grow(sk, time);
 
-		/* Accommodate for sender rate increase (eg. slow start) */
-		grow = rcvwin * (copied - tp->rcvq_space.space);
-		do_div(grow, tp->rcvq_space.space);
-		rcvwin += (grow << 1);
-
-		rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
-			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
-		if (rcvbuf > sk->sk_rcvbuf) {
-			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
-
-			/* Make the window clamp follow along.  */
-			WRITE_ONCE(tp->window_clamp,
-				   tcp_win_from_space(sk, rcvbuf));
-		}
-	}
 	tp->rcvq_space.space = copied;
+	tcp_rcvbuf_grow(sk);
+
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
 	tp->rcvq_space.time = tp->tcp_mstamp;
@@ -3226,7 +3221,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
 	    tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
-		seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+		seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);
 
 	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
 	if (seq_rtt_us < 0)
@@ -5173,6 +5168,7 @@ end:
 		skb_condense(skb);
 		skb_set_owner_r(skb, sk);
 	}
+	tcp_rcvbuf_grow(sk);
 }
 
 static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
@@ -6873,6 +6869,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
+		if (tp->rx_opt.tstamp_ok)
+			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
 		if (req) {
 			tcp_rcv_synrecv_state_fastopen(sk);
 		} else {
@@ -6898,9 +6897,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
-		if (tp->rx_opt.tstamp_ok)
-			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
 			tcp_update_pacing_rate(sk);
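For readers tracing the tcp_rcv_rtt_update() change in the diff above: with timestamps disabled, the receiver-side RTT estimate (kept left-shifted by 3, i.e. in units of 1/8 us) now adopts any lower sample immediately, ignores window-dependent samples and samples taken while the receive queue is non-empty, and otherwise applies a 7/8 EWMA. The following is a minimal standalone sketch of that update rule; the function name and userspace types are mine, not the kernel code.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of the reworked receiver RTT estimator: the estimate
 * is stored left-shifted by 3, a lower sample replaces it at once, and a
 * higher sample is folded in via a 7/8 EWMA only when the receive queue
 * has been drained and the sample is not window-dependent.
 */
static void rcv_rtt_update_sketch(uint32_t *rtt_us_x8, uint32_t sample_us,
				  bool win_dep, bool rcvq_empty)
{
	uint32_t m = sample_us << 3;

	if (*rtt_us_x8 == 0 || m < *rtt_us_x8) {
		*rtt_us_x8 = m;              /* take lower samples immediately */
		return;
	}
	if (win_dep || !rcvq_empty)          /* skip potentially inflated samples */
		return;
	/* new = old - old/8 + sample  ==  7/8 * old + (sample << 3) / 8 */
	*rtt_us_x8 = *rtt_us_x8 - (*rtt_us_x8 >> 3) + sample_us;
}

int main(void)
{
	uint32_t est = 0;

	rcv_rtt_update_sketch(&est, 1000, false, true); /* first sample: 1000 us */
	rcv_rtt_update_sketch(&est, 2000, false, true); /* smoothed upward */
	printf("estimate = %u us\n", est >> 3);         /* ~1125 us */
	return 0;
}
```

With an existing 1000 us estimate, a 2000 us sample moves the estimate to roughly 1125 us, while a 500 us sample would replace it outright.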
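The new tcp_rcvbuf_grow() above sizes the advertised window at twice what the application consumed in the last RTT, plus whatever is currently held in the out-of-order queue, then converts that window to a buffer size and caps it at tcp_rmem[2], only ever growing sk_rcvbuf. A rough standalone model of that arithmetic, with a made-up overhead factor standing in for tcp_space_from_win() and hypothetical names, might look like:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of the tcp_rcvbuf_grow() sizing rule: window = twice
 * the bytes consumed last RTT plus out-of-order backlog, buffer = window
 * scaled by a crude overhead factor, clamped to an rmem-max style limit,
 * and never shrunk below the current buffer.
 */
static uint32_t rcvbuf_grow_sketch(uint32_t copied_last_rtt, uint32_t ooo_bytes,
				   uint32_t cur_rcvbuf, uint32_t rmem_max)
{
	uint64_t rcvwin = (uint64_t)copied_last_rtt * 2 + ooo_bytes;
	uint64_t rcvbuf = rcvwin * 2;   /* stand-in for skb/truesize overhead */

	if (rcvbuf > rmem_max)
		rcvbuf = rmem_max;
	return rcvbuf > cur_rcvbuf ? (uint32_t)rcvbuf : cur_rcvbuf;
}

int main(void)
{
	/* App drained 256 KB last RTT, 64 KB waits in the OOO queue. */
	printf("new rcvbuf = %u bytes\n",
	       (unsigned int)rcvbuf_grow_sketch(256 * 1024, 64 * 1024,
						131072, 6291456));
	return 0;
}
```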
