diff options
| -rw-r--r-- | include/net/inet_connection_sock.h | 5 |
| -rw-r--r-- | include/net/tcp.h | 2 |
| -rw-r--r-- | net/dccp/timer.c | 4 |
| -rw-r--r-- | net/ipv4/tcp.c | 4 |
| -rw-r--r-- | net/ipv4/tcp_input.c | 29 |
| -rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
6 files changed, 37 insertions, 9 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..d6d9d1c1985a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -114,7 +114,10 @@ struct inet_connection_sock {  		__u8		  quick;	 /* Scheduled number of quick acks	   */  		__u8		  pingpong;	 /* The session is interactive		   */  		__u8		  retry;	 /* Number of attempts			   */ -		__u32		  ato;		 /* Predicted tick of soft clock	   */ +		#define ATO_BITS 8 +		__u32		  ato:ATO_BITS,	 /* Predicted tick of soft clock	   */ +				  lrcv_flowlabel:20, /* last received ipv6 flowlabel	   */ +				  unused:4;  		unsigned long	  timeout;	 /* Currently scheduled timeout		   */  		__u32		  lrcvtime;	 /* timestamp of last received data packet */  		__u16		  last_seg_size; /* Size of last incoming segment	   */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb0a2855311..7fdedf5c71f0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);  #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */  #define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); +  #if HZ >= 100  #define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */  #define TCP_ATO_MIN	((unsigned)(HZ/25)) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b3255e87cc7e..a4cfb47b60e5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t)  	if (inet_csk_ack_scheduled(sk)) {  		if (!inet_csk_in_pingpong_mode(sk)) {  			/* Delayed ACK missed: inflate ATO. 
*/ -			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, -						 icsk->icsk_rto); +			icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, +						   icsk->icsk_rto);  		} else {  			/* Delayed ACK missed: leave pingpong mode and  			 * deflate ATO. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9a8b134d8ada..faabb5a4a378 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)  		info->tcpi_options |= TCPI_OPT_SYN_DATA;  	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); -	info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, -					      tcp_delack_max(sk))); +	info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, +						tcp_delack_max(sk)));  	info->tcpi_snd_mss = tp->mss_cache;  	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4b8f2e74d71d..ab87f0285b72 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -778,6 +778,16 @@ new_measure:  	tp->rcvq_space.time = tp->tcp_mstamp;  } +static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) +	struct inet_connection_sock *icsk = inet_csk(sk); + +	if (skb->protocol == htons(ETH_P_IPV6)) +		icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); +#endif +} +  /* There is something which you must keep in mind when you analyze the   * behavior of the tp->ato delayed ack timeout interval.  When a   * connection starts up, we want to ack as quickly as possible.  
The @@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)  		}  	}  	icsk->icsk_ack.lrcvtime = now; +	tcp_save_lrcv_flowlabel(sk, skb);  	tcp_ecn_check_ce(sk, skb); @@ -4513,12 +4524,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)  {  	/* When the ACK path fails or drops most ACKs, the sender would  	 * timeout and spuriously retransmit the same segment repeatedly. -	 * The receiver remembers and reflects via DSACKs. Leverage the -	 * DSACK state and change the txhash to re-route speculatively. +	 * If it seems our ACKs are not reaching the other side, +	 * based on receiving a duplicate data segment with new flowlabel +	 * (suggesting the sender suffered an RTO), and we are not already +	 * repathing due to our own RTO, then rehash the socket to repath our +	 * packets.  	 */ -	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && +#if IS_ENABLED(CONFIG_IPV6) +	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss && +	    skb->protocol == htons(ETH_P_IPV6) && +	    (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel != +	     ntohl(ip6_flowlabel(ipv6_hdr(skb)))) &&  	    sk_rethink_txhash(sk))  		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + +	/* Save last flowlabel after a spurious retrans. 
*/ +	tcp_save_lrcv_flowlabel(sk, skb); +#endif  }  static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) @@ -4835,6 +4857,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)  	u32 seq, end_seq;  	bool fragstolen; +	tcp_save_lrcv_flowlabel(sk, skb);  	tcp_ecn_check_ce(sk, skb);  	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3f61c6a70a1f..0862b73dd3b5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk)  	if (inet_csk_ack_scheduled(sk)) {  		if (!inet_csk_in_pingpong_mode(sk)) {  			/* Delayed ACK missed: inflate ATO. */ -			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); +			icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);  		} else {  			/* Delayed ACK missed: leave pingpong mode and  			 * deflate ATO.  | 
