diff options
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
| -rw-r--r-- | net/ipv4/tcp_metrics.c | 83 | 
1 files changed, 49 insertions, 34 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d547075d8300..dcaf72f10216 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {  	struct	tcp_fastopen_cookie	cookie;  }; +/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility + * Kernel only stores RTT and RTTVAR in usec resolution + */ +#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2) +  struct tcp_metrics_block {  	struct tcp_metrics_block __rcu	*tcpm_next;  	struct inetpeer_addr		tcpm_saddr; @@ -41,7 +46,7 @@ struct tcp_metrics_block {  	u32				tcpm_ts;  	u32				tcpm_ts_stamp;  	u32				tcpm_lock; -	u32				tcpm_vals[TCP_METRIC_MAX + 1]; +	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];  	struct tcp_fastopen_metrics	tcpm_fastopen;  	struct rcu_head			rcu_head; @@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,  	return tm->tcpm_vals[idx];  } -static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, -				  enum tcp_metric_index idx) -{ -	return msecs_to_jiffies(tm->tcpm_vals[idx]); -} -  static void tcp_metric_set(struct tcp_metrics_block *tm,  			   enum tcp_metric_index idx,  			   u32 val) @@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,  	tm->tcpm_vals[idx] = val;  } -static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, -				 enum tcp_metric_index idx, -				 u32 val) -{ -	tm->tcpm_vals[idx] = jiffies_to_msecs(val); -} -  static bool addr_same(const struct inetpeer_addr *a,  		      const struct inetpeer_addr *b)  { @@ -101,9 +93,11 @@ struct tcpm_hash_bucket {  static DEFINE_SPINLOCK(tcp_metrics_lock); -static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, +static void tcpm_suck_dst(struct tcp_metrics_block *tm, +			  const struct dst_entry *dst,  			  bool fastopen_clear)  { +	u32 msval;  	u32 val;  	tm->tcpm_stamp = jiffies; @@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,  		val |= 1 << TCP_METRIC_REORDERING;  	tm->tcpm_lock = val; -	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); -	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); +	msval = dst_metric_raw(dst, RTAX_RTT); +	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + +	msval = dst_metric_raw(dst, RTAX_RTTVAR); +	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;  	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);  	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);  	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); @@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)  		dst_confirm(dst);  	rcu_read_lock(); -	if (icsk->icsk_backoff || !tp->srtt) { +	if (icsk->icsk_backoff || !tp->srtt_us) {  		/* This session failed to estimate rtt. Why?  		 * Probably, no packets returned in time.  Reset our  		 * results. @@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)  	if (!tm)  		goto out_unlock; -	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); -	m = rtt - tp->srtt; +	rtt = tcp_metric_get(tm, TCP_METRIC_RTT); +	m = rtt - tp->srtt_us;  	/* If newly calculated rtt larger than stored one, store new  	 * one. Otherwise, use EWMA. Remember, rtt overestimation is @@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)  	 */  	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {  		if (m <= 0) -			rtt = tp->srtt; +			rtt = tp->srtt_us;  		else  			rtt -= (m >> 3); -		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); +		tcp_metric_set(tm, TCP_METRIC_RTT, rtt);  	}  	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { @@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)  		/* Scale deviation to rttvar fixed point */  		m >>= 1; -		if (m < tp->mdev) -			m = tp->mdev; +		if (m < tp->mdev_us) +			m = tp->mdev_us; -		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); +		var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);  		if (m >= var)  			var = m;  		else  			var -= (var - m) >> 2; -		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); +		tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);  	}  	if (tcp_in_initial_slowstart(tp)) { @@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)  		tp->reordering = val;  	} -	crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); +	crtt = tcp_metric_get(tm, TCP_METRIC_RTT);  	rcu_read_unlock();  reset:  	/* The initial RTT measurement from the SYN/SYN-ACK is not ideal @@ -551,18 +548,20 @@ reset:  	 * to low value, and then abruptly stops to do it and starts to delay  	 * ACKs, wait for troubles.  	 */ -	if (crtt > tp->srtt) { +	if (crtt > tp->srtt_us) {  		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ -		crtt >>= 3; +		crtt /= 8 * USEC_PER_MSEC;  		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); -	} else if (tp->srtt == 0) { +	} else if (tp->srtt_us == 0) {  		/* RFC6298: 5.7 We've failed to get a valid RTT sample from  		 * 3WHS. This is most likely due to retransmission,  		 * including spurious one. Reset the RTO back to 3secs  		 * from the more aggressive 1sec to avoid more spurious  		 * retransmission.  		 */ -		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; +		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); +		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; +  		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;  	}  	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been @@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,  		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);  		if (!nest)  			goto nla_put_failure; -		for (i = 0; i < TCP_METRIC_MAX + 1; i++) { -			if (!tm->tcpm_vals[i]) +		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { +			u32 val = tm->tcpm_vals[i]; + +			if (!val)  				continue; -			if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) +			if (i == TCP_METRIC_RTT) { +				if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1, +						val) < 0) +					goto nla_put_failure; +				n++; +				val = max(val / 1000, 1U); +			} +			if (i == TCP_METRIC_RTTVAR) { +				if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1, +						val) < 0) +					goto nla_put_failure; +				n++; +				val = max(val / 1000, 1U); +			} +			if (nla_put_u32(msg, i + 1, val) < 0)  				goto nla_put_failure;  			n++;  		}  | 
