summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-05-17 23:06:03 +0300
committerDavid S. Miller <davem@davemloft.net>2017-05-17 23:06:03 +0300
commite26925ec03b31f5ae4c1fc544515486229334ef9 (patch)
tree6f1502edf55ecb7205660d62bd683ebcf912cfea /include
parent9d4f97f97bb8adc47f569d995402c33de9a4fa19 (diff)
parent9a568de4818dea9a05af141046bd3e589245ab83 (diff)
downloadlinux-e26925ec03b31f5ae4c1fc544515486229334ef9.tar.xz
Merge branch 'tcp-TCP-TS-option-use-1-ms-clock'
Eric Dumazet says: ==================== tcp: TCP TS option use 1 ms clock TCP Timestamps option is defined in RFC 7323 Traditionally on linux, it has been tied to the internal 'jiffy' variable, because it had been a cheap and good enough generator. Unfortunately some distros use HZ=250 or even HZ=100 leading to not very useful TCP timestamps. For TCP flows in the DC, Google has used usec resolution for more than two years with great success [1]. RCVBUF autotuning is more precise. This series converts tp->tcp_mstamp to a plain u64 value storing a 1 usec TCP clock. This choice will allow us to upstream the 1 usec TS option as discussed in IETF 97. Kathleen Nichols [2] and others advocate for 1ms TS clocks for network analysis. (1ms being the lowest value supported by RFC 7323.) [1] https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf [2] http://netseminar.stanford.edu/seminars/02_02_17.pdf ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/skbuff.h62
-rw-r--r--include/linux/tcp.h22
-rw-r--r--include/net/tcp.h74
3 files changed, 71 insertions, 87 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bfc7892f6c33..7c0cb2ce8b01 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,66 +506,6 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
-/**
- * struct skb_mstamp - multi resolution time stamps
- * @stamp_us: timestamp in us resolution
- * @stamp_jiffies: timestamp in jiffies
- */
-struct skb_mstamp {
- union {
- u64 v64;
- struct {
- u32 stamp_us;
- u32 stamp_jiffies;
- };
- };
-};
-
-/**
- * skb_mstamp_get - get current timestamp
- * @cl: place to store timestamps
- */
-static inline void skb_mstamp_get(struct skb_mstamp *cl)
-{
- u64 val = local_clock();
-
- do_div(val, NSEC_PER_USEC);
- cl->stamp_us = (u32)val;
- cl->stamp_jiffies = (u32)jiffies;
-}
-
-/**
- * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
- * @t1: pointer to newest sample
- * @t0: pointer to oldest sample
- */
-static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
- const struct skb_mstamp *t0)
-{
- s32 delta_us = t1->stamp_us - t0->stamp_us;
- u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies;
-
- /* If delta_us is negative, this might be because interval is too big,
- * or local_clock() drift is too big : fallback using jiffies.
- */
- if (delta_us <= 0 ||
- delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ)))
-
- delta_us = jiffies_to_usecs(delta_jiffies);
-
- return delta_us;
-}
-
-static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
- const struct skb_mstamp *t0)
-{
- s32 diff = t1->stamp_jiffies - t0->stamp_jiffies;
-
- if (!diff)
- diff = t1->stamp_us - t0->stamp_us;
- return diff > 0;
-}
-
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -646,7 +586,7 @@ struct sk_buff {
union {
ktime_t tstamp;
- struct skb_mstamp skb_mstamp;
+ u64 skb_mstamp;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 22854f028434..542ca1ae02c4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -123,7 +123,7 @@ struct tcp_request_sock_ops;
struct tcp_request_sock {
struct inet_request_sock req;
const struct tcp_request_sock_ops *af_specific;
- struct skb_mstamp snt_synack; /* first SYNACK sent time */
+ u64 snt_synack; /* first SYNACK sent time */
bool tfo_listener;
u32 txhash;
u32 rcv_isn;
@@ -211,7 +211,7 @@ struct tcp_sock {
/* Information of the most recently (s)acked skb */
struct tcp_rack {
- struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+ u64 mstamp; /* (Re)sent time of the skb */
u32 rtt_us; /* Associated RTT */
u32 end_seq; /* Ending TCP sequence of the skb */
u8 advanced; /* mstamp advanced since last lost marking */
@@ -240,7 +240,7 @@ struct tcp_sock {
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */
- struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */
+ u64 tcp_mstamp; /* most recent packet received/sent */
u32 srtt_us; /* smoothed round trip time << 3 in usecs */
u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
@@ -280,8 +280,8 @@ struct tcp_sock {
u32 delivered; /* Total data packets delivered incl. rexmits */
u32 lost; /* Total data packets lost incl. rexmits */
u32 app_limited; /* limited until "delivered" reaches this val */
- struct skb_mstamp first_tx_mstamp; /* start of window send phase */
- struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
+ u64 first_tx_mstamp; /* start of window send phase */
+ u64 delivered_mstamp; /* time we reached "delivered" */
u32 rate_delivered; /* saved rate sample: packets delivered */
u32 rate_interval_us; /* saved rate sample: time elapsed */
@@ -335,16 +335,16 @@ struct tcp_sock {
/* Receiver side RTT estimation */
struct {
- u32 rtt_us;
- u32 seq;
- struct skb_mstamp time;
+ u32 rtt_us;
+ u32 seq;
+ u64 time;
} rcv_rtt_est;
/* Receiver queue space */
struct {
- int space;
- u32 seq;
- struct skb_mstamp time;
+ int space;
+ u32 seq;
+ u64 time;
} rcvq_space;
/* TCP-specific MTU probe information. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b4dc93dae98c..82462db97183 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -519,7 +519,7 @@ static inline u32 tcp_cookie_time(void)
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-__u32 cookie_init_timestamp(struct request_sock *req);
+u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
const struct net *net, const struct dst_entry *dst);
@@ -700,17 +700,61 @@ u32 __tcp_select_window(struct sock *sk);
void tcp_send_window_probe(struct sock *sk);
-/* TCP timestamps are only 32-bits, this causes a slight
- * complication on 64-bit systems since we store a snapshot
- * of jiffies in the buffer control blocks below. We decided
- * to use only the low 32-bits of jiffies and hide the ugly
- * casts with the following macro.
+/* TCP uses 32bit jiffies to save some space.
+ * Note that this is different from tcp_time_stamp, which
+ * historically has been the same until linux-4.13.
*/
-#define tcp_time_stamp ((__u32)(jiffies))
+#define tcp_jiffies32 ((u32)jiffies)
+
+/*
+ * Deliver a 32bit value for TCP timestamp option (RFC 7323)
+ * It is no longer tied to jiffies, but to 1 ms clock.
+ * Note: double check if you want to use tcp_jiffies32 instead of this.
+ */
+#define TCP_TS_HZ 1000
+
+static inline u64 tcp_clock_ns(void)
+{
+ return local_clock();
+}
+
+static inline u64 tcp_clock_us(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+}
+
+/* This should only be used in contexts where tp->tcp_mstamp is up to date */
+static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+{
+ return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+}
+
+/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
+static inline u32 tcp_time_stamp_raw(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+
+/* Refresh 1us clock of a TCP socket,
+ * ensuring monotically increasing values.
+ */
+static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+ u64 val = tcp_clock_us();
+
+ if (val > tp->tcp_mstamp)
+ tp->tcp_mstamp = val;
+}
+
+static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
+{
+ return max_t(s64, t1 - t0, 0);
+}
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
- return skb->skb_mstamp.stamp_jiffies;
+ return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}
@@ -775,9 +819,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
- struct skb_mstamp first_tx_mstamp;
+ u64 first_tx_mstamp;
/* when we reached the "delivered" count */
- struct skb_mstamp delivered_mstamp;
+ u64 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -893,7 +937,7 @@ struct ack_sample {
* A sample is invalid if "delivered" or "interval_us" is negative.
*/
struct rate_sample {
- struct skb_mstamp prior_mstamp; /* starting timestamp for interval */
+ u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
s32 delivered; /* number of packets delivered over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
@@ -1242,7 +1286,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
ca_ops->cong_control)
return;
- delta = tcp_time_stamp - tp->lsndtime;
+ delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
tcp_cwnd_restart(sk, delta);
}
@@ -1304,8 +1348,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
{
const struct inet_connection_sock *icsk = &tp->inet_conn;
- return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
- tcp_time_stamp - tp->rcv_tstamp);
+ return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
+ tcp_jiffies32 - tp->rcv_tstamp);
}
static inline int tcp_fin_time(const struct sock *sk)
@@ -1859,7 +1903,7 @@ void tcp_init(void);
/* tcp_recovery.c */
extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time);
+ u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
/*