diff options
| author | David S. Miller <davem@davemloft.net> | 2016-11-30 18:04:32 +0300 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-11-30 18:04:32 +0300 |
| commit | 6d5274eba48539bf4fcea3fa85d8789e4a6b3425 (patch) | |
| tree | 4878c60feb2769477239b63194ce5946129bcd75 /include | |
| parent | a090994980a15f8cc14fc188b5929bd61d2ae9c3 (diff) | |
| parent | 1c885808e45601b2b6f68b30ac1d999e10b6f606 (diff) | |
| download | linux-6d5274eba48539bf4fcea3fa85d8789e4a6b3425.tar.xz | |
Merge branch 'tcp-sender-chronographs'
Yuchung Cheng says:
====================
tcp: sender chronographs instrumentation
This patch set provides instrumentation on TCP sender limitations.
While developing the BBR congestion control, we noticed that TCP
sending process is often limited by factors unrelated to congestion
control: insufficient sender buffer and/or insufficient receive
window/buffer to saturate the network bandwidth. Unfortunately these
limits are not visible to the users and often the poor performance
is attributed to the congestion control of choice.
Thie patch aims to help users get the high level understanding of
where sending process is limited by, similar to the TCP_INFO design.
It is not to replace detailed kernel tracing and instrumentation
facilities.
In addition this patch set provide a new option to the timestamping
work to instrument these limits on application data unit. For exampe,
one can use SO_TIMESTAMPING and this patch set to measure the how
long a particular HTTP response is limited by small receive window.
Patch set was initially written by Francis Yan then polished
by Yuchung Cheng, with lots of help from Eric Dumazet and Soheil
Hassas Yeganeh.
====================
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/tcp.h | 9 | ||||
| -rw-r--r-- | include/net/tcp.h | 20 | ||||
| -rw-r--r-- | include/uapi/asm-generic/socket.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/net_tstamp.h | 3 | ||||
| -rw-r--r-- | include/uapi/linux/tcp.h | 12 |
5 files changed, 42 insertions, 4 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32a7c7e35b71..00e0ee8f001f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -211,8 +211,11 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u8 chrono_type:2, /* current chronograph type */ + rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -425,4 +428,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) tp->saved_syn = NULL; } +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); + #endif /* _LINUX_TCP_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de80739adab..3e097e39d4d2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1516,11 +1516,26 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +/* Latencies incurred by various limits for a sender. They are + * chronograph-like stats that are mutually exclusive. + */ +enum tcp_chrono { + TCP_CHRONO_UNSPEC, + TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */ + TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */ + TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */ + __TCP_CHRONO_MAX, +}; + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); @@ -1579,8 +1594,10 @@ static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff * static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) + if (sk->sk_send_head == skb_unlinked) { sk->sk_send_head = NULL; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + } if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } @@ -1602,6 +1619,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb /* Queue it, remembering where we must start sending. */ if (sk->sk_send_head == NULL) { sk->sk_send_head = skb; + tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) tcp_sk(sk)->highest_sack = skb; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 67d632f1743d..2c748ddad5f8 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -92,4 +92,6 @@ #define SO_CNX_ADVICE 53 +#define SCM_TIMESTAMPING_OPT_STATS 54 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 264e515de16f..464dcca5ed68 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -25,8 +25,9 @@ enum { SOF_TIMESTAMPING_TX_ACK = (1<<9), SOF_TIMESTAMPING_OPT_CMSG = (1<<10), SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), + SOF_TIMESTAMPING_OPT_STATS = (1<<12), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 73ac0db487f8..c53de2691cec 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -214,6 +214,18 @@ struct tcp_info { __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ __u64 tcpi_delivery_rate; + + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ +}; + +/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +enum { + TCP_NLA_PAD, + TCP_NLA_BUSY, /* Time (usec) busy sending data */ + TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ + TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ }; /* for TCP_MD5SIG socket option */ |
