Diffstat (limited to 'net/ipv4/tcp_bbr.c')
-rw-r--r--  net/ipv4/tcp_bbr.c  92
1 file changed, 60 insertions(+), 32 deletions(-)
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b89bce4c721e..69ee877574d0 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -52,10 +52,9 @@
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
- * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
- * since pacing is integral to the BBR design and implementation.
- * BBR without pacing would not function properly, and may incur unnecessary
- * high packet loss rates.
+ * NOTE: BBR may be used with the fq qdisc ("man tc-fq") with pacing enabled;
+ * otherwise the TCP stack falls back to internal pacing, using one
+ * high-resolution timer per TCP socket, which may use more resources.
*/
#include <linux/module.h>
#include <net/tcp.h>
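
For illustration (not part of this patch): since this change lets BBR fall
back to internal per-socket pacing when the fq qdisc is absent, an
application can opt a socket into BBR with the standard TCP_CONGESTION
socket option. A minimal userspace sketch, assuming the bbr module is
available on the running kernel:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0) {
            perror("socket");
            return 1;
        }
        /* TCP_CONGESTION selects the congestion control module by name. */
        if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "bbr",
                       strlen("bbr")) < 0)
            perror("setsockopt(TCP_CONGESTION)"); /* e.g. bbr not loaded */
        return 0;
    }

With fq installed, fq paces the flow as before; without it, the stack now
paces internally at the cost of one high-resolution timer per socket.
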
@@ -92,7 +91,7 @@ struct bbr {
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
- struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */
+ u64 cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
@@ -113,7 +112,8 @@ struct bbr {
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
- unused_b:6;
+ has_seen_rtt:1, /* have we seen an RTT sample yet? */
+ unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
@@ -212,6 +212,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
return rate >> BW_SCALE;
}
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
+ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+ bw = (u64)tp->snd_cwnd * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -221,12 +250,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 rate = bw;
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
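
For illustration (not part of this patch): bbr_init_pacing_rate_from_rtt()
seeds sk_pacing_rate at high_gain * init_cwnd / RTT, using a nominal 1 ms
RTT until a real srtt sample exists; once one arrives, the unlikely()
branch in bbr_set_pacing_rate() re-derives the rate. A standalone sketch
of the arithmetic in floating point, with illustrative cwnd/MSS values and
bbr_high_gain taken as 2/ln(2) ~= 2.885 per the BBR design (the module
itself works in fixed point):

    #include <stdio.h>

    int main(void)
    {
        double high_gain = 2.885;  /* BBR startup gain, 2/ln(2) */
        unsigned int cwnd = 10;    /* illustrative initial cwnd, packets */
        unsigned int mss = 1448;   /* illustrative MSS, bytes */
        double rtt = 0.001;        /* nominal 1 ms default RTT, seconds */

        /* pacing rate = high_gain * cwnd * mss / RTT, bytes per second */
        double rate = high_gain * cwnd * mss / rtt;

        printf("initial pacing rate: %.0f bytes/sec\n", rate);
        return 0;
    }
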
@@ -412,7 +442,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
- skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+ tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
@@ -498,7 +528,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+ bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
@@ -552,7 +582,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
- s32 t;
+ u32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
@@ -604,15 +634,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
return;
/* Find average delivery rate in this sampling interval. */
- t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
- if (t < 1)
- return; /* interval is less than one jiffy, so wait */
- t = jiffies_to_usecs(t);
- /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
- if (t < 1) {
+ t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+ if ((s32)t < 1)
+ return; /* interval is less than one ms, so wait */
+ /* Check if we can multiply without overflowing 32 bits */
+ if (t >= ~0U / USEC_PER_MSEC) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
+ t *= USEC_PER_MSEC;
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
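
For illustration (not part of this patch): t above is a millisecond
interval held in a u32, so scaling it to microseconds overflows 32 bits
once t reaches ~0U / USEC_PER_MSEC (about 71.6 minutes). A standalone
sketch of the same guard:

    #include <stdio.h>
    #include <stdint.h>

    #define USEC_PER_MSEC 1000U

    int main(void)
    {
        uint32_t t = 4294968U;  /* ms; just past ~0U / 1000, ~71.6 min */

        if (t >= ~0U / USEC_PER_MSEC) {
            /* t * 1000 would not fit in 32 bits; caller resets sampling */
            printf("interval too long, would overflow\n");
            return 0;
        }
        t *= USEC_PER_MSEC;  /* safe: product fits in 32 bits */
        printf("interval: %u us\n", t);
        return 0;
    }
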
@@ -731,12 +761,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
- filter_expired = after(tcp_time_stamp,
+ filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
}
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
@@ -755,7 +785,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
/* Maintain min packets in flight for max(200 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
- bbr->probe_rtt_done_stamp = tcp_time_stamp +
+ bbr->probe_rtt_done_stamp = tcp_jiffies32 +
msecs_to_jiffies(bbr_probe_rtt_mode_ms);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
@@ -763,8 +793,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
- after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
- bbr->min_rtt_stamp = tcp_time_stamp;
+ after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
+ bbr->min_rtt_stamp = tcp_jiffies32;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
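
For illustration (not part of this patch): the tcp_time_stamp ->
tcp_jiffies32 conversions above keep using after(), which stays correct
across 32-bit wraparound because it reduces to a signed difference. A
sketch of that idiom (not the kernel macro itself):

    #include <stdio.h>
    #include <stdint.h>

    /* Wrap-safe "a is later than b" for 32-bit tick counters; valid while
     * the two stamps are less than 2^31 ticks apart.
     */
    static int after32(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
        uint32_t before_wrap = 0xfffffff0U;
        uint32_t after_wrap = 0x10U;  /* 0x20 ticks later, past the wrap */

        printf("%d\n", after32(after_wrap, before_wrap)); /* 1: later */
        printf("%d\n", after32(before_wrap, after_wrap)); /* 0: not */
        return 0;
    }
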
@@ -799,7 +829,6 @@ static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
@@ -811,25 +840,24 @@ static void bbr_init(struct sock *sk)
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
- bw = (u64)tp->snd_cwnd * BW_UNIT;
- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
- bbr->cycle_mstamp.v64 = 0;
+ bbr->cycle_mstamp = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
+
+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
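
For illustration (not part of this patch): the cmpxchg() at the end of
bbr_init() only upgrades sk_pacing_status from SK_PACING_NONE to
SK_PACING_NEEDED, so it will not clobber a status the fq qdisc may already
have set. A sketch of that compare-and-swap pattern in C11 atomics (enum
values illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdio.h>

    enum { PACING_NONE, PACING_NEEDED, PACING_FQ }; /* stand-ins */

    int main(void)
    {
        _Atomic int status = PACING_FQ; /* pretend fq already owns pacing */
        int expected = PACING_NONE;

        /* Swaps in PACING_NEEDED only if status is still PACING_NONE. */
        atomic_compare_exchange_strong(&status, &expected, PACING_NEEDED);

        printf("status = %d\n", status); /* still PACING_FQ */
        return 0;
    }
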