summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.rst8
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c8
8 files changed, 68 insertions, 15 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 3266aee9e052..50b440d29a13 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER
Default : 1,000,000 ns (1 ms)
+tcp_comp_sack_slack_ns - LONG INTEGER
+ This sysctl control the slack used when arming the
+ timer used by SACK compression. This gives extra time
+ for small RTT flows, and reduces system overhead by allowing
+ opportunistic reduction of timer interrupts.
+
+ Default : 100,000 ns (100 us)
+
tcp_comp_sack_nr - INTEGER
Max number of SACK that can be compressed.
Using 0 disables SACK compression.
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 421c99c12291..2c6f87e9f0cf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -268,6 +268,7 @@ struct tcp_sock {
} rack;
u16 advmss; /* Advertised MSS */
u8 compressed_ack;
+ u8 dup_ack_counter;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 5acdb4d414c4..9e36738c1fe1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -173,6 +173,7 @@ struct netns_ipv4 {
int sysctl_tcp_rmem[3];
int sysctl_tcp_comp_sack_nr;
unsigned long sysctl_tcp_comp_sack_delay_ns;
+ unsigned long sysctl_tcp_comp_sack_slack_ns;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 95ad71e76cc3..3a628423d27b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1330,6 +1330,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
+ .procname = "tcp_comp_sack_slack_ns",
+ .data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
.procname = "tcp_comp_sack_nr",
.data = &init_net.ipv4.sysctl_tcp_comp_sack_nr,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf4ced9273e8..d68128a672ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
}
}
+static void tcp_sack_compress_send_ack(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!tp->compressed_ack)
+ return;
+
+ if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+ __sock_put(sk);
+
+ /* Since we have to send one ack finally,
+ * substract one from tp->compressed_ack to keep
+ * LINUX_MIB_TCPACKCOMPRESSED accurate.
+ */
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - 1);
+
+ tp->compressed_ack = 0;
+ tcp_send_ack(sk);
+}
+
+/* Reasonable amount of sack blocks included in TCP SACK option
+ * The max is 4, but this becomes 3 if TCP timestamps are there.
+ * Given that SACK packets might be lost, be conservative and use 2.
+ */
+#define TCP_SACK_BLOCKS_EXPECTED 2
+
static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
if (tcp_sack_extend(sp, seq, end_seq)) {
+ if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+ tcp_sack_compress_send_ack(sk);
/* Rotate this_sack to the first one. */
for (; this_sack > 0; this_sack--, sp--)
swap(*sp, *(sp - 1));
@@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
}
}
+ if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+ tcp_sack_compress_send_ack(sk);
+
/* Could not find an adjacent existing SACK, build a new one,
* put it at the front, and shift everyone else down. We
* always know there is at least one SACK present already here.
@@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
- tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
sp--;
@@ -5275,15 +5305,13 @@ send_now:
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack - TCP_FASTRETRANS_THRESH);
- tp->compressed_ack = 0;
+ tp->dup_ack_counter = 0;
}
-
- if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
+ tp->dup_ack_counter++;
goto send_now;
-
+ }
+ tp->compressed_ack++;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
@@ -5296,8 +5324,9 @@ send_now:
delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
- hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
- HRTIMER_MODE_REL_PINNED_SOFT);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+ sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ HRTIMER_MODE_REL_PINNED_SOFT);
}
static inline void tcp_ack_snd_check(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 83a5d24e13b8..6c05f1ceb538 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+ net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ba4482130f08..c414aeb1efa9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
+ if (unlikely(tp->compressed_ack)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack - TCP_FASTRETRANS_THRESH);
- tp->compressed_ack = TCP_FASTRETRANS_THRESH;
+ tp->compressed_ack);
+ tp->compressed_ack = 0;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c3f26dcd6704..ada046f425d2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ if (tp->compressed_ack) {
+ /* Since we have to send one ack finally,
+ * substract one from tp->compressed_ack to keep
+ * LINUX_MIB_TCPACKCOMPRESSED accurate.
+ */
+ tp->compressed_ack--;
tcp_send_ack(sk);
+ }
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
&sk->sk_tsq_flags))