summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorFan Du <fan.du@intel.com>2015-03-06 06:18:23 +0300
committerDavid S. Miller <davem@davemloft.net>2015-03-06 22:57:41 +0300
commit6b58e0a5f32dedb609438bb9c9c82aa6e23381f2 (patch)
treedcfd707a04fd8295b1384cc18fc60addf22ae23a /net
parentdcd8fb8533ceb493146ce030d15f7965b82d0c27 (diff)
downloadlinux-6b58e0a5f32dedb609438bb9c9c82aa6e23381f2.tar.xz
ipv4: Use binary search to choose tcp PMTU probe_size
Current probe_size is chosen by doubling mss_cache, the probing process will end shortly with a sub-optimal mss size, and the link mtu will not be taken full advantage of, in return, this will make user to tweak tcp_base_mss with care. Use binary search to choose probe_size in a fine granularity manner, an optimal mss will be found to boost performance as its maxmium. In addition, introduce a sysctl_tcp_probe_threshold to control when probing will stop in respect to the width of search range. Test env: Docker instance with vxlan encapuslation(82599EB) iperf -c 10.0.0.24 -t 60 before this patch: 1.26 Gbits/sec After this patch: increase 26% 1.59 Gbits/sec Signed-off-by: Fan Du <fan.du@intel.com> Acked-by: John Heffner <johnwheffner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c14
3 files changed, 19 insertions, 3 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d151539da8e6..d3c09c12ee81 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tcp_probe_threshold",
+ .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed4783b..35790d977a2b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2460,6 +2460,7 @@ static int __net_init tcp_sk_init(struct net *net)
}
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+ net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
return 0;
fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8bbd86cd81c8..ed024cbb097f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1842,11 +1842,13 @@ static int tcp_mtu_probe(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb, *nskb, *next;
+ struct net *net = sock_net(sk);
int len;
int probe_size;
int size_needed;
int copy;
int mss_now;
+ int interval;
/* Not currently probing/verifying,
* not in recovery,
@@ -1859,11 +1861,17 @@ static int tcp_mtu_probe(struct sock *sk)
tp->rx_opt.num_sacks || tp->rx_opt.dsack)
return -1;
- /* Very simple search strategy: just double the MSS. */
+ /* Use binary search for probe_size between tcp_mss_base,
+ * and current mss_clamp. if (search_high - search_low)
+ * smaller than a threshold, backoff from probing.
+ */
mss_now = tcp_current_mss(sk);
- probe_size = 2 * tp->mss_cache;
+ probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+ icsk->icsk_mtup.search_low) >> 1);
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
- if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
+ interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+ interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
/* TODO: set timer for probe_converge_event */
return -1;
}