diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 110 |
1 files changed, 77 insertions, 33 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..2e69b8d16e68 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -601,15 +601,14 @@ static unsigned int tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5, + const struct tcp_md5sig_key *md5, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; #ifdef CONFIG_TCP_MD5SIG - *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); - if (*md5) { + if (md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -620,8 +619,6 @@ static unsigned int tcp_synack_options(struct sock *sk, */ ireq->tstamp_ok &= !ireq->sack_ok; } -#else - *md5 = NULL; #endif /* We always send an MSS option. */ @@ -989,7 +986,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (md5) { sk_nocaps_add(sk, NETIF_F_GSO_MASK); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, NULL, skb); + md5, sk, skb); } #endif @@ -1354,6 +1351,8 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; + if (icsk->icsk_mtup.enabled) + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; } EXPORT_SYMBOL(tcp_mtup_init); @@ -1752,20 +1751,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, bool *is_cwnd_limited, u32 max_segs) { - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - u32 send_win, cong_win, limit, in_flight; + u32 age, send_win, cong_win, limit, in_flight; + struct tcp_sock *tp = tcp_sk(sk); + struct skb_mstamp now; + struct sk_buff *head; int win_divisor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto send_now; - if (icsk->icsk_ca_state != TCP_CA_Open) + if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR))) goto send_now; - /* Defer for less than two clock ticks. */ - if (tp->tso_deferred && - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1) + /* Avoid bursty behavior by allowing defer + * only if the last write was recent. + */ + if ((s32)(tcp_time_stamp - tp->lsndtime) > 0) goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1807,11 +1809,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - /* Ok, it looks like it is advisable to defer. - * Do not rearm the timer if already set to not break TCP ACK clocking. - */ - if (!tp->tso_deferred) - tp->tso_deferred = 1 | (jiffies << 1); + head = tcp_write_queue_head(sk); + skb_mstamp_get(&now); + age = skb_mstamp_us_delta(&now, &head->skb_mstamp); + /* If next ACK is likely to come too late (half srtt), do not defer */ + if (age < (tp->srtt_us >> 4)) + goto send_now; + + /* Ok, it looks like it is advisable to defer. */ if (cong_win < send_win && cong_win < skb->len) *is_cwnd_limited = true; @@ -1819,10 +1824,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, return true; send_now: - tp->tso_deferred = 0; return false; } +static inline void tcp_mtu_check_reprobe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + u32 interval; + s32 delta; + + interval = net->ipv4.sysctl_tcp_probe_interval; + delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp; + if (unlikely(delta >= interval * HZ)) { + int mss = tcp_current_mss(sk); + + /* Update current search range */ + icsk->icsk_mtup.probe_size = 0; + icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + + sizeof(struct tcphdr) + + icsk->icsk_af_ops->net_header_len; + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); + + /* Update probe time stamp */ + icsk->icsk_mtup.probe_timestamp = tcp_time_stamp; + } +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -1837,11 +1866,13 @@ static int tcp_mtu_probe(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb, *nskb, *next; + struct net *net = sock_net(sk); int len; int probe_size; int size_needed; int copy; int mss_now; + int interval; /* Not currently probing/verifying, * not in recovery, @@ -1854,12 +1885,25 @@ static int tcp_mtu_probe(struct sock *sk) tp->rx_opt.num_sacks || tp->rx_opt.dsack) return -1; - /* Very simple search strategy: just double the MSS. */ + /* Use binary search for probe_size between tcp_mss_base, + * and current mss_clamp. if (search_high - search_low) + * smaller than a threshold, backoff from probing. + */ mss_now = tcp_current_mss(sk); - probe_size = 2 * tp->mss_cache; + probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high + + icsk->icsk_mtup.search_low) >> 1); size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; - if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { - /* TODO: set timer for probe_converge_event */ + interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; + /* When misfortune happens, we are reprobing actively, + * and then reprobe timer has expired. We stick with current + * probing process by not resetting search range to its orignal. + */ + if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || + interval < net->ipv4.sysctl_tcp_probe_threshold) { + /* Check whether enough time has elaplased for + * another round of probing. + */ + tcp_mtu_check_reprobe(sk); return -1; } @@ -2773,15 +2817,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); @@ -2870,7 +2910,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; struct sk_buff *skb; - struct tcp_md5sig_key *md5; + struct tcp_md5sig_key *md5 = NULL; int tcp_header_size; int mss; @@ -2883,7 +2923,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); - security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) @@ -2896,7 +2935,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif skb_mstamp_get(&skb->skb_mstamp); - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + +#ifdef CONFIG_TCP_MD5SIG + rcu_read_lock(); + md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); +#endif + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2927,10 +2971,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) { + if (md5) tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, NULL, req, skb); - } + md5, req_to_sk(req), skb); + rcu_read_unlock(); #endif return skb; |