diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-07-29 04:21:16 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-07-29 04:21:16 +0300 |
commit | 272ac32f566e3f925b20c231a2b30f6893aa258a (patch) | |
tree | c7c04a13d89349f0d007914904347df79438f3fa /net/ipv4 | |
parent | 623cd87006983935de6c2ad8e2d50e68f1b7d6e7 (diff) | |
parent | 33ea1340bafe1f394e5bf96fceef73e9771d066b (diff) | |
download | linux-272ac32f566e3f925b20c231a2b30f6893aa258a.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
No conflicts.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_trie.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 41 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_metrics.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 27 |
6 files changed, 62 insertions, 50 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 46e8a5125853..452ff177e4da 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { + u8 fib_notify_on_flag_change; struct fib_alias *fa_match; struct sk_buff *skb; int err; @@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) WRITE_ONCE(fa_match->offload, fri->offload); WRITE_ONCE(fa_match->trap, fri->trap); + fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change); + /* 2 means send notifications only if offload_failed was changed. */ - if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && + if (fib_notify_on_flag_change == 2 && READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); - if (!net->ipv4.sysctl_fib_notify_on_flag_change) + if (!fib_notify_on_flag_change) goto out; skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dc7cc3ce6a53..970e9a2cca4a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -454,8 +454,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); - WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); sk_sockets_allocated_inc(sk); } @@ -688,7 +688,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sock_net(sk)->ipv4.sysctl_tcp_autocorking && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize && tcp_skb_can_collapse_to(skb); @@ -1842,7 +1842,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else - cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); @@ -4573,9 +4573,18 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } - /* check the signature */ - genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected, - NULL, skb); + /* Check the signature. + * To support dual stack listeners, we need to handle + * IPv4-mapped case. + */ + if (family == AF_INET) + genhash = tcp_v4_md5_hash_skb(newhash, + hash_expected, + NULL, skb); + else + genhash = tp->af_specific->calc_md5_hash(newhash, + hash_expected, + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae73b34d32e9..ab5f0ea166f1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk) if (sk->sk_sndbuf < sndmem) WRITE_ONCE(sk->sk_sndbuf, - min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); + min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; - int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb, */ static void tcp_init_buffer_space(struct sock *sk) { - int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); + int rmem2; icsk->icsk_ack.quick = 0; + rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); - if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < rmem2 && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { WRITE_ONCE(sk->sk_rcvbuf, - min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2])); + min(atomic_read(&sk->sk_rmem_alloc), rmem2)); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * <prev RTT . ><current RTT .. ><next RTT .... > */ - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; @@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk) do_div(rcvwin, tp->advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); @@ -909,9 +910,9 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio); else - rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; + rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio); rate *= max(tcp_snd_cwnd(tp), tp->packets_out); @@ -2174,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk) * loss recovery is underway except recurring timeout(s) on * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ - tp->frto = net->ipv4.sysctl_tcp_frto && + tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -3057,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { - u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; + u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { @@ -3580,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && + elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3628,7 +3630,7 @@ static void tcp_send_challenge_ack(struct sock *sk) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; @@ -4425,7 +4427,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4472,7 +4474,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); @@ -5516,7 +5518,7 @@ send_now: } if (!tcp_is_sack(tp) || - tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { @@ -5537,11 +5539,12 @@ send_now: if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + delay = min_t(unsigned long, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), - sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), HRTIMER_MODE_REL_PINNED_SOFT); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7e7101647dc..0c83780dc9bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1008,7 +1008,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; @@ -1528,7 +1528,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; if (!dst) { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index a501150deaa3..d58e672be31c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (net->ipv4.sysctl_tcp_nometrics_save || !dst) + if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) return; rcu_read_lock(); @@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) if (tcp_in_initial_slowstart(tp)) { /* Slow start still did not finish. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && (tcp_snd_cwnd(tp) >> 1) > val) @@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); @@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_ssthresh) >> 1); } - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && tp->snd_ssthresh > val) @@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) if (tcp_metric_locked(tm, TCP_METRIC_CWND)) tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); - val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? + val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { tp->snd_ssthresh = val; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b72ccd2e651..78b654ff421b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ @@ -230,7 +227,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ - if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = min_t(u32, space, U16_MAX); @@ -241,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, *rcv_wscale = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, @@ -285,7 +282,7 @@ static u16 tcp_select_window(struct sock *sk) * scaled window. */ if (!tp->rx_opt.rcv_wscale && - sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -1976,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); - r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log; + r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) bytes += sk->sk_gso_max_size >> r; @@ -1995,7 +1992,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); @@ -2507,7 +2504,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && |