From cee1af825d65b8122627fc2efbc36c1bd51ee103 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 Mar 2023 20:57:41 +0000
Subject: tcp: annotate lockless accesses to sk->sk_err_soft

This field can be read/written without lock synchronization.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ea370afa70ed..4f6894469b62 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
 	 * for the case, if this connection will not able to recover.
 	 */
 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
-		sk->sk_err_soft = EMSGSIZE;
+		WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);

 	mtu = dst_mtu(dst);

@@ -602,7 +602,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 			tcp_done(sk);
 		} else {
-			sk->sk_err_soft = err;
+			WRITE_ONCE(sk->sk_err_soft, err);
 		}
 		goto out;
 	}
@@ -628,7 +628,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 		sk->sk_err = err;
 		sk_error_report(sk);
 	} else	{ /* Only an error on timeout */
-		sk->sk_err_soft = err;
+		WRITE_ONCE(sk->sk_err_soft, err);
 	}

 out:
--
cgit v1.2.3

From e13ec3da05d130f0d10da8e1fbe1be26dcdb0e27 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 Mar 2023 20:57:44 +0000
Subject: tcp: annotate lockless access to sk->sk_err

tcp_poll() reads sk->sk_err without the socket lock held/owned.

We should use READ_ONCE() here, and update writers to use WRITE_ONCE().

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c        | 11 ++++++-----
 net/ipv4/tcp_input.c  |  6 +++---
 net/ipv4/tcp_ipv4.c   |  4 ++--
 net/ipv4/tcp_output.c |  2 +-
 net/ipv4/tcp_timer.c  |  2 +-
 net/ipv6/tcp_ipv6.c   |  4 ++--
 6 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 288693981b00..01569de651b6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	}
 	/* This barrier is coupled with smp_wmb() in tcp_reset() */
 	smp_rmb();
-	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+	if (READ_ONCE(sk->sk_err) ||
+	    !skb_queue_empty_lockless(&sk->sk_error_queue))
 		mask |= EPOLLERR;

 	return mask;
@@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	if (old_state == TCP_LISTEN) {
 		inet_csk_listen_stop(sk);
 	} else if (unlikely(tp->repair)) {
-		sk->sk_err = ECONNABORTED;
+		WRITE_ONCE(sk->sk_err, ECONNABORTED);
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags)
 		 * states
 		 */
 		tcp_send_active_reset(sk, gfp_any());
-		sk->sk_err = ECONNRESET;
+		WRITE_ONCE(sk->sk_err, ECONNRESET);
 	} else if (old_state == TCP_SYN_SENT)
-		sk->sk_err = ECONNRESET;
+		WRITE_ONCE(sk->sk_err, ECONNRESET);

 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
@@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err)
 	bh_lock_sock(sk);
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		sk->sk_err = err;
+		WRITE_ONCE(sk->sk_err, err);
 		/* This barrier is coupled with smp_rmb() in tcp_poll() */
 		smp_wmb();
 		sk_error_report(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8b5b6ca6617d..754ddbe0577f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
-		sk->sk_err = ECONNREFUSED;
+		WRITE_ONCE(sk->sk_err, ECONNREFUSED);
 		break;
 	case TCP_CLOSE_WAIT:
-		sk->sk_err = EPIPE;
+		WRITE_ONCE(sk->sk_err, EPIPE);
 		break;
 	case TCP_CLOSE:
 		return;
 	default:
-		sk->sk_err = ECONNRESET;
+		WRITE_ONCE(sk->sk_err, ECONNRESET);
 	}
 	/* This barrier is coupled with smp_rmb() in tcp_poll() */
 	smp_wmb();
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f6894469b62..89daa6b953ff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -596,7 +596,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 		ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);

 		if (!sock_owned_by_user(sk)) {
-			sk->sk_err = err;
+			WRITE_ONCE(sk->sk_err, err);

 			sk_error_report(sk);

@@ -625,7 +625,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 	inet = inet_sk(sk);
 	if (!sock_owned_by_user(sk) && inet->recverr) {
-		sk->sk_err = err;
+		WRITE_ONCE(sk->sk_err, err);
 		sk_error_report(sk);
 	} else	{ /* Only an error on timeout */
 		WRITE_ONCE(sk->sk_err_soft, err);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 71d01cf3c13e..f7e00d90a730 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk)
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;

-	sk->sk_err = 0;
+	WRITE_ONCE(sk->sk_err, 0);
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
 	tcp_init_wl(tp, 0);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8823e2182713..b839c2f91292 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
 static void tcp_write_err(struct sock *sk)
 {
-	sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT;
+	WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
 	sk_error_report(sk);

 	tcp_write_queue_purge(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dc963eebc668..35cf523c9efd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -493,7 +493,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

 		if (!sock_owned_by_user(sk)) {
-			sk->sk_err = err;
+			WRITE_ONCE(sk->sk_err, err);
 			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 			tcp_done(sk);
@@ -513,7 +513,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}

 	if (!sock_owned_by_user(sk) && np->recverr) {
-		sk->sk_err = err;
+		WRITE_ONCE(sk->sk_err, err);
 		sk_error_report(sk);
 	} else {
 		WRITE_ONCE(sk->sk_err_soft, err);
--
cgit v1.2.3
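Aside: the two commits above apply the kernel's standard pattern for annotating intentional lockless accesses. A minimal, self-contained sketch of that pattern follows; it is illustrative only, the names below are hypothetical and not taken from these patches, and only READ_ONCE()/WRITE_ONCE() (from <linux/compiler.h>) are real kernel macros. Writers may still be serialized by a lock, but any field that is also read without that lock must be annotated on both sides so the compiler cannot tear, fuse, or re-load the access, and so KCSAN does not report a data race.

#include <linux/compiler.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(err_lock);
static int soft_err;	/* written under err_lock, but also read locklessly */

/* Writer side: the lock serializes concurrent writers, but the store
 * itself must be WRITE_ONCE() because a lockless reader may run at the
 * same time; this guarantees a single, untorn store.
 */
static void set_soft_err(int err)
{
	spin_lock(&err_lock);
	WRITE_ONCE(soft_err, err);
	spin_unlock(&err_lock);
}

/* Reader side: runs without err_lock, much like tcp_poll() reading
 * sk->sk_err. READ_ONCE() forces a single, untorn load that the
 * compiler cannot duplicate or hoist.
 */
static int get_soft_err(void)
{
	return READ_ONCE(soft_err);
}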
From 580031ff9952b7dbf48dedba6b56a100ae002bef Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau
Date: Mon, 27 Mar 2023 17:42:32 -0700
Subject: bpf: tcp: Use sock_gen_put instead of sock_put in bpf_iter_tcp

While reviewing the udp-iter batching patches, I noticed that
bpf_iter_tcp calling sock_put() is incorrect. It should call
sock_gen_put() instead, because bpf_iter_tcp iterates the ehash table,
which also holds req sk and tw sk entries. This patch replaces all
sock_put() calls with sock_gen_put() in the bpf_iter_tcp codepath.

Fixes: 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock")
Signed-off-by: Martin KaFai Lau
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20230328004232.2134233-1-martin.lau@linux.dev
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ea370afa70ed..b9d55277cb85 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2780,7 +2780,7 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
 static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
 {
 	while (iter->cur_sk < iter->end_sk)
-		sock_put(iter->batch[iter->cur_sk++]);
+		sock_gen_put(iter->batch[iter->cur_sk++]);
 }

 static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
@@ -2941,7 +2941,7 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		 * st->bucket. See tcp_seek_last_pos().
 		 */
 		st->offset++;
-		sock_put(iter->batch[iter->cur_sk++]);
+		sock_gen_put(iter->batch[iter->cur_sk++]);
 	}

 	if (iter->cur_sk < iter->end_sk)
--
cgit v1.2.3
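Aside: why plain sock_put() was wrong here. The ehash table holds full sockets as well as timewait and request-socket minisockets, and the latter two must not be released through sk_free(). The sketch below paraphrases the dispatch that sock_gen_put() performs (based on its definition in net/ipv4/inet_hashtables.c; consult the kernel source for the authoritative version):

void sock_gen_put(struct sock *sk)
{
	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));	/* timewait minisock */
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));	/* request minisock */
	else
		sk_free(sk);			/* full socket */
}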
From 1e306ec49a1f206fd2cc89a42fac6e6f592a8cc1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 11 May 2023 11:47:49 +0000
Subject: tcp: fix possible sk_priority leak in tcp_v4_send_reset()

When tcp_v4_send_reset() is called with @sk == NULL, we do not change
ctl_sk->sk_priority, which could have been set from a prior invocation.

Change tcp_v4_send_reset() to set sk_priority and sk_mark fields
before calling ip_send_unicast_reply().

This means tcp_v4_send_reset() and tcp_v4_send_ack() no longer
have to clear ctl_sk->sk_mark after their call to
ip_send_unicast_reply().

Fixes: f6c0f5d209fa ("tcp: honor SO_PRIORITY in TIME_WAIT state")
Signed-off-by: Eric Dumazet
Cc: Antoine Tenart
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 39bda2b1066e..06d2573685ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -829,6 +829,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 				   inet_twsk(sk)->tw_priority : sk->sk_priority;
 		transmit_time = tcp_transmit_time(sk);
 		xfrm_sk_clone_policy(ctl_sk, sk);
+	} else {
+		ctl_sk->sk_mark = 0;
+		ctl_sk->sk_priority = 0;
 	}
 	ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -836,7 +839,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 			      &arg, arg.iov[0].iov_len,
 			      transmit_time);

-	ctl_sk->sk_mark = 0;
 	xfrm_sk_free_policy(ctl_sk);
 	sock_net_set(ctl_sk, &init_net);
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -935,7 +937,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
 			      &arg, arg.iov[0].iov_len,
 			      transmit_time);

-	ctl_sk->sk_mark = 0;
 	sock_net_set(ctl_sk, &init_net);
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 	local_bh_enable();
--
cgit v1.2.3
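Aside: the bug class fixed above, reduced to a minimal sketch with hypothetical names (not kernel code). A shared control object is reused across calls, so a field assigned on one path silently persists into later invocations unless every path sets it:

struct reply_ctl {
	unsigned int mark;
	int priority;
};

/* Shared and reused for every reply, like the per-netns ctl_sk. */
static struct reply_ctl ctl;

static void send_reply(const struct reply_ctl *from)	/* from may be NULL */
{
	if (from) {
		ctl.mark = from->mark;
		ctl.priority = from->priority;
	} else {
		/* The fix: without this reset, the mark/priority set by a
		 * previous caller would silently apply to this reply too.
		 */
		ctl.mark = 0;
		ctl.priority = 0;
	}
	/* ... transmit using ctl ... */
}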