From 34c899af6c1a9d65aa85c765b2eecb1b9a88e8b8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:39 +0900 Subject: af_unix: Set error only when needed in unix_stream_connect(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_stream_connect(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6b1762300443..23f419f561b8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1575,12 +1575,12 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; } - err = -ENOMEM; - /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); - if (skb == NULL) + if (!skb) { + err = -ENOMEM; goto out; + } restart: /* Find listening sock. */ @@ -1600,16 +1600,17 @@ restart: goto restart; } - err = -ECONNREFUSED; - if (other->sk_state != TCP_LISTEN) - goto out_unlock; - if (other->sk_shutdown & RCV_SHUTDOWN) + if (other->sk_state != TCP_LISTEN || + other->sk_shutdown & RCV_SHUTDOWN) { + err = -ECONNREFUSED; goto out_unlock; + } if (unix_recvq_full_lockless(other)) { - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; goto out_unlock; + } timeo = unix_wait_for_peer(other, timeo); -- cgit v1.2.3 From e26ee0a736bd949ce6fa51829fd0a2f6381391de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:40 +0900 Subject: af_unix: Clean up error paths in unix_stream_connect(). The label order is weird in unix_stream_connect(), and all NULL checks are unnecessary if reordered. Let's clean up the error paths to make it easy to set a drop reason for each path. While at it, a comment with the old style is updated. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23f419f561b8..21e17e739f88 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1563,15 +1563,14 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); /* First of all allocate resources. - If we will make it after state is locked, - we will have to recheck all again in any case. + * If we will make it after state is locked, + * we will have to recheck all again in any case. */ /* create new sock for complete connection */ newsk = unix_create1(net, NULL, 0, sock->type); if (IS_ERR(newsk)) { err = PTR_ERR(newsk); - newsk = NULL; goto out; } @@ -1579,7 +1578,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); if (!skb) { err = -ENOMEM; - goto out; + goto out_free_sk; } restart: @@ -1587,8 +1586,7 @@ restart: other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); - other = NULL; - goto out; + goto out_free_skb; } unix_state_lock(other); @@ -1613,11 +1611,12 @@ restart: } timeo = unix_wait_for_peer(other, timeo); + sock_put(other); err = sock_intr_errno(timeo); if (signal_pending(current)) - goto out; - sock_put(other); + goto out_free_skb; + goto restart; } @@ -1702,15 +1701,13 @@ restart: return 0; out_unlock: - if (other) - unix_state_unlock(other); - -out: + unix_state_unlock(other); + sock_put(other); +out_free_skb: kfree_skb(skb); - if (newsk) - unix_release_sock(newsk, 0); - if (other) - sock_put(other); +out_free_sk: + unix_release_sock(newsk, 0); +out: return err; } -- cgit v1.2.3 From 6c444255b193b5b9c5a18c3784d960e10e1833a2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:41 +0900 Subject: af_unix: Set error only when needed in unix_stream_sendmsg(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_stream_sendmsg(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 21e17e739f88..660d8b8130ca 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2254,8 +2254,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, wait_for_unix_gc(scm.fp); - err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { + err = -EOPNOTSUPP; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (len) len--; @@ -2268,10 +2268,11 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { - err = -ENOTCONN; other = unix_peer(sk); - if (!other) + if (!other) { + err = -ENOTCONN; goto out_err; + } } if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) -- cgit v1.2.3 From d460b04bc452cf15810b79c15381fffd9d201915 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:42 +0900 Subject: af_unix: Clean up error paths in unix_stream_sendmsg(). If we move send_sig() to the SEND_SHUTDOWN check before the while loop, then we can reuse the same kfree_skb() after the pipe_err_free label. Let's gather the scattered kfree_skb()s in error paths. While at it, some style issues are fixed, and the pipe_err_free label is renamed to out_pipe to match other label names. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 660d8b8130ca..d30bcd50527e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2275,8 +2275,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } } - if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) - goto pipe_err; + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) { + if (!(msg->msg_flags & MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + + err = -EPIPE; + goto out_err; + } while (sent < len) { size = len - sent; @@ -2305,20 +2310,18 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(&scm, skb, !fds_sent); - if (err < 0) { - kfree_skb(skb); - goto out_err; - } + if (err < 0) + goto out_free; + fds_sent = true; if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb->ip_summed = CHECKSUM_UNNECESSARY; err = skb_splice_from_iter(skb, &msg->msg_iter, size, sk->sk_allocation); - if (err < 0) { - kfree_skb(skb); - goto out_err; - } + if (err < 0) + goto out_free; + size = err; refcount_add(size, &sk->sk_wmem_alloc); } else { @@ -2326,17 +2329,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, skb->data_len = data_len; skb->len = size; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); - if (err) { - kfree_skb(skb); - goto out_err; - } + if (err) + goto out_free; } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) - goto pipe_err_free; + goto out_pipe; maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); @@ -2359,13 +2360,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return sent; -pipe_err_free: +out_pipe: unix_state_unlock(other); - kfree_skb(skb); -pipe_err: - if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) + if (!sent && !(msg->msg_flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; +out_free: + kfree_skb(skb); out_err: scm_destroy(&scm); return sent ? : err; -- cgit v1.2.3 From 001a25088c35ab69bd4b2f208e47eb8acbce6353 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:43 +0900 Subject: af_unix: Set error only when needed in unix_dgram_sendmsg(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_dgram_sendmsg(). Then, we need not (re)set 0 to err. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d30bcd50527e..07d6fba99a7c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1978,9 +1978,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, wait_for_unix_gc(scm.fp); - err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) + if (msg->msg_flags & MSG_OOB) { + err = -EOPNOTSUPP; goto out; + } if (msg->msg_namelen) { err = unix_validate_addr(sunaddr, msg->msg_namelen); @@ -1995,10 +1996,11 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } else { sunaddr = NULL; - err = -ENOTCONN; other = unix_peer_get(sk); - if (!other) + if (!other) { + err = -ENOTCONN; goto out; + } } if ((test_bit(SOCK_PASSCRED, &sock->flags) || @@ -2009,9 +2011,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - err = -EMSGSIZE; - if (len > READ_ONCE(sk->sk_sndbuf) - 32) + if (len > READ_ONCE(sk->sk_sndbuf) - 32) { + err = -EMSGSIZE; goto out; + } if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, @@ -2043,9 +2046,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - err = -ECONNRESET; - if (sunaddr == NULL) + if (!sunaddr) { + err = -ECONNRESET; goto out_free; + } other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); @@ -2065,9 +2069,11 @@ restart: sk_locked = 0; unix_state_lock(other); restart_locked: - err = -EPERM; - if (!unix_may_send(sk, other)) + + if (!unix_may_send(sk, other)) { + err = -EPERM; goto out_unlock; + } if (unlikely(sock_flag(other, SOCK_DEAD))) { /* @@ -2080,7 +2086,6 @@ restart_locked: if (!sk_locked) unix_state_lock(sk); - err = 0; if (sk->sk_type == SOCK_SEQPACKET) { /* We are here only when racing with unix_release_sock() * is clearing @other. Never change state to TCP_CLOSE @@ -2108,9 +2113,10 @@ restart_locked: goto restart; } - err = -EPIPE; - if (other->sk_shutdown & RCV_SHUTDOWN) + if (other->sk_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; goto out_unlock; + } if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); -- cgit v1.2.3 From f4dd63165b08ba3b72117973d5daea456f36377d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:44 +0900 Subject: af_unix: Move !sunaddr case in unix_dgram_sendmsg(). When other is NULL in unix_dgram_sendmsg(), we check if sunaddr is NULL before looking up a receiver socket. There are three paths going through the check, but it's always false for 2 out of the 3 paths: the first socket lookup and the second 'goto restart'. The condition can be true for the first 'goto restart' only when SOCK_DEAD is flagged for the socket found with msg->msg_name. Let's move the check to the single appropriate path. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 07d6fba99a7c..111f95384990 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2046,11 +2046,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - if (!sunaddr) { - err = -ECONNRESET; - goto out_free; - } - other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { @@ -2105,6 +2100,9 @@ restart_locked: err = -ECONNREFUSED; } else { unix_state_unlock(sk); + + if (!sunaddr) + err = -ECONNRESET; } other = NULL; -- cgit v1.2.3 From 3c05329a2abe312ed85a60a325b930063f61e817 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:45 +0900 Subject: af_unix: Use msg->{msg_name,msg_namelen} in unix_dgram_sendmsg(). In unix_dgram_sendmsg(), we use a local variable sunaddr pointing NULL or msg->msg_name based on msg->msg_namelen. Let's remove sunaddr and simplify the usage. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 111f95384990..ae74fdcf5dcd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1962,7 +1962,6 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *sk = sock->sk, *other = NULL; struct unix_sock *u = unix_sk(sk); struct scm_cookie scm; @@ -1984,7 +1983,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } if (msg->msg_namelen) { - err = unix_validate_addr(sunaddr, msg->msg_namelen); + err = unix_validate_addr(msg->msg_name, msg->msg_namelen); if (err) goto out; @@ -1995,7 +1994,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (err) goto out; } else { - sunaddr = NULL; other = unix_peer_get(sk); if (!other) { err = -ENOTCONN; @@ -2046,8 +2044,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, - sk->sk_type); + other = unix_find_other(sock_net(sk), msg->msg_name, + msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL; @@ -2101,7 +2099,7 @@ restart_locked: } else { unix_state_unlock(sk); - if (!sunaddr) + if (!msg->msg_namelen) err = -ECONNRESET; } -- cgit v1.2.3 From a700b43358ccc3c5ae857eeea37ff50ce0529b1c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:46 +0900 Subject: af_unix: Split restart label in unix_dgram_sendmsg(). There are two paths jumping to the restart label in unix_dgram_sendmsg(). One requires another lookup and sk_filter(), but the other doesn't. Let's split the label to make each flow more straightforward. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ae74fdcf5dcd..513d0fd12e6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2042,8 +2042,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -restart: if (!other) { +lookup: other = unix_find_other(sock_net(sk), msg->msg_name, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { @@ -2059,6 +2059,7 @@ restart: goto out_free; } +restart: sk_locked = 0; unix_state_lock(other); restart_locked: @@ -2106,7 +2107,8 @@ restart_locked: other = NULL; if (err) goto out_free; - goto restart; + + goto lookup; } if (other->sk_shutdown & RCV_SHUTDOWN) { -- cgit v1.2.3 From 689c398885cc27d2a5bb2ad5d70324107d4a78ec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:47 +0900 Subject: af_unix: Defer sock_put() to clean up path in unix_dgram_sendmsg(). When other has SOCK_DEAD in unix_dgram_sendmsg(), we call sock_put() for it first and then set NULL to other before jumping to the error path. This is to skip sock_put() in the error path. Let's not set NULL to other and defer the sock_put() to the error path to clean up the labels later. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 513d0fd12e6a..b8adfb41d11b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2075,7 +2075,6 @@ restart_locked: * datagram error */ unix_state_unlock(other); - sock_put(other); if (!sk_locked) unix_state_lock(sk); @@ -2104,7 +2103,6 @@ restart_locked: err = -ECONNRESET; } - other = NULL; if (err) goto out_free; -- cgit v1.2.3 From 106d979b85e575b0ab10224fcde5c3eb94566e05 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:48 +0900 Subject: af_unix: Clean up SOCK_DEAD error paths in unix_dgram_sendmsg(). When other has SOCK_DEAD in unix_dgram_sendmsg(), we hold unix_state_lock() for the sender socket first. However, we do not need it for sk->sk_type. Let's move the lock down a bit. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b8adfb41d11b..22c689b0044f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2070,23 +2070,23 @@ restart_locked: } if (unlikely(sock_flag(other, SOCK_DEAD))) { - /* - * Check with 1003.1g - what should - * datagram error - */ - unix_state_unlock(other); + /* Check with 1003.1g - what should datagram error */ - if (!sk_locked) - unix_state_lock(sk); + unix_state_unlock(other); if (sk->sk_type == SOCK_SEQPACKET) { /* We are here only when racing with unix_release_sock() * is clearing @other. Never change state to TCP_CLOSE * unlike SOCK_DGRAM wants. */ - unix_state_unlock(sk); err = -EPIPE; - } else if (unix_peer(sk) == other) { + goto out_free; + } + + if (!sk_locked) + unix_state_lock(sk); + + if (unix_peer(sk) == other) { unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); @@ -2096,15 +2096,15 @@ restart_locked: unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; - } else { - unix_state_unlock(sk); - - if (!msg->msg_namelen) - err = -ECONNRESET; + goto out_free; } - if (err) + unix_state_unlock(sk); + + if (!msg->msg_namelen) { + err = -ECONNRESET; goto out_free; + } goto lookup; } -- cgit v1.2.3 From 62c6db251e667e8a240dc8209c00313240120fd6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:49 +0900 Subject: af_unix: Clean up error paths in unix_dgram_sendmsg(). The error path is complicated in unix_dgram_sendmsg() because there are two timings when other could be non-NULL: when it's fetched from unix_peer_get() and when it's looked up by unix_find_other(). Let's move unix_peer_get() to the else branch for unix_find_other() and clean up the error paths. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 22c689b0044f..239ce2f77d55 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1993,12 +1993,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, NULL); if (err) goto out; - } else { - other = unix_peer_get(sk); - if (!other) { - err = -ENOTCONN; - goto out; - } } if ((test_bit(SOCK_PASSCRED, &sock->flags) || @@ -2026,7 +2020,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, PAGE_ALLOC_COSTLY_ORDER); - if (skb == NULL) + if (!skb) goto out; err = unix_scm_to_skb(&scm, skb, true); @@ -2042,13 +2036,18 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - if (!other) { + if (msg->msg_namelen) { lookup: other = unix_find_other(sock_net(sk), msg->msg_name, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); - other = NULL; + goto out_free; + } + } else { + other = unix_peer_get(sk); + if (!other) { + err = -ENOTCONN; goto out_free; } } @@ -2056,7 +2055,7 @@ lookup: if (sk_filter(other, skb) < 0) { /* Toss the packet but do not return any error to the sender */ err = len; - goto out_free; + goto out_sock_put; } restart: @@ -2080,7 +2079,7 @@ restart_locked: * unlike SOCK_DGRAM wants. */ err = -EPIPE; - goto out_free; + goto out_sock_put; } if (!sk_locked) @@ -2096,14 +2095,14 @@ restart_locked: unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; - goto out_free; + goto out_sock_put; } unix_state_unlock(sk); if (!msg->msg_namelen) { err = -ECONNRESET; - goto out_free; + goto out_sock_put; } goto lookup; @@ -2132,7 +2131,7 @@ restart_locked: err = sock_intr_errno(timeo); if (signal_pending(current)) - goto out_free; + goto out_sock_put; goto restart; } @@ -2173,11 +2172,11 @@ out_unlock: if (sk_locked) unix_state_unlock(sk); unix_state_unlock(other); +out_sock_put: + sock_put(other); out_free: kfree_skb(skb); out: - if (other) - sock_put(other); scm_destroy(&scm); return err; } -- cgit v1.2.3 From bf61ffeb9cc48ee7d1945f26578291da5d9305e4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:50 +0900 Subject: af_unix: Remove unix_our_peer(). unix_our_peer() is used only in unix_may_send(). Let's inline it in unix_may_send(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 239ce2f77d55..8f2b605ce5b3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -286,14 +286,9 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -static inline int unix_our_peer(struct sock *sk, struct sock *osk) -{ - return unix_peer(osk) == sk; -} - static inline int unix_may_send(struct sock *sk, struct sock *osk) { - return unix_peer(osk) == NULL || unix_our_peer(sk, osk); + return !unix_peer(osk) || unix_peer(osk) == sk; } static inline int unix_recvq_full_lockless(const struct sock *sk) -- cgit v1.2.3 From c32f0bd7d4838982c6724fca0da92353f27c6f88 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:35 +0900 Subject: af_unix: Set drop reason in unix_release_sock(). unix_release_sock() is called when the last refcnt of struct file is released. Let's define a new drop reason SKB_DROP_REASON_SOCKET_CLOSE and set it for kfree_skb() in unix_release_sock(). # echo 1 > /sys/kernel/tracing/events/skb/kfree_skb/enable # python3 >>> from socket import * >>> s1, s2 = socketpair(AF_UNIX) >>> s1.send(b'hello world') >>> s2.close() # cat /sys/kernel/tracing/trace_pipe ... python3-280 ... kfree_skb: ... protocol=0 location=unix_release_sock+0x260/0x420 reason: SOCKET_CLOSE To be precise, unix_release_sock() is also called for a new child socket in unix_stream_connect() when something fails, but the new sk does not have skb in the recv queue then and no event is logged. Note that only tcp_inbound_ao_hash() uses a similar drop reason, SKB_DROP_REASON_TCP_CLOSE, and this can be generalised later. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 3 +++ net/unix/af_unix.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index f3714cbea50d..b9e7ff853ce3 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -6,6 +6,7 @@ #define DEFINE_DROP_REASON(FN, FNe) \ FN(NOT_SPECIFIED) \ FN(NO_SOCKET) \ + FN(SOCKET_CLOSE) \ FN(SOCKET_FILTER) \ FN(SOCKET_RCVBUFF) \ FN(PKT_TOO_SMALL) \ @@ -138,6 +139,8 @@ enum skb_drop_reason { * 3) no valid child socket during 3WHS process */ SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_SOCKET_CLOSE: socket is close()d */ + SKB_DROP_REASON_SOCKET_CLOSE, /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ SKB_DROP_REASON_SOCKET_FILTER, /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8f2b605ce5b3..a05d25cc5545 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -715,8 +715,8 @@ static void unix_release_sock(struct sock *sk, int embrion) if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); - /* passed fds are erased in the kfree_skb hook */ - kfree_skb(skb); + /* passed fds are erased in the kfree_skb hook */ + kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE); } if (path.dentry) -- cgit v1.2.3 From 4d0446b7a214e2aa28c0e914329610731f665ad2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:36 +0900 Subject: af_unix: Set drop reason in unix_sock_destructor(). unix_sock_destructor() is called as sk->sk_destruct() just before the socket is actually freed. Let's use SKB_DROP_REASON_SOCKET_CLOSE for skb_queue_purge(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a05d25cc5545..41b99984008a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -640,7 +640,7 @@ static void unix_sock_destructor(struct sock *sk) { struct unix_sock *u = unix_sk(sk); - skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE); DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); -- cgit v1.2.3 From 533643b091dd6e246d57caf81e6892fa9cbb1cc9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:38 +0900 Subject: af_unix: Set drop reason in manage_oob(). AF_UNIX SOCK_STREAM socket supports MSG_OOB. When OOB data is sent to a socket, recv() will break at that point. If the next recv() does not have MSG_OOB, the normal data following the OOB data is returned. Then, the OOB skb is dropped. Let's define a new drop reason for that case in manage_oob(). # echo 1 > /sys/kernel/tracing/events/skb/kfree_skb/enable # python3 >>> from socket import * >>> s1, s2 = socketpair(AF_UNIX) >>> s1.send(b'a', MSG_OOB) >>> s1.send(b'b') >>> s2.recv(2) b'b' # cat /sys/kernel/tracing/trace_pipe ... python3-223 ... kfree_skb: ... location=unix_stream_read_generic+0x59e/0xc20 reason: UNIX_SKIP_OOB Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 6 ++++++ net/unix/af_unix.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index b9e7ff853ce3..d6c9d841eb11 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -9,6 +9,7 @@ FN(SOCKET_CLOSE) \ FN(SOCKET_FILTER) \ FN(SOCKET_RCVBUFF) \ + FN(UNIX_SKIP_OOB) \ FN(PKT_TOO_SMALL) \ FN(TCP_CSUM) \ FN(UDP_CSUM) \ @@ -145,6 +146,11 @@ enum skb_drop_reason { SKB_DROP_REASON_SOCKET_FILTER, /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_UNIX_SKIP_OOB: Out-Of-Band data is skipped by + * recv() without MSG_OOB so dropped. + */ + SKB_DROP_REASON_UNIX_SKIP_OOB, /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ SKB_DROP_REASON_PKT_TOO_SMALL, /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 41b99984008a..e31fda1d319f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2695,7 +2695,7 @@ unlock: spin_unlock(&sk->sk_receive_queue.lock); consume_skb(read_skb); - kfree_skb(unread_skb); + kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB); return skb; } -- cgit v1.2.3 From bace4b468049a558295a0f59460fcb51e28f8fde Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:39 +0900 Subject: af_unix: Set drop reason in unix_stream_read_skb(). unix_stream_read_skb() is called when BPF SOCKMAP reads some data from a socket in the map. SOCKMAP does not support MSG_OOB, and reading OOB results in a drop. Let's set drop reasons respectively. * SOCKET_CLOSE : the socket in SOCKMAP was close()d * UNIX_SKIP_OOB : OOB was read from the socket in SOCKMAP Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e31fda1d319f..de4966e1b7ff 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2724,7 +2724,7 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (sock_flag(sk, SOCK_DEAD)) { unix_state_unlock(sk); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE); return -ECONNRESET; } @@ -2738,7 +2738,7 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) unix_state_unlock(sk); if (drop) { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); return -EAGAIN; } } -- cgit v1.2.3 From b3e365bbf4f47b8f76b25b0fcf3f38916ca53e42 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:40 +0900 Subject: af_unix: Set drop reason in unix_dgram_disconnected(). unix_dgram_disconnected() is called from two places: 1. when a connect()ed socket dis-connect()s or re-connect()s to another socket 2. when sendmsg() fails because the peer socket that the client has connect()ed to has been close()d Then, the client's recv queue is purged to remove all messages from the old peer socket. Let's define a new drop reason for that case. # echo 1 > /sys/kernel/tracing/events/skb/kfree_skb/enable # python3 >>> from socket import * >>> >>> # s1 has a message from s2 >>> s1, s2 = socketpair(AF_UNIX, SOCK_DGRAM) >>> s2.send(b'hello world') >>> >>> # re-connect() drops the message from s2 >>> s3 = socket(AF_UNIX, SOCK_DGRAM) >>> s3.bind('') >>> s1.connect(s3.getsockname()) # cat /sys/kernel/tracing/trace_pipe python3-250 ... kfree_skb: ... location=skb_queue_purge_reason+0xdc/0x110 reason: UNIX_DISCONNECT Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 7 +++++++ net/unix/af_unix.c | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index d6c9d841eb11..32a34dfe8cc5 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -9,6 +9,7 @@ FN(SOCKET_CLOSE) \ FN(SOCKET_FILTER) \ FN(SOCKET_RCVBUFF) \ + FN(UNIX_DISCONNECT) \ FN(UNIX_SKIP_OOB) \ FN(PKT_TOO_SMALL) \ FN(TCP_CSUM) \ @@ -146,6 +147,12 @@ enum skb_drop_reason { SKB_DROP_REASON_SOCKET_FILTER, /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_UNIX_DISCONNECT: recv queue is purged when SOCK_DGRAM + * or SOCK_SEQPACKET socket re-connect()s to another socket or notices + * during send() that the peer has been close()d. + */ + SKB_DROP_REASON_UNIX_DISCONNECT, /** * @SKB_DROP_REASON_UNIX_SKIP_OOB: Out-Of-Band data is skipped by * recv() without MSG_OOB so dropped. diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index de4966e1b7ff..5e1b408c19da 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -622,7 +622,9 @@ static void unix_write_space(struct sock *sk) static void unix_dgram_disconnected(struct sock *sk, struct sock *other) { if (!skb_queue_empty(&sk->sk_receive_queue)) { - skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge_reason(&sk->sk_receive_queue, + SKB_DROP_REASON_UNIX_DISCONNECT); + wake_up_interruptible_all(&unix_sk(sk)->peer_wait); /* If one link of bidirectional dgram pipe is disconnected, -- cgit v1.2.3 From 3b2d40dc13c26a4efde438beb664576d20a9fb4a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:41 +0900 Subject: af_unix: Reuse out_pipe label in unix_stream_sendmsg(). This is a follow-up of commit d460b04bc452 ("af_unix: Clean up error paths in unix_stream_sendmsg()."). If we initialise skb with NULL in unix_stream_sendmsg(), we can reuse the existing out_pipe label for the SEND_SHUTDOWN check. Let's rename it and adjust the existing label as out_pipe_lock. While at it, size and data_len are moved to the while loop scope. Suggested-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-9-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5e1b408c19da..43a45cf06f2e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2238,13 +2238,11 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; + struct sk_buff *skb = NULL; struct sock *other = NULL; - int err, size; - struct sk_buff *skb; - int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int data_len; + int err, sent = 0; err = scm_send(sock, msg, &scm, false); if (err < 0) @@ -2273,16 +2271,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } } - if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) { - if (!(msg->msg_flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - - err = -EPIPE; - goto out_err; - } + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) + goto out_pipe; while (sent < len) { - size = len - sent; + int size = len - sent; + int data_len; if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb = sock_alloc_send_pskb(sk, 0, 0, @@ -2335,7 +2329,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) - goto out_pipe; + goto out_pipe_unlock; maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); @@ -2358,8 +2352,9 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return sent; -out_pipe: +out_pipe_unlock: unix_state_unlock(other); +out_pipe: if (!sent && !(msg->msg_flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; -- cgit v1.2.3 From 085e6cba85ca81fbb4ebfc238c934108f0e8467e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:42 +0900 Subject: af_unix: Use consume_skb() in connect() and sendmsg(). This is based on Donald Hunter's patch. These functions could fail for various reasons, sometimes triggering kfree_skb(). * unix_stream_connect() : connect() * unix_stream_sendmsg() : sendmsg() * queue_oob() : sendmsg(MSG_OOB) * unix_dgram_sendmsg() : sendmsg() Such kfree_skb() is tied to the errno of connect() and sendmsg(), and we need not define skb drop reasons. Let's use consume_skb() not to churn kfree_skb() events. Link: https://lore.kernel.org/netdev/eb30b164-7f86-46bf-a5d3-0f8bda5e9398@redhat.com/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-10-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 43a45cf06f2e..34945de1fb1f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1701,7 +1701,7 @@ out_unlock: unix_state_unlock(other); sock_put(other); out_free_skb: - kfree_skb(skb); + consume_skb(skb); out_free_sk: unix_release_sock(newsk, 0); out: @@ -2172,7 +2172,7 @@ out_unlock: out_sock_put: sock_put(other); out_free: - kfree_skb(skb); + consume_skb(skb); out: scm_destroy(&scm); return err; @@ -2189,7 +2189,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; - int err = 0; + int err; skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); @@ -2197,25 +2197,22 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other return err; err = unix_scm_to_skb(scm, skb, !fds_sent); - if (err < 0) { - kfree_skb(skb); - return err; - } + if (err < 0) + goto out; + skb_put(skb, 1); err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); - if (err) { - kfree_skb(skb); - return err; - } + if (err) + goto out; unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) { unix_state_unlock(other); - kfree_skb(skb); - return -EPIPE; + err = -EPIPE; + goto out; } maybe_add_creds(skb, sock, other); @@ -2230,6 +2227,9 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other unix_state_unlock(other); other->sk_data_ready(other); + return 0; +out: + consume_skb(skb); return err; } #endif @@ -2359,7 +2359,7 @@ out_pipe: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_free: - kfree_skb(skb); + consume_skb(skb); out_err: scm_destroy(&scm); return sent ? : err; -- cgit v1.2.3