From 709894c52c1cafa36fe2047ba5a0b83bdf398133 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Wed, 9 Apr 2025 14:50:58 +0200 Subject: af_unix: Remove unix_unhash() Dummy unix_unhash() was introduced for sockmap in commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap"), but there's no need to implement it anymore. ->unhash() is only called conditionally: in unix_shutdown() since commit d359902d5c35 ("af_unix: Fix NULL pointer bug in unix_shutdown"), and in BPF proto's sock_map_unhash() since commit 5b4a79ba65a1 ("bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself"). Remove it. Signed-off-by: Michal Luczaj Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250409-cleanup-drop-unix-unhash-v1-1-1659e5b8ee84@rbox.co Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f78a2492826f..2ab20821d6bb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -950,13 +950,6 @@ static void unix_close(struct sock *sk, long timeout) */ } -static void unix_unhash(struct sock *sk) -{ - /* Nothing to do here, unix socket does not need a ->unhash(). - * This is merely for sockmap. - */ -} - static bool unix_bpf_bypass_getsockopt(int level, int optname) { if (level == SOL_SOCKET) { @@ -987,7 +980,6 @@ struct proto unix_stream_proto = { .owner = THIS_MODULE, .obj_size = sizeof(struct unix_sock), .close = unix_close, - .unhash = unix_unhash, .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = unix_stream_bpf_update_proto, -- cgit v1.2.3 From 350d4546295949c8a29e345b340e57772813037a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:52 -0700 Subject: af_unix: Factorise test_bit() for SOCK_PASSCRED and SOCK_PASSPIDFD. Currently, the same checks for SOCK_PASSCRED and SOCK_PASSPIDFD are scattered across many places. Let's centralise the bit tests to make the following changes cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2ab20821d6bb..464e183ffdb8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -765,6 +765,14 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) spin_unlock(&sk->sk_peer_lock); } +static bool unix_may_passcred(const struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + return test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags); +} + static int unix_listen(struct socket *sock, int backlog) { int err; @@ -1411,9 +1419,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(unix_sk(sk)->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1531,9 +1537,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(u->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -1877,16 +1881,6 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } -static bool unix_passcred_enabled(const struct socket *sock, - const struct sock *other) -{ - return test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags) || - test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags); -} - /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1897,7 +1891,9 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (unix_passcred_enabled(sock, other)) { + + if (unix_may_passcred(sock->sk) || + !other->sk_socket || unix_may_passcred(other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } @@ -1974,9 +1970,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if ((test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) && - !READ_ONCE(u->addr)) { + if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) { err = unix_autobind(sk); if (err) goto out; @@ -2846,8 +2840,7 @@ unlock: /* Never glue messages from different writers */ if (!unix_skb_scm_eq(skb, &scm)) break; - } else if (test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags)) { + } else if (unix_may_passcred(sk)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(&scm, skb); -- cgit v1.2.3 From 3041bbbeb41b807d2e24d7d78d9cc1387d95898a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:53 -0700 Subject: af_unix: Don't pass struct socket to maybe_add_creds(). We will move SOCK_PASS{CRED,PIDFD,SEC} from struct socket.flags to struct sock for better handling with SOCK_PASSRIGHTS. Then, we don't need to access struct socket in maybe_add_creds(). Let's pass struct sock to maybe_add_creds() and its caller queue_oob(). While at it, we append the unix_ prefix and fix double spaces around the pid assignment. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 464e183ffdb8..a39497fd6e98 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1869,7 +1869,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen { int err = 0; - UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).pid = get_pid(scm->pid); UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; @@ -1886,15 +1886,15 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen * We include credentials if source or destination socket * asserted SOCK_PASSCRED. */ -static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, - const struct sock *other) +static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, + const struct sock *other) { if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sock->sk) || + if (unix_may_passcred(sk) || !other->sk_socket || unix_may_passcred(other)) { - UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -2133,7 +2133,8 @@ restart_locked: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); - maybe_add_creds(skb, sock, other); + + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); @@ -2161,14 +2162,14 @@ out: #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) #if IS_ENABLED(CONFIG_AF_UNIX_OOB) -static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other, +static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, struct scm_cookie *scm, bool fds_sent) { struct unix_sock *ousk = unix_sk(other); struct sk_buff *skb; int err; - skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) return err; @@ -2192,7 +2193,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other goto out; } - maybe_add_creds(skb, sock, other); + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); spin_lock(&other->sk_receive_queue.lock); @@ -2308,7 +2309,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, (other->sk_shutdown & RCV_SHUTDOWN)) goto out_pipe_unlock; - maybe_add_creds(skb, sock, other); + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); unix_state_unlock(other); @@ -2318,7 +2319,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (msg->msg_flags & MSG_OOB) { - err = queue_oob(sock, msg, other, &scm, fds_sent); + err = queue_oob(sk, msg, other, &scm, fds_sent); if (err) goto out_err; sent++; -- cgit v1.2.3 From 0e81cfd971dc4833c699dcd8924e54a5021bc4e8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:57 -0700 Subject: af_unix: Move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. As explained in the next patch, SO_PASSRIGHTS would have a problem if we assigned a corresponding bit to socket->flags, so it must be managed in struct sock. Mixing socket->flags and sk->sk_flags for similar options will look confusing, and sk->sk_flags does not have enough space on 32bit system. Also, as mentioned in commit 16e572626961 ("af_unix: dont send SCM_CREDENTIALS by default"), SOCK_PASSCRED and SOCK_PASSPID handling is known to be slow, and managing the flags in struct socket cannot avoid that for embryo sockets. Let's move SOCK_PASS{CRED,PIDFD,SEC} to struct sock. While at it, other SOCK_XXX flags in net.h are grouped as enum. Note that assign_bit() was atomic, so the writer side is moved down after lock_sock() in setsockopt(), but the bit is only read once in sendmsg() and recvmsg(), so lock_sock() is not needed there. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/net.h | 15 +++++++-------- include/net/sock.h | 16 +++++++++++++++- net/core/scm.c | 29 ++++++++++++++--------------- net/core/sock.c | 44 +++++++++++++++++++++++--------------------- net/unix/af_unix.c | 18 ++---------------- 5 files changed, 61 insertions(+), 61 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/linux/net.h b/include/linux/net.h index 0ff950eecc6b..f8418d6e33e0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -36,14 +36,13 @@ struct net; * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. * Eventually all flags will be in sk->sk_wq->flags. */ -#define SOCKWQ_ASYNC_NOSPACE 0 -#define SOCKWQ_ASYNC_WAITDATA 1 -#define SOCK_NOSPACE 2 -#define SOCK_PASSCRED 3 -#define SOCK_PASSSEC 4 -#define SOCK_SUPPORT_ZC 5 -#define SOCK_CUSTOM_SOCKOPT 6 -#define SOCK_PASSPIDFD 7 +enum socket_flags { + SOCKWQ_ASYNC_NOSPACE, + SOCKWQ_ASYNC_WAITDATA, + SOCK_NOSPACE, + SOCK_SUPPORT_ZC, + SOCK_CUSTOM_SOCKOPT, +}; #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/net/sock.h b/include/net/sock.h index 483522377955..d90a71f66ab8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -337,6 +337,11 @@ struct sk_filter; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @sk_scm_recv_flags: all flags used by scm_recv() + * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS + * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY + * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD + * @sk_scm_unused: unused flags for scm_recv() * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. * @sk_owner: reference to the real owner of the socket that calls @@ -523,7 +528,16 @@ struct sock { #endif int sk_disconnects; - u8 sk_txrehash; + union { + u8 sk_txrehash; + u8 sk_scm_recv_flags; + struct { + u8 sk_scm_credentials : 1, + sk_scm_security : 1, + sk_scm_pidfd : 1, + sk_scm_unused : 5; + }; + }; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, diff --git a/net/core/scm.c b/net/core/scm.c index 66e02b18c359..0225bd94170f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -406,12 +406,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) EXPORT_SYMBOL(scm_fp_dup); #ifdef CONFIG_SECURITY_NETWORK -static void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) +static void scm_passec(struct sock *sk, struct msghdr *msg, struct scm_cookie *scm) { struct lsm_context ctx; int err; - if (test_bit(SOCK_PASSSEC, &sock->flags)) { + if (sk->sk_scm_security) { err = security_secid_to_secctx(scm->secid, &ctx); if (err >= 0) { @@ -423,16 +423,16 @@ static void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cooki } } -static bool scm_has_secdata(struct socket *sock) +static bool scm_has_secdata(struct sock *sk) { - return test_bit(SOCK_PASSSEC, &sock->flags); + return sk->sk_scm_security; } #else -static void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) +static void scm_passec(struct sock *sk, struct msghdr *msg, struct scm_cookie *scm) { } -static bool scm_has_secdata(struct socket *sock) +static bool scm_has_secdata(struct sock *sk) { return false; } @@ -474,20 +474,19 @@ static void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm) fd_install(pidfd, pidfd_file); } -static bool __scm_recv_common(struct socket *sock, struct msghdr *msg, +static bool __scm_recv_common(struct sock *sk, struct msghdr *msg, struct scm_cookie *scm, int flags) { if (!msg->msg_control) { - if (test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags) || - scm->fp || scm_has_secdata(sock)) + if (sk->sk_scm_credentials || sk->sk_scm_pidfd || + scm->fp || scm_has_secdata(sk)) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return false; } - if (test_bit(SOCK_PASSCRED, &sock->flags)) { + if (sk->sk_scm_credentials) { struct user_namespace *current_ns = current_user_ns(); struct ucred ucreds = { .pid = scm->creds.pid, @@ -498,7 +497,7 @@ static bool __scm_recv_common(struct socket *sock, struct msghdr *msg, put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds); } - scm_passec(sock, msg, scm); + scm_passec(sk, msg, scm); if (scm->fp) scm_detach_fds(msg, scm); @@ -509,7 +508,7 @@ static bool __scm_recv_common(struct socket *sock, struct msghdr *msg, void scm_recv(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { - if (!__scm_recv_common(sock, msg, scm, flags)) + if (!__scm_recv_common(sock->sk, msg, scm, flags)) return; scm_destroy_cred(scm); @@ -519,10 +518,10 @@ EXPORT_SYMBOL(scm_recv); void scm_recv_unix(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm, int flags) { - if (!__scm_recv_common(sock, msg, scm, flags)) + if (!__scm_recv_common(sock->sk, msg, scm, flags)) return; - if (test_bit(SOCK_PASSPIDFD, &sock->flags)) + if (sock->sk->sk_scm_pidfd) scm_pidfd_recv(msg, scm); scm_destroy_cred(scm); diff --git a/net/core/sock.c b/net/core/sock.c index fd5f9d3873c1..381abf8f25b7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1220,24 +1220,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname, return 0; } return -EPERM; - case SO_PASSSEC: - if (!IS_ENABLED(CONFIG_SECURITY_NETWORK) || sk_may_scm_recv(sk)) - return -EOPNOTSUPP; - - assign_bit(SOCK_PASSSEC, &sock->flags, valbool); - return 0; - case SO_PASSCRED: - if (!sk_may_scm_recv(sk)) - return -EOPNOTSUPP; - - assign_bit(SOCK_PASSCRED, &sock->flags, valbool); - return 0; - case SO_PASSPIDFD: - if (!sk_is_unix(sk)) - return -EOPNOTSUPP; - - assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); - return 0; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -1568,6 +1550,26 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; + case SO_PASSCRED: + if (sk_may_scm_recv(sk)) + sk->sk_scm_credentials = valbool; + else + ret = -EOPNOTSUPP; + break; + + case SO_PASSSEC: + if (IS_ENABLED(CONFIG_SECURITY_NETWORK) && sk_may_scm_recv(sk)) + sk->sk_scm_security = valbool; + else + ret = -EOPNOTSUPP; + break; + + case SO_PASSPIDFD: + if (sk_is_unix(sk)) + sk->sk_scm_pidfd = valbool; + else + ret = -EOPNOTSUPP; + break; case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); @@ -1867,14 +1869,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname, if (!sk_may_scm_recv(sk)) return -EOPNOTSUPP; - v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); + v.val = sk->sk_scm_credentials; break; case SO_PASSPIDFD: if (!sk_is_unix(sk)) return -EOPNOTSUPP; - v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags); + v.val = sk->sk_scm_pidfd; break; case SO_PEERCRED: @@ -1974,7 +1976,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, if (!IS_ENABLED(CONFIG_SECURITY_NETWORK) || !sk_may_scm_recv(sk)) return -EOPNOTSUPP; - v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); + v.val = sk->sk_scm_security; break; case SO_PEERSEC: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a39497fd6e98..27ebda4cd9b9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -767,10 +767,7 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) static bool unix_may_passcred(const struct sock *sk) { - struct socket *sock = sk->sk_socket; - - return test_bit(SOCK_PASSCRED, &sock->flags) || - test_bit(SOCK_PASSPIDFD, &sock->flags); + return sk->sk_scm_credentials || sk->sk_scm_pidfd; } static int unix_listen(struct socket *sock, int backlog) @@ -1713,17 +1710,6 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } -static void unix_sock_inherit_flags(const struct socket *old, - struct socket *new) -{ - if (test_bit(SOCK_PASSCRED, &old->flags)) - set_bit(SOCK_PASSCRED, &new->flags); - if (test_bit(SOCK_PASSPIDFD, &old->flags)) - set_bit(SOCK_PASSPIDFD, &new->flags); - if (test_bit(SOCK_PASSSEC, &old->flags)) - set_bit(SOCK_PASSSEC, &new->flags); -} - static int unix_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { @@ -1760,7 +1746,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; - unix_sock_inherit_flags(sock, newsock); + tsk->sk_scm_recv_flags = READ_ONCE(sk->sk_scm_recv_flags); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; -- cgit v1.2.3 From 3f84d577b79d2fce8221244f2509734940609ca6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:58 -0700 Subject: af_unix: Inherit sk_flags at connect(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For SOCK_STREAM embryo sockets, the SO_PASS{CRED,PIDFD,SEC} options are inherited from the parent listen()ing socket. Currently, this inheritance happens at accept(), because these attributes were stored in sk->sk_socket->flags and the struct socket is not allocated until accept(). This leads to unintentional behaviour. When a peer sends data to an embryo socket in the accept() queue, unix_maybe_add_creds() embeds credentials into the skb, even if neither the peer nor the listener has enabled these options. If the option is enabled, the embryo socket receives the ancillary data after accept(). If not, the data is silently discarded. This conservative approach works for SO_PASS{CRED,PIDFD,SEC}, but would not for SO_PASSRIGHTS; once an SCM_RIGHTS with a hung file descriptor was sent, it'd be game over. To avoid this, we will need to preserve SOCK_PASSRIGHTS even on embryo sockets. Commit aed6ecef55d7 ("af_unix: Save listener for embryo socket.") made it possible to access the parent's flags in sendmsg() via unix_sk(other)->listener->sk->sk_socket->flags, but this introduces an unnecessary condition that is irrelevant for most sockets, accept()ed sockets and clients. Therefore, we moved SOCK_PASSXXX into struct sock. Let’s inherit sk->sk_scm_recv_flags at connect() to avoid receiving SCM_RIGHTS on embryo sockets created from a parent with SO_PASSRIGHTS=0. Note that the parent socket is locked in connect() so we don't need READ_ONCE() for sk_scm_recv_flags. Now, we can remove !other->sk_socket check in unix_maybe_add_creds() to avoid slow SOCK_PASS{CRED,PIDFD} handling for embryo sockets created from a parent with SO_PASS{CRED,PIDFD}=0. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/unix/af_unix.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 27ebda4cd9b9..900bad88fbd2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1626,10 +1626,12 @@ restart: /* The way is open! Fastly set all the necessary fields... */ sock_hold(sk); - unix_peer(newsk) = sk; - newsk->sk_state = TCP_ESTABLISHED; - newsk->sk_type = sk->sk_type; + unix_peer(newsk) = sk; + newsk->sk_state = TCP_ESTABLISHED; + newsk->sk_type = sk->sk_type; + newsk->sk_scm_recv_flags = other->sk_scm_recv_flags; init_peercred(newsk); + newu = unix_sk(newsk); newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); @@ -1746,7 +1748,6 @@ static int unix_accept(struct socket *sock, struct socket *newsock, unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; - tsk->sk_scm_recv_flags = READ_ONCE(sk->sk_scm_recv_flags); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; @@ -1878,8 +1879,7 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sk) || - !other->sk_socket || unix_may_passcred(other)) { + if (unix_may_passcred(sk) || unix_may_passcred(other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } -- cgit v1.2.3 From 77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:59 -0700 Subject: af_unix: Introduce SO_PASSRIGHTS. As long as recvmsg() or recvmmsg() is used with cmsg, it is not possible to avoid receiving file descriptors via SCM_RIGHTS. This behaviour has occasionally been flagged as problematic, as it can be (ab)used to trigger DoS during close(), for example, by passing a FUSE-controlled fd or a hung NFS fd. For instance, as noted on the uAPI Group page [0], an untrusted peer could send a file descriptor pointing to a hung NFS mount and then close it. Once the receiver calls recvmsg() with msg_control, the descriptor is automatically installed, and then the responsibility for the final close() now falls on the receiver, which may result in blocking the process for a long time. Regarding this, systemd calls cmsg_close_all() [1] after each recvmsg() to close() unwanted file descriptors sent via SCM_RIGHTS. However, this cannot work around the issue at all, because the final fput() may still occur on the receiver's side once sendmsg() with SCM_RIGHTS succeeds. Also, even filtering by LSM at recvmsg() does not work for the same reason. Thus, we need a better way to refuse SCM_RIGHTS at sendmsg(). Let's introduce SO_PASSRIGHTS to disable SCM_RIGHTS. Note that this option is enabled by default for backward compatibility. Link: https://uapi-group.org/kernel-features/#disabling-reception-of-scm_rights-for-af_unix-sockets #[0] Link: https://github.com/systemd/systemd/blob/v257.5/src/basic/fd-util.c#L612-L628 #[1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 14 ++++++++++++++ net/unix/af_unix.c | 22 ++++++++++++++++++++-- tools/include/uapi/asm-generic/socket.h | 2 ++ 9 files changed, 49 insertions(+), 3 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 3df5f2dd4c0f..8f1f18adcdb5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -150,6 +150,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 22fa8f19924a..31ac655b7837 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -161,6 +161,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 96831c988606..1f2d5b7a7f5d 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -142,6 +142,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 0x4050 +#define SO_PASSRIGHTS 0x4051 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 5b464a568664..adcba7329386 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -143,6 +143,8 @@ #define SO_RCVPRIORITY 0x005b +#define SO_PASSRIGHTS 0x005c + #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index d90a71f66ab8..92e7c1aae3cc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -341,6 +341,7 @@ struct sk_filter; * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD + * @sk_scm_rights: flagged by SO_PASSRIGHTS to recv SCM_RIGHTS * @sk_scm_unused: unused flags for scm_recv() * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. @@ -535,7 +536,8 @@ struct sock { u8 sk_scm_credentials : 1, sk_scm_security : 1, sk_scm_pidfd : 1, - sk_scm_unused : 5; + sk_scm_rights : 1, + sk_scm_unused : 4; }; }; u8 sk_clockid; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index 381abf8f25b7..0cb52e590094 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1571,6 +1571,13 @@ set_sndbuf: ret = -EOPNOTSUPP; break; + case SO_PASSRIGHTS: + if (sk_is_unix(sk)) + sk->sk_scm_rights = valbool; + else + ret = -EOPNOTSUPP; + break; + case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); break; @@ -1879,6 +1886,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = sk->sk_scm_pidfd; break; + case SO_PASSRIGHTS: + if (!sk_is_unix(sk)) + return -EOPNOTSUPP; + + v.val = sk->sk_scm_rights; + break; + case SO_PEERCRED: { struct ucred peercred; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 900bad88fbd2..bd507f74e35e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1015,6 +1015,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sock_init_data(sock, sk); + sk->sk_scm_rights = 1; sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; @@ -2073,6 +2074,11 @@ restart_locked: goto out_unlock; } + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; + } + if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) @@ -2174,9 +2180,13 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) { - unix_state_unlock(other); err = -EPIPE; - goto out; + goto out_unlock; + } + + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; } unix_maybe_add_creds(skb, sk, other); @@ -2192,6 +2202,8 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, other->sk_data_ready(other); return 0; +out_unlock: + unix_state_unlock(other); out: consume_skb(skb); return err; @@ -2295,6 +2307,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, (other->sk_shutdown & RCV_SHUTDOWN)) goto out_pipe_unlock; + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + unix_state_unlock(other); + err = -EPERM; + goto out_free; + } + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3