From 27eac47b00789522ba00501b0838026e1ecb6f05 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 17 Jul 2017 11:35:54 +0200 Subject: net/unix: drop obsolete fd-recursion limits All unix sockets now account inflight FDs to the respective sender. This was introduced in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets and further refined in: commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Author: Hannes Frederic Sowa Date: Wed Feb 3 02:11:03 2016 +0100 unix: correctly track in-flight fds in sending process user_struct Hence, regardless of the stacking depth of FDs, the total number of inflight FDs is limited, and accounted. There is no known way for a local user to exceed those limits or exploit the accounting. Furthermore, the GC logic is independent of the recursion/stacking depth as well. It solely depends on the total number of inflight FDs, regardless of their layout. Lastly, the current `recursion_level' suffers a TOCTOU race, since it checks and inherits depths only at queue time. If we consider `A<-B' to mean `queue-B-on-A', the following sequence circumvents the recursion level easily: A<-B B<-C C<-D ... Y<-Z resulting in: A<-B<-C<-...<-Z With all of this in mind, lets drop the recursion limit. It has no additional security value, anymore. On the contrary, it randomly confuses message brokers that try to forward file-descriptors, since any sendmsg(2) call can fail spuriously with ETOOMANYREFS if a client maliciously modifies the FD while inflight. Cc: Alban Crequy Cc: Simon McVittie Signed-off-by: David Herrmann Reviewed-by: Tom Gundersen Signed-off-by: David S. Miller --- net/unix/af_unix.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..5c53f22d62e8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p) return false; } -#define MAX_RECURSION_LEVEL 4 - static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - unsigned char max_level = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; - /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a @@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + return 0; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; long timeo; struct scm_cookie scm; - int max_level; int data_len = 0; int sk_locked; @@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; - max_level = err + 1; skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1819,8 +1804,6 @@ restart_locked: __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int max_level; int data_len; wait_for_unix_gc(); @@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, kfree_skb(skb); goto out_err; } - max_level = err + 1; fds_sent = true; skb_put(skb, size - data_len); @@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2324,7 +2303,6 @@ redo: last_len = last ? last->len : 0; again: if (skb == NULL) { - unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; -- cgit v1.2.3 From a0917e0bc6efc05834c0c1eafebd579a9c75e6e9 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 18 Aug 2017 15:04:54 -0400 Subject: datagram: When peeking datagrams with offset < 0 don't skip empty skbs Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove headers from UDP packets before queueing"), when udp packets are being peeked the requested extra offset is always 0 as there is no need to skip the udp header. However, when the offset is 0 and the next skb is of length 0, it is only returned once. The behaviour can be seen with the following python script: from socket import *; f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); f.bind(('::', 0)); addr=('::1', f.getsockname()[1]); g.sendto(b'', addr) g.sendto(b'b', addr) print(f.recvfrom(10, MSG_PEEK)); print(f.recvfrom(10, MSG_PEEK)); Where the expected output should be the empty string twice. Instead, make sk_peek_offset return negative values, and pass those values to __skb_try_recv_datagram/__skb_try_recv_from_queue. If the passed offset to __skb_try_recv_from_queue is negative, the checked skb is never skipped. __skb_try_recv_from_queue will then ensure the offset is reset back to 0 if a peek is requested without an offset, unless no packets are found. Also simplify the if condition in __skb_try_recv_from_queue. If _off is greater then 0, and off is greater then or equal to skb->len, then (_off || skb->len) must always be true assuming skb->len >= 0 is always true. Also remove a redundant check around a call to sk_peek_offset in af_unix.c, as it double checked if MSG_PEEK was set in the flags. V2: - Moved the negative fixup into __skb_try_recv_from_queue, and remove now redundant checks - Fix peeking in udp{,v6}_recvmsg to report the right value when the offset is 0 V3: - Marked new branch in __skb_try_recv_from_queue as unlikely. Signed-off-by: Matthew Dawson Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 4 +--- net/core/datagram.c | 12 +++++++++--- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 3 ++- net/unix/af_unix.c | 5 +---- 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..aeeec62992ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,9 +507,7 @@ int sk_set_peek_off(struct sock *sk, int val); static inline int sk_peek_offset(struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { - s32 off = READ_ONCE(sk->sk_peek_off); - if (off >= 0) - return off; + return READ_ONCE(sk->sk_peek_off); } return 0; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..a21ca8dee5ea 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -169,14 +169,20 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, int *peeked, int *off, int *err, struct sk_buff **last) { + bool peek_at_off = false; struct sk_buff *skb; - int _off = *off; + int _off = 0; + + if (unlikely(flags & MSG_PEEK && *off >= 0)) { + peek_at_off = true; + _off = *off; + } *last = queue->prev; skb_queue_walk(queue, skb) { if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { + if (peek_at_off && _off >= skb->len && + (_off || skb->peeked)) { _off -= skb->len; continue; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7c804f73990..cd1d044a7fa5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1574,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 578142b7ca3e..20039c8501eb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -362,7 +362,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..be8982b4f8c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2304,10 +2304,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, */ mutex_lock(&u->iolock); - if (flags & MSG_PEEK) - skip = sk_peek_offset(sk, flags); - else - skip = 0; + skip = max(sk_peek_offset(sk, flags), 0); do { int chunk; -- cgit v1.2.3 From 110af3acb8cfd79dcb5676a01e07cb2b6afa4c04 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Oct 2017 12:05:30 -0500 Subject: net: af_unix: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7f46bab4ce5c..a9ee634f3c42 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, */ case SOCK_RAW: sock->type = SOCK_DGRAM; + /* fall through */ case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; -- cgit v1.2.3