diff options
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 243 |
1 files changed, 147 insertions, 96 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5d1192ceb139..ba7ced947e51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) sk_add_node(sk, list); } +static void __unix_set_addr(struct sock *sk, struct unix_address *addr, + unsigned hash) +{ + __unix_remove_socket(sk); + smp_store_release(&unix_sk(sk)->addr, addr); + __unix_insert_socket(&unix_socket_table[hash], sk); +} + static inline void unix_remove_socket(struct sock *sk) { spin_lock(&unix_table_lock); @@ -278,11 +286,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, unsigned int hash) + int len, unsigned int hash) { struct sock *s; - sk_for_each(s, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -297,13 +305,12 @@ static struct sock *__unix_find_socket_byname(struct net *net, static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, - unsigned int hash) + int len, unsigned int hash) { struct sock *s; spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(net, sunname, len, type, hash); + s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); @@ -484,7 +491,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { other->sk_err = ECONNRESET; - other->sk_error_report(other); + sk_error_report(other); } } } @@ -891,12 +898,12 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + addr->hash ^= sk->sk_type; spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, - addr->hash)) { + if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names @@ -911,11 +918,8 @@ retry: } goto retry; } - addr->hash ^= sk->sk_type; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(&unix_socket_table[addr->hash], sk); + __unix_set_addr(sk, addr, addr->hash); spin_unlock(&unix_table_lock); err = 0; @@ -960,7 +964,7 @@ static struct sock *unix_find_other(struct net *net, } } else { err = -ECONNREFUSED; - u = unix_find_socket_byname(net, sunname, len, type, hash); + u = unix_find_socket_byname(net, sunname, len, type ^ hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->path.dentry; @@ -978,125 +982,125 @@ fail: return NULL; } -static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) { + struct unix_sock *u = unix_sk(sk); + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + struct user_namespace *ns; // barf... + struct path parent; struct dentry *dentry; - struct path path; - int err = 0; + unsigned int hash; + int err; + /* * Get the parent directory, calculate the hash for last * component. */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); + dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); if (IS_ERR(dentry)) - return err; + return PTR_ERR(dentry); + ns = mnt_user_ns(parent.mnt); /* * All right, let's create it. */ - err = security_path_mknod(&path, dentry, mode, 0); - if (!err) { - err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry), - dentry, mode, 0); - if (!err) { - res->mnt = mntget(path.mnt); - res->dentry = dget(dentry); - } - } - done_path_create(&path, dentry); + err = security_path_mknod(&parent, dentry, mode, 0); + if (!err) + err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); + if (err) + goto out; + err = mutex_lock_interruptible(&u->bindlock); + if (err) + goto out_unlink; + if (u->addr) + goto out_unlock; + + addr->hash = UNIX_HASH_SIZE; + hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); + u->path.mnt = mntget(parent.mnt); + u->path.dentry = dget(dentry); + __unix_set_addr(sk, addr, hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + done_path_create(&parent, dentry); + return 0; + +out_unlock: + mutex_unlock(&u->bindlock); + err = -EINVAL; +out_unlink: + /* failed after successful mknod? unlink what we'd created... */ + vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); +out: + done_path_create(&parent, dentry); return err; } +static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) +{ + struct unix_sock *u = unix_sk(sk); + int err; + + err = mutex_lock_interruptible(&u->bindlock); + if (err) + return err; + + if (u->addr) { + mutex_unlock(&u->bindlock); + return -EINVAL; + } + + spin_lock(&unix_table_lock); + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + addr->hash)) { + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return -EADDRINUSE; + } + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return 0; +} + static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; int err; unsigned int hash; struct unix_address *addr; - struct hlist_head *list; - struct path path = { }; - err = -EINVAL; if (addr_len < offsetofend(struct sockaddr_un, sun_family) || sunaddr->sun_family != AF_UNIX) - goto out; + return -EINVAL; - if (addr_len == sizeof(short)) { - err = unix_autobind(sock); - goto out; - } + if (addr_len == sizeof(short)) + return unix_autobind(sock); err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) - goto out; + return err; addr_len = err; - - if (sun_path[0]) { - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - goto out; - } - } - - err = mutex_lock_interruptible(&u->bindlock); - if (err) - goto out_put; - - err = -EINVAL; - if (u->addr) - goto out_up; - - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) - goto out_up; + return -ENOMEM; memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; refcount_set(&addr->refcnt, 1); - if (sun_path[0]) { - addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); - spin_lock(&unix_table_lock); - u->path = path; - list = &unix_socket_table[hash]; - } else { - spin_lock(&unix_table_lock); - err = -EADDRINUSE; - if (__unix_find_socket_byname(net, sunaddr, addr_len, - sk->sk_type, hash)) { - unix_release_addr(addr); - goto out_unlock; - } - - list = &unix_socket_table[addr->hash]; - } - - err = 0; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(list, sk); - -out_unlock: - spin_unlock(&unix_table_lock); -out_up: - mutex_unlock(&u->bindlock); -out_put: + if (sun_path[0]) + err = unix_bind_bsd(sk, addr); + else + err = unix_bind_abstract(sk, addr); if (err) - path_put(&path); -out: - return err; + unix_release_addr(addr); + return err == -EEXIST ? -EADDRINUSE : err; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -1393,7 +1397,7 @@ restart: unix_state_unlock(sk); - /* take ten and and send info to listening sock */ + /* take ten and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); @@ -1522,6 +1526,53 @@ out: return err; } +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + /* + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. + * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. + */ + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -2171,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2414,7 +2465,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); |