From d91ee87d8d85a0808c01787e8b4a6b48f2ba487b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 May 2016 14:51:59 -0500 Subject: vfs: Pass data, ns, and ns->userns to mount_ns Today what is normally called data (the mount options) is not passed to fill_super through mount_ns. Pass the mount options and the namespace separately to mount_ns so that filesystems such as proc that have mount options, can use mount_ns. Pass the user namespace to mount_ns so that the standard permission check that verifies the mounter has permissions over the namespace can be performed in mount_ns instead of in each filesystems .mount method. Thus removing the duplication between mqueuefs and proc in terms of permission checks. The extra permission check does not currently affect the rpc_pipefs filesystem and the nfsd filesystem as those filesystems do not currently allow unprivileged mounts. Without unpvileged mounts it is guaranteed that the caller has already passed capable(CAP_SYS_ADMIN) which guarantees extra permission check will pass. Update rpc_pipefs and the nfsd filesystem to ensure that the network namespace reference is always taken in fill_super and always put in kill_sb so that the logic is simpler and so that errors originating inside of fill_super do not cause a network namespace leak. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- ipc/mqueue.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'ipc/mqueue.c') diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ade739f67f1d..60d97082f4dc 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -305,7 +305,7 @@ err: static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct ipc_namespace *ns = data; + struct ipc_namespace *ns = sb->s_fs_info; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -326,17 +326,14 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - if (!(flags & MS_KERNMOUNT)) { - struct ipc_namespace *ns = current->nsproxy->ipc_ns; - /* Don't allow mounting unless the caller has CAP_SYS_ADMIN - * over the ipc namespace. - */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - data = ns; + struct ipc_namespace *ns; + if (flags & MS_KERNMOUNT) { + ns = data; + data = NULL; + } else { + ns = current->nsproxy->ipc_ns; } - return mount_ns(fs_type, flags, data, mqueue_fill_super); + return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); } static void init_once(void *foo) -- cgit v1.2.3 From 3ee690143c3c99f6c0e83f08ff17556890bc6027 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 May 2016 15:42:21 -0500 Subject: ipc/mqueue: The mqueue filesystem should never contain executables Set SB_I_NOEXEC on mqueuefs to ensure small implementation mistakes do not result in executable on mqueuefs by accident. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- ipc/mqueue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'ipc/mqueue.c') diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 60d97082f4dc..5bdd50de7d05 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,6 +307,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; + sb->s_iflags |= SB_I_NOEXEC; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; -- cgit v1.2.3 From a2982cc922c3068783eb9a1f77a5626a1ec36a1f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 9 Jun 2016 15:34:02 -0500 Subject: vfs: Generalize filesystem nodev handling. Introduce a function may_open_dev that tests MNT_NODEV and a new superblock flab SB_I_NODEV. Use this new function in all of the places where MNT_NODEV was previously tested. Add the new SB_I_NODEV s_iflag to proc, sysfs, and mqueuefs as those filesystems should never support device nodes, and a simple superblock flags makes that very hard to get wrong. With SB_I_NODEV set if any device nodes somehow manage to show up on on a filesystem those device nodes will be unopenable. Acked-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/block_dev.c | 2 +- fs/kernfs/mount.c | 4 ++-- fs/namei.c | 8 +++++++- fs/proc/inode.c | 4 ++-- include/linux/fs.h | 2 ++ ipc/mqueue.c | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'ipc/mqueue.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1d22c6..30b8d568203a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1857,7 +1857,7 @@ struct block_device *lookup_bdev(const char *pathname) if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (path.mnt->mnt_flags & MNT_NODEV) + if (!may_open_dev(&path)) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 1443df670260..b3d73ad52b22 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -152,8 +152,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) struct dentry *root; info->sb = sb; - /* Userspace would break if executables appear on sysfs */ - sb->s_iflags |= SB_I_NOEXEC; + /* Userspace would break if executables or devices appear on sysfs */ + sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = magic; diff --git a/fs/namei.c b/fs/namei.c index 6a82fb7e2127..757a32725d92 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2881,6 +2881,12 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } EXPORT_SYMBOL(vfs_create); +bool may_open_dev(const struct path *path) +{ + return !(path->mnt->mnt_flags & MNT_NODEV) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); +} + static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; @@ -2899,7 +2905,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; case S_IFBLK: case S_IFCHR: - if (path->mnt->mnt_flags & MNT_NODEV) + if (!may_open_dev(path)) return -EACCES; /*FALLTHRU*/ case S_IFIFO: diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f4817efb25a6..a5b2c33745b7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -466,8 +466,8 @@ int proc_fill_super(struct super_block *s, void *data, int silent) if (!proc_parse_options(data, ns)) return -EINVAL; - /* User space would break if executables appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC; + /* User space would break if executables or devices appear on proc */ + s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eef64f23a75..e05983170d23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1327,6 +1327,7 @@ struct mm_struct; /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ +#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ @@ -1602,6 +1603,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *); */ extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); +extern bool may_open_dev(const struct path *path); /* * VFS FS_IOC_FIEMAP helper definitions. */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 5bdd50de7d05..0b13ace266f2 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -307,7 +307,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct ipc_namespace *ns = sb->s_fs_info; - sb->s_iflags |= SB_I_NOEXEC; + sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = MQUEUE_MAGIC; -- cgit v1.2.3