From d04bccd8c19d601232ed3e3c9e248c0040167d47 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 7 Jun 2024 16:55:37 +0200 Subject: listmount: allow listing in reverse order util-linux is about to implement listmount() and statmount() support. Karel requested the ability to scan the mount table in backwards order because that's what libmount currently does in order to get the latest mount first. We currently don't support this in listmount(). Add a new LISTMOUNT_REVERSE flag to allow listing mounts in reverse order. For example, listing all child mounts of /sys without LISTMOUNT_REVERSE gives: /sys/kernel/security @ mnt_id: 4294968369 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/bpf @ mnt_id: 4294968372 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/kernel/debug @ mnt_id: 4294968374 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/config @ mnt_id: 4294968376 whereas with LISTMOUNT_REVERSE it gives: /sys/kernel/config @ mnt_id: 4294968376 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/debug @ mnt_id: 4294968374 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/fs/bpf @ mnt_id: 4294968372 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/kernel/security @ mnt_id: 4294968369 Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-4-7877a2bfa5e5@kernel.org Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ad5478dbad00..88d78de1519f 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -207,5 +207,6 @@ struct mnt_id_req { * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 
09b31295f833031c88419550172703d45c5401e3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:47 -0400 Subject: fs: export the mount ns id via statmount In order to allow users to iterate through children mount namespaces via listmount we need a way for them to know what the ns id is for the mount. Add a new field to statmount called mnt_ns_id which will carry the ns id for the given mount entry. Co-developed-by: Christian Brauner Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/6dabf437331fb7415d886f7c64b21cb2a50b1c66.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- fs/namespace.c | 11 +++++++++++ include/uapi/linux/mount.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/namespace.c b/fs/namespace.c index eebe9d912a71..ed2d9353e4be 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4974,6 +4974,14 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) return 0; } +static void statmount_mnt_ns_id(struct kstatmount *s) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + + s->sm.mask |= STATMOUNT_MNT_NS_ID; + s->sm.mnt_ns_id = ns->seq; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret; @@ -5070,6 +5078,9 @@ static int do_statmount(struct kstatmount *s) if (!err && s->mask & STATMOUNT_MNT_POINT) err = statmount_string(s, STATMOUNT_MNT_POINT); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) + statmount_mnt_ns_id(s); + if (err) return err; diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 88d78de1519f..a07508aee518 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace 
*/ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -202,6 +203,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 0a3deb11858ae8a0b3849b5fda45512ad383f0e1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Jun 2024 11:49:48 -0400 Subject: fs: Allow listmount() in foreign mount namespace Expand struct mnt_id_req to add an optional mnt_ns_id field. When this field is populated, listmount() will be performed on the specified mount namespace, provided the current application has CAP_SYS_ADMIN in its user namespace and the mount namespace is a child of the current namespace. Co-developed-by: Josef Bacik Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/49930bdce29a8367a213eb14c1e68e7e49284f86.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- fs/namespace.c | 88 ++++++++++++++++++++++++++++++++++++---------- include/uapi/linux/mount.h | 2 ++ 2 files changed, 72 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/namespace.c b/fs/namespace.c index ed2d9353e4be..a54d68f822a8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5122,7 +5122,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, int ret; size_t usize; - BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); + BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1); ret = get_user(usize, &req->size); if (ret) @@ -5140,6 +5140,58 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, return 0; } +static struct mount *listmnt_next(struct mount *curr, bool reverse) +{ + struct rb_node *node; + + if (reverse) + node = rb_prev(&curr->mnt_node); + else + node 
= rb_next(&curr->mnt_node); + + return node_to_mount(node); +} + +static int grab_requested_root(struct mnt_namespace *ns, struct path *root) +{ + struct mount *first; + + rwsem_assert_held(&namespace_sem); + + /* We're looking at our own ns, just use get_fs_root. */ + if (ns == current->nsproxy->mnt_ns) { + get_fs_root(current->fs, root); + return 0; + } + + /* + * We have to find the first mount in our ns and use that, however it + * may not exist, so handle that properly. + */ + if (RB_EMPTY_ROOT(&ns->mounts)) + return -ENOENT; + + first = listmnt_next(ns->root, false); + if (!first) + return -ENOENT; + root->mnt = mntget(&first->mnt); + root->dentry = dget(root->mnt->mnt_root); + return 0; +} + +/* + * If the user requested a specific mount namespace id, look that up and return + * that, or if not simply grab a passive reference on our mount namespace and + * return that. + */ +static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id) +{ + if (mnt_ns_id) + return lookup_mnt_ns(mnt_ns_id); + refcount_inc(&current->nsproxy->mnt_ns->passive); + return current->nsproxy->mnt_ns; +} + SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, struct statmount __user *, buf, size_t, bufsize, unsigned int, flags) @@ -5185,30 +5237,21 @@ retry: return ret; } -static struct mount *listmnt_next(struct mount *curr, bool reverse) -{ - struct rb_node *node; - - if (reverse) - node = rb_prev(&curr->mnt_node); - else - node = rb_next(&curr->mnt_node); - - return node_to_mount(node); -} - -static ssize_t do_listmount(u64 mnt_parent_id, u64 last_mnt_id, u64 *mnt_ids, - size_t nr_mnt_ids, bool reverse) +static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id, + u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids, + bool reverse) { struct path root __free(path_put) = {}; - struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct path orig; struct mount *r, *first; ssize_t ret; rwsem_assert_held(&namespace_sem); - get_fs_root(current->fs, &root); + ret = 
grab_requested_root(ns, &root); + if (ret) + return ret; + if (mnt_parent_id == LSMT_ROOT) { orig = root; } else { @@ -5260,6 +5303,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, { u64 *kmnt_ids __free(kvfree) = NULL; const size_t maxcount = 1000000; + struct mnt_namespace *ns __free(mnt_ns_release) = NULL; struct mnt_id_req kreq; ssize_t ret; @@ -5286,8 +5330,16 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, if (!kmnt_ids) return -ENOMEM; + ns = grab_requested_mnt_ns(kreq.mnt_ns_id); + if (!ns) + return -ENOENT; + + if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && + !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) + return -ENOENT; + scoped_guard(rwsem_read, &namespace_sem) - ret = do_listmount(kreq.mnt_id, kreq.param, kmnt_ids, + ret = do_listmount(ns, kreq.mnt_id, kreq.param, kmnt_ids, nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids))) diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index a07508aee518..ee1559cd6764 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -189,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) -- cgit v1.2.3 From e8e43a1fcc5c07575f37e40f8a2cd78aee46f9a0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:50 -0400 Subject: fs: add an ioctl to get the mnt ns id from nsfs In order to utilize the listmount() and statmount() extensions that allow us to call them on different namespaces we need a way to get the mnt namespace id from user space. Add an ioctl to nsfs that will allow us to extract the mnt namespace id in order to make these new extensions usable. 
Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- fs/nsfs.c | 14 ++++++++++++++ include/uapi/linux/nsfs.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/nsfs.c b/fs/nsfs.c index 07e22a15ef02..af352dadffe1 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -12,6 +12,7 @@ #include #include +#include "mount.h" #include "internal.h" static struct vfsmount *nsfs_mnt; @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, argp = (uid_t __user *) arg; uid = from_kuid_munged(current_user_ns(), user_ns->owner); return put_user(uid, argp); + case NS_GET_MNTNS_ID: { + struct mnt_namespace *mnt_ns; + __u64 __user *idp; + __u64 id; + + if (ns->ops->type != CLONE_NEWNS) + return -EINVAL; + + mnt_ns = container_of(ns, struct mnt_namespace, ns); + idp = (__u64 __user *)arg; + id = mnt_ns->seq; + return put_user(id, idp); + } default: return -ENOTTY; } diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..56e8b1639b98 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,7 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From f9af549d1fd31487bbbc666b5b158cfc940ccc17 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 15:40:52 -0400 Subject: fs: export mount options via statmount() statmount() can export arbitrary strings, so utilize the __spare1 slot for a mnt_opts string pointer, and then support asking for and setting the mount options during statmount(). 
This calls into the helper for showing mount options, which already uses a seq_file, so fits in nicely with our existing mechanism for exporting strings via statmount(). Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/3aa6bf8bd5d0a21df9ebd63813af8ab532c18276.1719257716.git.josef@toxicpanda.com Reviewed-by: Jeff Layton [brauner: only call sb->s_op->show_options()] Signed-off-by: Christian Brauner --- fs/namespace.c | 37 ++++++++++++++++++++++++++++++++++++- include/uapi/linux/mount.h | 3 ++- 2 files changed, 38 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/namespace.c b/fs/namespace.c index a989e89b0a10..c53a0ee748c6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4980,6 +4980,34 @@ static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) s->sm.mnt_ns_id = ns->seq; } +static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) +{ + struct vfsmount *mnt = s->mnt; + struct super_block *sb = mnt->mnt_sb; + int err; + + if (sb->s_op->show_options) { + size_t start = seq->count; + + err = sb->s_op->show_options(seq, mnt->mnt_root); + if (err) + return err; + + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; + + if (seq->count == start) + return 0; + + /* skip leading comma */ + memmove(seq->buf + start, seq->buf + start + 1, + seq->count - start - 1); + seq->count--; + } + + return 0; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret; @@ -5000,6 +5028,10 @@ static int statmount_string(struct kstatmount *s, u64 flag) sm->mnt_point = seq->count; ret = statmount_mnt_point(s, seq); break; + case STATMOUNT_MNT_OPTS: + sm->mnt_opts = seq->count; + ret = statmount_mnt_opts(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; @@ -5130,6 +5162,9 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, if (!err && s->mask & STATMOUNT_MNT_POINT) err = statmount_string(s, STATMOUNT_MNT_POINT); + if (!err && s->mask & 
STATMOUNT_MNT_OPTS) + err = statmount_string(s, STATMOUNT_MNT_OPTS); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) statmount_mnt_ns_id(s, ns); @@ -5151,7 +5186,7 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) } #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ - STATMOUNT_FS_TYPE) + STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS) static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ee1559cd6764..225bc366ffcb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -206,6 +206,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3