summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-09-29 21:20:29 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-09-29 21:20:29 +0300
commit18b19abc3709b109676ffd1f48dcd332c2e477d4 (patch)
treed0d51d0a3d6f99e6082b42cf6d1ca710a46b6b49 /include
parent5484a4ea7a1f208b886b58dd55cc55f418930f8a (diff)
parent6e65f4e8fc5b02f7a60ebb5b1b83772df0b86663 (diff)
downloadlinux-18b19abc3709b109676ffd1f48dcd332c2e477d4.tar.xz
Merge tag 'namespace-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull namespace updates from Christian Brauner: "This contains a larger set of changes around the generic namespace infrastructure of the kernel. Each specific namespace type (net, cgroup, mnt, ...) embedds a struct ns_common which carries the reference count of the namespace and so on. We open-coded and cargo-culted so many quirks for each namespace type that it just wasn't scalable anymore. So given there's a bunch of new changes coming in that area I've started cleaning all of this up. The core change is to make it possible to correctly initialize every namespace uniformly and derive the correct initialization settings from the type of the namespace such as namespace operations, namespace type and so on. This leaves the new ns_common_init() function with a single parameter which is the specific namespace type which derives the correct parameters statically. This also means the compiler will yell as soon as someone does something remotely fishy. The ns_common_init() addition also allows us to remove ns_alloc_inum() and drops any special-casing of the initial network namespace in the network namespace initialization code that Linus complained about. Another part is reworking the reference counting. The reference counting was open-coded and copy-pasted for each namespace type even though they all followed the same rules. This also removes all open accesses to the reference count and makes it private and only uses a very small set of dedicated helpers to manipulate them just like we do for e.g., files. In addition this generalizes the mount namespace iteration infrastructure introduced a few cycles ago. As reminder, the vfs makes it possible to iterate sequentially and bidirectionally through all mount namespaces on the system or all mount namespaces that the caller holds privilege over. This allow userspace to iterate over all mounts in all mount namespaces using the listmount() and statmount() system call. Each mount namespace has a unique identifier for the lifetime of the systems that is exposed to userspace. The network namespace also has a unique identifier working exactly the same way. This extends the concept to all other namespace types. The new nstree type makes it possible to lookup namespaces purely by their identifier and to walk the namespace list sequentially and bidirectionally for all namespace types, allowing userspace to iterate through all namespaces. Looking up namespaces in the namespace tree works completely locklessly. This also means we can move the mount namespace onto the generic infrastructure and remove a bunch of code and members from struct mnt_namespace itself. There's a bunch of stuff coming on top of this in the future but for now this uses the generic namespace tree to extend a concept introduced first for pidfs a few cycles ago. For a while now we have supported pidfs file handles for pidfds. This has proven to be very useful. This extends the concept to cover namespaces as well. It is possible to encode and decode namespace file handles using the common name_to_handle_at() and open_by_handle_at() apis. As with pidfs file handles, namespace file handles are exhaustive, meaning it is not required to actually hold a reference to nsfs in able to decode aka open_by_handle_at() a namespace file handle. Instead the FD_NSFS_ROOT constant can be passed which will let the kernel grab a reference to the root of nsfs internally and thus decode the file handle. Namespaces file descriptors can already be derived from pidfds which means they aren't subject to overmount protection bugs. IOW, it's irrelevant if the caller would not have access to an appropriate /proc/<pid>/ns/ directory as they could always just derive the namespace based on a pidfd already. It has the same advantage as pidfds. It's possible to reliably and for the lifetime of the system refer to a namespace without pinning any resources and to compare them trivially. Permission checking is kept simple. If the caller is located in the namespace the file handle refers to they are able to open it otherwise they must hold privilege over the owning namespace of the relevant namespace. The namespace file handle layout is exposed as uapi and has a stable and extensible format. For now it simply contains the namespace identifier, the namespace type, and the inode number. The stable format means that userspace may construct its own namespace file handles without going through name_to_handle_at() as they are already allowed for pidfs and cgroup file handles" * tag 'namespace-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (65 commits) ns: drop assert ns: move ns type into struct ns_common nstree: make struct ns_tree private ns: add ns_debug() ns: simplify ns_common_init() further cgroup: add missing ns_common include ns: use inode initializer for initial namespaces selftests/namespaces: verify initial namespace inode numbers ns: rename to __ns_ref nsfs: port to ns_ref_*() helpers net: port to ns_ref_*() helpers uts: port to ns_ref_*() helpers ipv4: use check_net() net: use check_net() net-sysfs: use check_net() user: port to ns_ref_*() helpers time: port to ns_ref_*() helpers pid: port to ns_ref_*() helpers ipc: port to ns_ref_*() helpers cgroup: port to ns_ref_*() helpers ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/cgroup.h47
-rw-r--r--include/linux/cgroup_namespace.h58
-rw-r--r--include/linux/exportfs.h6
-rw-r--r--include/linux/fs.h14
-rw-r--r--include/linux/ipc_namespace.h9
-rw-r--r--include/linux/mnt_namespace.h2
-rw-r--r--include/linux/ns_common.h139
-rw-r--r--include/linux/nsfs.h40
-rw-r--r--include/linux/nsproxy.h11
-rw-r--r--include/linux/nstree.h78
-rw-r--r--include/linux/pid_namespace.h7
-rw-r--r--include/linux/proc_ns.h22
-rw-r--r--include/linux/time_namespace.h13
-rw-r--r--include/linux/user_namespace.h9
-rw-r--r--include/linux/uts_namespace.h65
-rw-r--r--include/linux/utsname.h53
-rw-r--r--include/net/net_namespace.h13
-rw-r--r--include/uapi/linux/fcntl.h1
-rw-r--r--include/uapi/linux/nsfs.h18
19 files changed, 461 insertions, 144 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 56d9556a181a..bab98357960d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -27,6 +27,7 @@
#include <linux/kernel_stat.h>
#include <linux/cgroup-defs.h>
+#include <linux/cgroup_namespace.h>
struct kernel_clone_args;
@@ -783,52 +784,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
#endif /* CONFIG_CGROUP_DATA */
-struct cgroup_namespace {
- struct ns_common ns;
- struct user_namespace *user_ns;
- struct ucounts *ucounts;
- struct css_set *root_cset;
-};
-
-extern struct cgroup_namespace init_cgroup_ns;
-
-#ifdef CONFIG_CGROUPS
-
-void free_cgroup_ns(struct cgroup_namespace *ns);
-
-struct cgroup_namespace *copy_cgroup_ns(u64 flags,
- struct user_namespace *user_ns,
- struct cgroup_namespace *old_ns);
-
-int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
- struct cgroup_namespace *ns);
-
-static inline void get_cgroup_ns(struct cgroup_namespace *ns)
-{
- refcount_inc(&ns->ns.count);
-}
-
-static inline void put_cgroup_ns(struct cgroup_namespace *ns)
-{
- if (refcount_dec_and_test(&ns->ns.count))
- free_cgroup_ns(ns);
-}
-
-#else /* !CONFIG_CGROUPS */
-
-static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
-static inline struct cgroup_namespace *
-copy_cgroup_ns(u64 flags, struct user_namespace *user_ns,
- struct cgroup_namespace *old_ns)
-{
- return old_ns;
-}
-
-static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
-static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
-
-#endif /* !CONFIG_CGROUPS */
-
#ifdef CONFIG_CGROUPS
void cgroup_enter_frozen(void);
diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h
new file mode 100644
index 000000000000..78a8418558a4
--- /dev/null
+++ b/include/linux/cgroup_namespace.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CGROUP_NAMESPACE_H
+#define _LINUX_CGROUP_NAMESPACE_H
+
+#include <linux/ns_common.h>
+
+struct cgroup_namespace {
+ struct ns_common ns;
+ struct user_namespace *user_ns;
+ struct ucounts *ucounts;
+ struct css_set *root_cset;
+};
+
+extern struct cgroup_namespace init_cgroup_ns;
+
+#ifdef CONFIG_CGROUPS
+
+static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct cgroup_namespace, ns);
+}
+
+void free_cgroup_ns(struct cgroup_namespace *ns);
+
+struct cgroup_namespace *copy_cgroup_ns(u64 flags,
+ struct user_namespace *user_ns,
+ struct cgroup_namespace *old_ns);
+
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+ struct cgroup_namespace *ns);
+
+static inline void get_cgroup_ns(struct cgroup_namespace *ns)
+{
+ ns_ref_inc(ns);
+}
+
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+ if (ns_ref_put(ns))
+ free_cgroup_ns(ns);
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline struct cgroup_namespace *
+copy_cgroup_ns(u64 flags, struct user_namespace *user_ns,
+ struct cgroup_namespace *old_ns)
+{
+ return old_ns;
+}
+
+static inline void get_cgroup_ns(struct cgroup_namespace *ns) { }
+static inline void put_cgroup_ns(struct cgroup_namespace *ns) { }
+
+#endif /* !CONFIG_CGROUPS */
+
+#endif /* _LINUX_CGROUP_NAMESPACE_H */
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index cfb0dd1ea49c..3aac58a520c7 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -123,6 +123,12 @@ enum fid_type {
FILEID_BCACHEFS_WITH_PARENT = 0xb2,
/*
+ *
+ * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number.
+ */
+ FILEID_NSFS = 0xf1,
+
+ /*
* 64 bit unique kernfs id
*/
FILEID_KERNFS = 0xfe,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a854f2910cd9..5540836f674b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -4018,4 +4018,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
+static inline bool extensible_ioctl_valid(unsigned int cmd_a,
+ unsigned int cmd_b, size_t min_size)
+{
+ if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b))
+ return false;
+ if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b))
+ return false;
+ if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b))
+ return false;
+ if (_IOC_SIZE(cmd_a) < min_size)
+ return false;
+ return true;
+}
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 4b399893e2b3..12faca29bbb9 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -129,20 +129,25 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
#endif
#if defined(CONFIG_IPC_NS)
+static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct ipc_namespace, ns);
+}
+
extern struct ipc_namespace *copy_ipcs(u64 flags,
struct user_namespace *user_ns, struct ipc_namespace *ns);
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
{
if (ns) {
- if (refcount_inc_not_zero(&ns->ns.count))
+ if (ns_ref_get(ns))
return ns;
}
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index ff290c87b2e7..0acd1089d149 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -11,6 +11,8 @@ struct fs_struct;
struct user_namespace;
struct ns_common;
+extern struct mnt_namespace init_mnt_ns;
+
extern struct mnt_namespace *copy_mnt_ns(u64, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 7d22ea50b098..f5b68b8abb54 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -3,14 +3,151 @@
#define _LINUX_NS_COMMON_H
#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/sched.h>
struct proc_ns_operations;
+struct cgroup_namespace;
+struct ipc_namespace;
+struct mnt_namespace;
+struct net;
+struct pid_namespace;
+struct time_namespace;
+struct user_namespace;
+struct uts_namespace;
+
+extern struct cgroup_namespace init_cgroup_ns;
+extern struct ipc_namespace init_ipc_ns;
+extern struct mnt_namespace init_mnt_ns;
+extern struct net init_net;
+extern struct pid_namespace init_pid_ns;
+extern struct time_namespace init_time_ns;
+extern struct user_namespace init_user_ns;
+extern struct uts_namespace init_uts_ns;
+
+extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations utsns_operations;
+extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations timens_operations;
+extern const struct proc_ns_operations timens_for_children_operations;
+
struct ns_common {
+ u32 ns_type;
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
- refcount_t count;
+ refcount_t __ns_ref; /* do not use directly */
+ union {
+ struct {
+ u64 ns_id;
+ struct rb_node ns_tree_node;
+ struct list_head ns_list_node;
+ };
+ struct rcu_head ns_rcu;
+ };
};
+int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
+void __ns_common_free(struct ns_common *ns);
+
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ const struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ const struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ const struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ const struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ const struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ const struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ const struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns, \
+ const struct uts_namespace *: &(__ns)->ns)
+
+#define ns_init_inum(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
+ struct ipc_namespace *: IPC_NS_INIT_INO, \
+ struct mnt_namespace *: MNT_NS_INIT_INO, \
+ struct net *: NET_NS_INIT_INO, \
+ struct pid_namespace *: PID_NS_INIT_INO, \
+ struct time_namespace *: TIME_NS_INIT_INO, \
+ struct user_namespace *: USER_NS_INIT_INO, \
+ struct uts_namespace *: UTS_NS_INIT_INO)
+
+#define ns_init_ns(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &init_cgroup_ns, \
+ struct ipc_namespace *: &init_ipc_ns, \
+ struct mnt_namespace *: &init_mnt_ns, \
+ struct net *: &init_net, \
+ struct pid_namespace *: &init_pid_ns, \
+ struct time_namespace *: &init_time_ns, \
+ struct user_namespace *: &init_user_ns, \
+ struct uts_namespace *: &init_uts_ns)
+
+#define to_ns_operations(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
+ struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
+ struct mnt_namespace *: &mntns_operations, \
+ struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
+ struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
+ struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
+ struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
+ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
+
+#define ns_common_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CLONE_NEWCGROUP, \
+ struct ipc_namespace *: CLONE_NEWIPC, \
+ struct mnt_namespace *: CLONE_NEWNS, \
+ struct net *: CLONE_NEWNET, \
+ struct pid_namespace *: CLONE_NEWPID, \
+ struct time_namespace *: CLONE_NEWTIME, \
+ struct user_namespace *: CLONE_NEWUSER, \
+ struct uts_namespace *: CLONE_NEWUTS)
+
+#define ns_common_init(__ns) \
+ __ns_common_init(to_ns_common(__ns), \
+ ns_common_type(__ns), \
+ to_ns_operations(__ns), \
+ (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0))
+
+#define ns_common_init_inum(__ns, __inum) \
+ __ns_common_init(to_ns_common(__ns), \
+ ns_common_type(__ns), \
+ to_ns_operations(__ns), \
+ __inum)
+
+#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+
+static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
+{
+ return refcount_dec_and_test(&ns->__ns_ref);
+}
+
+static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
+{
+ return refcount_inc_not_zero(&ns->__ns_ref);
+}
+
+#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
+#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
+#define ns_ref_put_and_lock(__ns, __lock) \
+ refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+
#endif
diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h
new file mode 100644
index 000000000000..e5a5fa83d36b
--- /dev/null
+++ b/include/linux/nsfs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
+
+#ifndef _LINUX_NSFS_H
+#define _LINUX_NSFS_H
+
+#include <linux/ns_common.h>
+#include <linux/cred.h>
+#include <linux/pid_namespace.h>
+
+struct path;
+struct task_struct;
+struct proc_ns_operations;
+
+int ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+typedef struct ns_common *ns_get_path_helper_t(void *);
+int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
+ void *private_data);
+
+bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
+
+int ns_get_name(char *buf, size_t size, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+void nsfs_init(void);
+
+#define __current_namespace_from_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: current->nsproxy->cgroup_ns, \
+ struct ipc_namespace *: current->nsproxy->ipc_ns, \
+ struct net *: current->nsproxy->net_ns, \
+ struct pid_namespace *: task_active_pid_ns(current), \
+ struct mnt_namespace *: current->nsproxy->mnt_ns, \
+ struct time_namespace *: current->nsproxy->time_ns, \
+ struct user_namespace *: current_user_ns(), \
+ struct uts_namespace *: current->nsproxy->uts_ns)
+
+#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
+
+#endif /* _LINUX_NSFS_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 82533e899ff4..bd118a187dec 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,17 +42,6 @@ struct nsproxy {
};
extern struct nsproxy init_nsproxy;
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns->ns), \
- struct ipc_namespace *: &(__ns->ns), \
- struct net *: &(__ns->ns), \
- struct pid_namespace *: &(__ns->ns), \
- struct mnt_namespace *: &(__ns->ns), \
- struct time_namespace *: &(__ns->ns), \
- struct user_namespace *: &(__ns->ns), \
- struct uts_namespace *: &(__ns->ns))
-
/*
* A structure to encompass all bits needed to install
* a partial or complete new set of namespaces.
diff --git a/include/linux/nstree.h b/include/linux/nstree.h
new file mode 100644
index 000000000000..8b8636690473
--- /dev/null
+++ b/include/linux/nstree.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NSTREE_H
+#define _LINUX_NSTREE_H
+
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/rculist.h>
+#include <linux/cookie.h>
+
+extern struct ns_tree cgroup_ns_tree;
+extern struct ns_tree ipc_ns_tree;
+extern struct ns_tree mnt_ns_tree;
+extern struct ns_tree net_ns_tree;
+extern struct ns_tree pid_ns_tree;
+extern struct ns_tree time_ns_tree;
+extern struct ns_tree user_ns_tree;
+extern struct ns_tree uts_ns_tree;
+
+#define to_ns_tree(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(cgroup_ns_tree), \
+ struct ipc_namespace *: &(ipc_ns_tree), \
+ struct net *: &(net_ns_tree), \
+ struct pid_namespace *: &(pid_ns_tree), \
+ struct mnt_namespace *: &(mnt_ns_tree), \
+ struct time_namespace *: &(time_ns_tree), \
+ struct user_namespace *: &(user_ns_tree), \
+ struct uts_namespace *: &(uts_ns_tree))
+
+u64 ns_tree_gen_id(struct ns_common *ns);
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
+struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
+ struct ns_tree *ns_tree,
+ bool previous);
+
+static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
+{
+ ns_tree_gen_id(ns);
+ __ns_tree_add_raw(ns, ns_tree);
+}
+
+/**
+ * ns_tree_add_raw - Add a namespace to a namespace
+ * @ns: Namespace to add
+ *
+ * This function adds a namespace to the appropriate namespace tree
+ * without assigning a id.
+ */
+#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_add - Add a namespace to a namespace tree
+ * @ns: Namespace to add
+ *
+ * This function assigns a new id to the namespace and adds it to the
+ * appropriate namespace tree and list.
+ */
+#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_remove - Remove a namespace from a namespace tree
+ * @ns: Namespace to remove
+ *
+ * This function removes a namespace from the appropriate namespace
+ * tree and list.
+ */
+#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns))
+
+#define ns_tree_adjoined_rcu(__ns, __previous) \
+ __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
+
+#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
+
+#endif /* _LINUX_NSTREE_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c2af02d70201..445517a72ad0 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -54,10 +54,15 @@ extern struct pid_namespace init_pid_ns;
#define PIDNS_ADDING (1U << 31)
#ifdef CONFIG_PID_NS
+static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct pid_namespace, ns);
+}
+
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
if (ns != &init_pid_ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 4b20375f3783..e81b8e596e4f 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -5,7 +5,7 @@
#ifndef _LINUX_PROC_NS_H
#define _LINUX_PROC_NS_H
-#include <linux/ns_common.h>
+#include <linux/nsfs.h>
#include <uapi/linux/nsfs.h>
struct pid_namespace;
@@ -17,7 +17,6 @@ struct inode;
struct proc_ns_operations {
const char *name;
const char *real_ns_name;
- int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
int (*install)(struct nsset *nsset, struct ns_common *ns);
@@ -66,25 +65,6 @@ static inline void proc_free_inum(unsigned int inum) {}
#endif /* CONFIG_PROC_FS */
-static inline int ns_alloc_inum(struct ns_common *ns)
-{
- WRITE_ONCE(ns->stashed, NULL);
- return proc_alloc_inum(&ns->inum);
-}
-
-#define ns_free_inum(ns) proc_free_inum((ns)->inum)
-
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
-extern int ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-typedef struct ns_common *ns_get_path_helper_t(void *);
-extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
- void *private_data);
-
-extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
-
-extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-extern void nsfs_init(void);
#endif /* _LINUX_PROC_NS_H */
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index b6e36525e0be..c514d0e5a45c 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -33,13 +33,18 @@ struct time_namespace {
extern struct time_namespace init_time_ns;
#ifdef CONFIG_TIME_NS
+static inline struct time_namespace *to_time_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct time_namespace, ns);
+}
+void __init time_ns_init(void);
extern int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns);
extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -52,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
- if (refcount_dec_and_test(&ns->ns.count))
+ if (ns_ref_put(ns))
free_time_ns(ns);
}
@@ -108,6 +113,10 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
}
#else
+static inline void __init time_ns_init(void)
+{
+}
+
static inline int vdso_join_timens(struct task_struct *task,
struct time_namespace *ns)
{
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a0bb6d012137..9a9aebbf96b9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -168,10 +168,15 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+static inline struct user_namespace *to_user_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct user_namespace, ns);
+}
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -181,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
- if (ns && refcount_dec_and_test(&ns->ns.count))
+ if (ns && ns_ref_put(ns))
__put_user_ns(ns);
}
diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h
new file mode 100644
index 000000000000..60f37fec0f4b
--- /dev/null
+++ b/include/linux/uts_namespace.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UTS_NAMESPACE_H
+#define _LINUX_UTS_NAMESPACE_H
+
+#include <linux/ns_common.h>
+#include <uapi/linux/utsname.h>
+
+struct user_namespace;
+extern struct user_namespace init_user_ns;
+
+struct uts_namespace {
+ struct new_utsname name;
+ struct user_namespace *user_ns;
+ struct ucounts *ucounts;
+ struct ns_common ns;
+} __randomize_layout;
+
+extern struct uts_namespace init_uts_ns;
+
+#ifdef CONFIG_UTS_NS
+static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct uts_namespace, ns);
+}
+
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+ ns_ref_inc(ns);
+}
+
+extern struct uts_namespace *copy_utsname(u64 flags,
+ struct user_namespace *user_ns, struct uts_namespace *old_ns);
+extern void free_uts_ns(struct uts_namespace *ns);
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+ if (ns_ref_put(ns))
+ free_uts_ns(ns);
+}
+
+void uts_ns_init(void);
+#else
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+
+static inline struct uts_namespace *copy_utsname(u64 flags,
+ struct user_namespace *user_ns, struct uts_namespace *old_ns)
+{
+ if (flags & CLONE_NEWUTS)
+ return ERR_PTR(-EINVAL);
+
+ return old_ns;
+}
+
+static inline void uts_ns_init(void)
+{
+}
+#endif
+
+#endif /* _LINUX_UTS_NAMESPACE_H */
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index ba34ec0e2f95..547bd4439706 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -7,7 +7,7 @@
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
-#include <uapi/linux/utsname.h>
+#include <linux/uts_namespace.h>
enum uts_proc {
UTS_PROC_ARCH,
@@ -18,57 +18,6 @@ enum uts_proc {
UTS_PROC_DOMAINNAME,
};
-struct user_namespace;
-extern struct user_namespace init_user_ns;
-
-struct uts_namespace {
- struct new_utsname name;
- struct user_namespace *user_ns;
- struct ucounts *ucounts;
- struct ns_common ns;
-} __randomize_layout;
-extern struct uts_namespace init_uts_ns;
-
-#ifdef CONFIG_UTS_NS
-static inline void get_uts_ns(struct uts_namespace *ns)
-{
- refcount_inc(&ns->ns.count);
-}
-
-extern struct uts_namespace *copy_utsname(u64 flags,
- struct user_namespace *user_ns, struct uts_namespace *old_ns);
-extern void free_uts_ns(struct uts_namespace *ns);
-
-static inline void put_uts_ns(struct uts_namespace *ns)
-{
- if (refcount_dec_and_test(&ns->ns.count))
- free_uts_ns(ns);
-}
-
-void uts_ns_init(void);
-#else
-static inline void get_uts_ns(struct uts_namespace *ns)
-{
-}
-
-static inline void put_uts_ns(struct uts_namespace *ns)
-{
-}
-
-static inline struct uts_namespace *copy_utsname(u64 flags,
- struct user_namespace *user_ns, struct uts_namespace *old_ns)
-{
- if (flags & CLONE_NEWUTS)
- return ERR_PTR(-EINVAL);
-
- return old_ns;
-}
-
-static inline void uts_ns_init(void)
-{
-}
-#endif
-
#ifdef CONFIG_PROC_SYSCTL
extern void uts_proc_notify(enum uts_proc proc);
#else
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0e008cfe159d..cb664f6e3558 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -262,10 +262,15 @@ void ipx_unregister_sysctl(void);
#ifdef CONFIG_NET_NS
void __put_net(struct net *net);
+static inline struct net *to_net_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct net, ns);
+}
+
/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- refcount_inc(&net->ns.count);
+ ns_ref_inc(net);
return net;
}
@@ -276,7 +281,7 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!refcount_inc_not_zero(&net->ns.count))
+ if (!ns_ref_get(net))
net = NULL;
return net;
}
@@ -284,7 +289,7 @@ static inline struct net *maybe_get_net(struct net *net)
/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (refcount_dec_and_test(&net->ns.count))
+ if (ns_ref_put(net))
__put_net(net);
}
@@ -296,7 +301,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
- return refcount_read(&net->ns.count) != 0;
+ return ns_ref_read(net) != 0;
}
void net_drop_ns(void *);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index f291ab4f94eb..3741ea1b73d8 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -111,6 +111,7 @@
#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */
/* Generic flags for the *at(2) family of syscalls. */
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index 97d8d80d139f..e098759ec917 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -16,8 +16,6 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
-/* Get the id for a mount namespace */
-#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
/* Translate pid from target pid namespace into the caller's pid namespace. */
#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
/* Return thread-group leader id of pid in the callers pid namespace. */
@@ -42,6 +40,10 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
enum init_ns_ino {
IPC_NS_INIT_INO = 0xEFFFFFFFU,
UTS_NS_INIT_INO = 0xEFFFFFFEU,
@@ -51,6 +53,18 @@ enum init_ns_ino {
TIME_NS_INIT_INO = 0xEFFFFFFAU,
NET_NS_INIT_INO = 0xEFFFFFF9U,
MNT_NS_INIT_INO = 0xEFFFFFF8U,
+#ifdef __KERNEL__
+ MNT_NS_ANON_INO = 0xEFFFFFF7U,
+#endif
+};
+
+struct nsfs_file_handle {
+ __u64 ns_id;
+ __u32 ns_type;
+ __u32 ns_inum;
};
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
#endif /* __LINUX_NSFS_H */