summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/ns_common.h141
-rw-r--r--include/linux/nsfs.h3
-rw-r--r--include/linux/nsproxy.h3
3 files changed, 145 insertions, 2 deletions
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 5e09facafd93..bdd0df15ad9c 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -4,7 +4,9 @@
#include <linux/refcount.h>
#include <linux/rbtree.h>
+#include <linux/vfsdebug.h>
#include <uapi/linux/sched.h>
+#include <uapi/linux/nsfs.h>
struct proc_ns_operations;
@@ -37,6 +39,67 @@ extern const struct proc_ns_operations cgroupns_operations;
extern const struct proc_ns_operations timens_operations;
extern const struct proc_ns_operations timens_for_children_operations;
+/*
+ * Namespace lifetimes are managed via a two-tier reference counting model:
+ *
+ * (1) __ns_ref (refcount_t): Main reference count tracking memory
+ * lifetime. Controls when the namespace structure itself is freed.
+ * It also pins the namespace on the namespace trees, whereas (2)
+ * only regulates its visibility to userspace.
+ *
+ * (2) __ns_ref_active (atomic_t): Reference count tracking active users.
+ * Controls visibility of the namespace in the namespace trees.
+ * Any live task that uses the namespace (via nsproxy or cred) holds
+ * an active reference. Any open file descriptor or bind-mount of
+ * the namespace holds an active reference. Once all tasks have
+ * exited their namespaces and all file descriptors and
+ * bind-mounts have been released the active reference count drops
+ * to zero and the namespace becomes inactive. IOW, the namespace
+ * cannot be listed or opened via file handles anymore.
+ *
+ * Note that it is valid to transition from active to inactive and
+ * back from inactive to active e.g., when resurrecting an inactive
+ * namespace tree via the SIOCGSKNS ioctl().
+ *
+ * Relationship and lifecycle states:
+ *
+ * - Active (__ns_ref_active > 0):
+ * Namespace is actively used and visible to userspace. The namespace
+ * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
+ * handles, or discovered via listns().
+ *
+ * - Inactive (__ns_ref_active == 0, __ns_ref > 0):
+ * No tasks are actively using the namespace and it isn't pinned by
+ * any bind-mounts or open file descriptors anymore. But the namespace
+ * is still kept alive by internal references. For example, the user
+ * namespace could be pinned by an open file through file->f_cred
+ * references when one of the now defunct tasks had opened a file and
+ * handed the file descriptor off to another process via a UNIX
+ * socket. Such references keep the namespace structure alive through
+ * __ns_ref but will not hold an active reference.
+ *
+ * - Destroyed (__ns_ref == 0):
+ * No references remain. The namespace is removed from the tree and freed.
+ *
+ * State transitions:
+ *
+ * Active -> Inactive:
+ * When the last task using the namespace exits it drops its active
+ * references to all namespaces. However, user and pid namespaces
+ * remain accessible until the task has been reaped.
+ *
+ * Inactive -> Active:
+ * An inactive namespace tree might be resurrected due to e.g., the
+ * SIOCGSKNS ioctl() on a socket.
+ *
+ * Inactive -> Destroyed:
+ * When __ns_ref drops to zero the namespace is removed from the
+ * namespace trees and the memory is freed (after an RCU grace period).
+ *
+ * Initial namespaces:
+ * Boot-time namespaces (init_net, init_pid_ns, etc.) start with
+ * __ns_ref_active = 1 and remain active forever.
+ */
struct ns_common {
u32 ns_type;
struct dentry *stashed;
@@ -48,6 +111,7 @@ struct ns_common {
u64 ns_id;
struct rb_node ns_tree_node;
struct list_head ns_list_node;
+ atomic_t __ns_ref_active; /* do not use directly */
};
struct rcu_head ns_rcu;
};
@@ -56,6 +120,13 @@ struct ns_common {
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
void __ns_common_free(struct ns_common *ns);
+static __always_inline bool is_initial_namespace(struct ns_common *ns)
+{ /* Tests whether ns->inum lies in the MNT_NS_INIT_INO..IPC_NS_INIT_INO range. */
+ VFS_WARN_ON_ONCE(ns->inum == 0); /* an inum of 0 is flagged as unexpected */
+ return unlikely(in_range(ns->inum, MNT_NS_INIT_INO, /* range start */
+ IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); /* range length; the +1 includes IPC_NS_INIT_INO, assuming in_range(val, start, len) -- confirm */
+}
+
#define to_ns_common(__ns) \
_Generic((__ns), \
struct cgroup_namespace *: &(__ns)->ns, \
@@ -127,6 +198,7 @@ void __ns_common_free(struct ns_common *ns);
.ops = to_ns_operations(&nsname), \
.stashed = NULL, \
.__ns_ref = REFCOUNT_INIT(refs), \
+ .__ns_ref_active = ATOMIC_INIT(1), \
.ns_list_node = LIST_HEAD_INIT(nsname.ns.ns_list_node), \
}
@@ -144,14 +216,26 @@ void __ns_common_free(struct ns_common *ns);
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
+{ /* Returns the current value of the active reference count of @ns. */
+ return atomic_read(&ns->__ns_ref_active); /* plain snapshot; takes no reference */
+}
+
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
{
- return refcount_dec_and_test(&ns->__ns_ref);
+ if (refcount_dec_and_test(&ns->__ns_ref)) { /* true only when this drop took __ns_ref to zero */
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); /* no active references may remain at that point */
+ return true; /* caller dropped the final reference */
+ }
+ return false; /* other __ns_ref references still exist */
}
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
{
- return refcount_inc_not_zero(&ns->__ns_ref);
+ if (refcount_inc_not_zero(&ns->__ns_ref)) /* takes a reference unless __ns_ref is already zero */
+ return true;
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); /* __ns_ref was zero, so the active count is expected to be zero too */
+ return false; /* no reference was taken */
}
static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns)
@@ -166,4 +250,57 @@ static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns
#define ns_ref_put_and_lock(__ns, __lock) \
refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+#define ns_ref_active_read(__ns) /* typed wrapper around __ns_ref_active_read() */ \
+ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) /* a NULL @__ns reads as 0; @__ns appears twice in the expansion */
+
+void __ns_ref_active_get_owner(struct ns_common *ns);
+
+static __always_inline void __ns_ref_active_get(struct ns_common *ns)
+{ /* Unconditionally adds one active reference to @ns. */
+ WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); /* increments, and warns if the resulting count is negative */
+ VFS_WARN_ON_ONCE(is_initial_namespace(ns) && __ns_ref_active_read(ns) <= 0); /* an initial namespace is expected to have a positive count after the increment */
+}
+#define ns_ref_active_get(__ns) /* typed wrapper around __ns_ref_active_get() */ \
+ do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) /* a NULL @__ns is a no-op */
+
+static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common *ns)
+{ /* Takes an active reference on @ns only if its active count is currently non-zero. */
+ if (atomic_inc_not_zero(&ns->__ns_ref_active)) { /* incremented: the count was non-zero */
+ VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); /* a non-zero active count with __ns_ref at zero is flagged */
+ return true;
+ }
+ return false; /* the count was zero; nothing was taken */
+}
+
+#define ns_ref_active_get_owner(__ns) /* typed wrapper around __ns_ref_active_get_owner(), which is defined out of line */ \
+ do { if (__ns) __ns_ref_active_get_owner(to_ns_common(__ns)); } while (0) /* a NULL @__ns is a no-op */
+
+void __ns_ref_active_put_owner(struct ns_common *ns);
+
+static __always_inline void __ns_ref_active_put(struct ns_common *ns)
+{ /* Drops one active reference on @ns; extra work runs only when the count reaches zero. */
+ if (atomic_dec_and_test(&ns->__ns_ref_active)) { /* true only when this drop took the active count to zero */
+ VFS_WARN_ON_ONCE(is_initial_namespace(ns)); /* an initial namespace reaching zero is flagged */
+ VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); /* __ns_ref is expected to still be held here */
+ __ns_ref_active_put_owner(ns); /* defined out of line; presumably releases the active reference held on the owner -- confirm */
+ }
+}
+#define ns_ref_active_put(__ns) /* typed wrapper around __ns_ref_active_put() */ \
+ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) /* a NULL @__ns is a no-op */
+
+static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns)
+{ /* Returns @ns with an additional __ns_ref, or NULL if @ns has no active references or __ns_ref is already zero. */
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) && !__ns_ref_read(ns)); /* a non-zero active count with __ns_ref at zero is flagged */
+ if (!__ns_ref_active_read(ns)) /* active count is zero: refuse */
+ return NULL;
+ if (!__ns_ref_get(ns)) /* takes __ns_ref only; the active count is not incremented here */
+ return NULL;
+ return ns;
+}
+
+void __ns_ref_active_resurrect(struct ns_common *ns);
+
+#define ns_ref_active_resurrect(__ns) /* typed wrapper around __ns_ref_active_resurrect(), which is defined out of line */ \
+ do { if (__ns) __ns_ref_active_resurrect(to_ns_common(__ns)); } while (0) /* a NULL @__ns is a no-op */
+
#endif
diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h
index e5a5fa83d36b..731b67fc2fec 100644
--- a/include/linux/nsfs.h
+++ b/include/linux/nsfs.h
@@ -37,4 +37,7 @@ void nsfs_init(void);
#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
+void nsproxy_ns_active_get(struct nsproxy *ns);
+void nsproxy_ns_active_put(struct nsproxy *ns);
+
#endif /* _LINUX_NSFS_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 538ba8dba184..ac825eddec59 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -93,7 +93,10 @@ static inline struct cred *nsset_cred(struct nsset *set)
*/
int copy_namespaces(u64 flags, struct task_struct *tsk);
+void switch_cred_namespaces(const struct cred *old, const struct cred *new);
void exit_nsproxy_namespaces(struct task_struct *tsk);
+void get_cred_namespaces(struct task_struct *tsk);
+void exit_cred_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
int exec_task_namespaces(void);
void free_nsproxy(struct nsproxy *ns);