diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/ns_common.h | 141 | ||||
| -rw-r--r-- | include/linux/nsfs.h | 3 | ||||
| -rw-r--r-- | include/linux/nsproxy.h | 3 |
3 files changed, 145 insertions, 2 deletions
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 5e09facafd93..bdd0df15ad9c 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,7 +4,9 @@ #include <linux/refcount.h> #include <linux/rbtree.h> +#include <linux/vfsdebug.h> #include <uapi/linux/sched.h> +#include <uapi/linux/nsfs.h> struct proc_ns_operations; @@ -37,6 +39,67 @@ extern const struct proc_ns_operations cgroupns_operations; extern const struct proc_ns_operations timens_operations; extern const struct proc_ns_operations timens_for_children_operations; +/* + * Namespace lifetimes are managed via a two-tier reference counting model: + * + * (1) __ns_ref (refcount_t): Main reference count tracking memory + * lifetime. Controls when the namespace structure itself is freed. + * It also pins the namespace on the namespace trees whereas (2) + * only regulates their visibility to userspace. + * + * (2) __ns_ref_active (atomic_t): Reference count tracking active users. + * Controls visibility of the namespace in the namespace trees. + * Any live task that uses the namespace (via nsproxy or cred) holds + * an active reference. Any open file descriptor or bind-mount of + * the namespace holds an active reference. Once all tasks have + * exited their namespaces and all file descriptors and + * bind-mounts have been released the active reference count drops + * to zero and the namespace becomes inactive. IOW, the namespace + * cannot be listed or opened via file handles anymore. + * + * Note that it is valid to transition from active to inactive and + * back from inactive to active e.g., when resurrecting an inactive + * namespace tree via the SIOCGSKNS ioctl(). + * + * Relationship and lifecycle states: + * + * - Active (__ns_ref_active > 0): + * Namespace is actively used and visible to userspace. The namespace + * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file + * handles, or discovered via listns(). 
+ * + * - Inactive (__ns_ref_active == 0, __ns_ref > 0): + * No tasks are actively using the namespace and it isn't pinned by + * any bind-mounts or open file descriptors anymore. But the namespace + * is still kept alive by internal references. For example, the user + * namespace could be pinned by an open file through file->f_cred + * references when one of the now defunct tasks had opened a file and + * handed the file descriptor off to another process via a UNIX + * socket. Such references keep the namespace structure alive through + * __ns_ref but will not hold an active reference. + * + * - Destroyed (__ns_ref == 0): + * No references remain. The namespace is removed from the tree and freed. + * + * State transitions: + * + * Active -> Inactive: + * When the last task using the namespace exits it drops its active + * references to all namespaces. However, user and pid namespaces + * remain accessible until the task has been reaped. + * + * Inactive -> Active: + * An inactive namespace tree might be resurrected due to e.g., the + * SIOCGSKNS ioctl() on a socket. + * + * Inactive -> Destroyed: + * When __ns_ref drops to zero the namespace is removed from the + * namespace trees and the memory is freed (after an RCU grace period). + * + * Initial namespaces: + * Boot-time namespaces (init_net, init_pid_ns, etc.) start with + * __ns_ref_active = 1 and remain active forever. 
+ */ struct ns_common { u32 ns_type; struct dentry *stashed; @@ -48,6 +111,7 @@ struct ns_common { u64 ns_id; struct rb_node ns_tree_node; struct list_head ns_list_node; + atomic_t __ns_ref_active; /* do not use directly */ }; struct rcu_head ns_rcu; }; @@ -56,6 +120,13 @@ struct ns_common { int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); +static __always_inline bool is_initial_namespace(struct ns_common *ns) +{ + VFS_WARN_ON_ONCE(ns->inum == 0); + return unlikely(in_range(ns->inum, MNT_NS_INIT_INO, + IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); +} + #define to_ns_common(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: &(__ns)->ns, \ @@ -127,6 +198,7 @@ void __ns_common_free(struct ns_common *ns); .ops = to_ns_operations(&nsname), \ .stashed = NULL, \ .__ns_ref = REFCOUNT_INIT(refs), \ + .__ns_ref_active = ATOMIC_INIT(1), \ .ns_list_node = LIST_HEAD_INIT(nsname.ns.ns_list_node), \ } @@ -144,14 +216,26 @@ void __ns_common_free(struct ns_common *ns); #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) +{ + return atomic_read(&ns->__ns_ref_active); +} + static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { - return refcount_dec_and_test(&ns->__ns_ref); + if (refcount_dec_and_test(&ns->__ns_ref)) { + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); + return true; + } + return false; } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { - return refcount_inc_not_zero(&ns->__ns_ref); + if (refcount_inc_not_zero(&ns->__ns_ref)) + return true; + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); + return false; } static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns) @@ -166,4 +250,57 @@ static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns #define ns_ref_put_and_lock(__ns, 
__lock) \ refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock)) +#define ns_ref_active_read(__ns) \ + ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) + +void __ns_ref_active_get_owner(struct ns_common *ns); + +static __always_inline void __ns_ref_active_get(struct ns_common *ns) +{ + WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); + VFS_WARN_ON_ONCE(is_initial_namespace(ns) && __ns_ref_active_read(ns) <= 0); +} +#define ns_ref_active_get(__ns) \ + do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) + +static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common *ns) +{ + if (atomic_inc_not_zero(&ns->__ns_ref_active)) { + VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); + return true; + } + return false; +} + +#define ns_ref_active_get_owner(__ns) \ + do { if (__ns) __ns_ref_active_get_owner(to_ns_common(__ns)); } while (0) + +void __ns_ref_active_put_owner(struct ns_common *ns); + +static __always_inline void __ns_ref_active_put(struct ns_common *ns) +{ + if (atomic_dec_and_test(&ns->__ns_ref_active)) { + VFS_WARN_ON_ONCE(is_initial_namespace(ns)); + VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); + __ns_ref_active_put_owner(ns); + } +} +#define ns_ref_active_put(__ns) \ + do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) + +static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns) +{ + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) && !__ns_ref_read(ns)); + if (!__ns_ref_active_read(ns)) + return NULL; + if (!__ns_ref_get(ns)) + return NULL; + return ns; +} + +void __ns_ref_active_resurrect(struct ns_common *ns); + +#define ns_ref_active_resurrect(__ns) \ + do { if (__ns) __ns_ref_active_resurrect(to_ns_common(__ns)); } while (0) + #endif diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h index e5a5fa83d36b..731b67fc2fec 100644 --- a/include/linux/nsfs.h +++ b/include/linux/nsfs.h @@ -37,4 +37,7 @@ void nsfs_init(void); #define current_in_namespace(__ns) 
(__current_namespace_from_type(__ns) == __ns) +void nsproxy_ns_active_get(struct nsproxy *ns); +void nsproxy_ns_active_put(struct nsproxy *ns); + #endif /* _LINUX_NSFS_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 538ba8dba184..ac825eddec59 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -93,7 +93,10 @@ static inline struct cred *nsset_cred(struct nsset *set) */ int copy_namespaces(u64 flags, struct task_struct *tsk); +void switch_cred_namespaces(const struct cred *old, const struct cred *new); void exit_nsproxy_namespaces(struct task_struct *tsk); +void get_cred_namespaces(struct task_struct *tsk); +void exit_cred_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); |
