summaryrefslogtreecommitdiff
path: root/fs/pidfs.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/pidfs.c')
-rw-r--r--fs/pidfs.c189
1 files changed, 104 insertions, 85 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 0ef5b47d796a..dba703d4ce4a 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -39,20 +39,20 @@ void pidfs_get_root(struct path *path)
path_get(path);
}
-/*
- * Stashes information that userspace needs to access even after the
- * process has been reaped.
- */
-struct pidfs_exit_info {
- __u64 cgroupid;
- __s32 exit_code;
- __u32 coredump_mask;
+enum pidfs_attr_mask_bits {
+ PIDFS_ATTR_BIT_EXIT = 0,
+ PIDFS_ATTR_BIT_COREDUMP = 1,
};
struct pidfs_attr {
+ unsigned long attr_mask;
struct simple_xattrs *xattrs;
- struct pidfs_exit_info __pei;
- struct pidfs_exit_info *exit_info;
+ struct /* exit info */ {
+ __u64 cgroupid;
+ __s32 exit_code;
+ };
+ __u32 coredump_mask;
+ __u32 coredump_signal;
};
static struct rb_root pidfs_ino_tree = RB_ROOT;
@@ -293,6 +293,15 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
return 0;
}
+/* This must be updated whenever a new flag is added */
+#define PIDFD_INFO_SUPPORTED (PIDFD_INFO_PID | \
+ PIDFD_INFO_CREDS | \
+ PIDFD_INFO_CGROUPID | \
+ PIDFD_INFO_EXIT | \
+ PIDFD_INFO_COREDUMP | \
+ PIDFD_INFO_SUPPORTED_MASK | \
+ PIDFD_INFO_COREDUMP_SIGNAL)
+
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
@@ -300,12 +309,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
struct pid *pid = pidfd_pid(file);
size_t usize = _IOC_SIZE(cmd);
struct pidfd_info kinfo = {};
- struct pidfs_exit_info *exit_info;
struct user_namespace *user_ns;
struct pidfs_attr *attr;
const struct cred *c;
__u64 mask;
+ BUILD_BUG_ON(sizeof(struct pidfd_info) != PIDFD_INFO_SIZE_VER2);
+
if (!uinfo)
return -EINVAL;
if (usize < PIDFD_INFO_SIZE_VER0)
@@ -323,20 +333,24 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
attr = READ_ONCE(pid->attr);
if (mask & PIDFD_INFO_EXIT) {
- exit_info = READ_ONCE(attr->exit_info);
- if (exit_info) {
+ if (test_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask)) {
+ smp_rmb();
kinfo.mask |= PIDFD_INFO_EXIT;
#ifdef CONFIG_CGROUPS
- kinfo.cgroupid = exit_info->cgroupid;
+ kinfo.cgroupid = attr->cgroupid;
kinfo.mask |= PIDFD_INFO_CGROUPID;
#endif
- kinfo.exit_code = exit_info->exit_code;
+ kinfo.exit_code = attr->exit_code;
}
}
if (mask & PIDFD_INFO_COREDUMP) {
- kinfo.mask |= PIDFD_INFO_COREDUMP;
- kinfo.coredump_mask = READ_ONCE(attr->__pei.coredump_mask);
+ if (test_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask)) {
+ smp_rmb();
+ kinfo.mask |= PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+ kinfo.coredump_mask = attr->coredump_mask;
+ kinfo.coredump_signal = attr->coredump_signal;
+ }
}
task = get_pid_task(pid, PIDTYPE_PID);
@@ -355,14 +369,15 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if (!c)
return -ESRCH;
- if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) {
- task_lock(task);
+ if ((mask & PIDFD_INFO_COREDUMP) && !kinfo.coredump_mask) {
+ guard(task_lock)(task);
if (task->mm) {
unsigned long flags = __mm_flags_get_dumpable(task->mm);
kinfo.coredump_mask = pidfs_coredump_mask(flags);
+ kinfo.mask |= PIDFD_INFO_COREDUMP;
+ /* No coredump actually took place, so no coredump signal. */
}
- task_unlock(task);
}
/* Unconditionally return identifiers and credentials, the rest only on request */
@@ -409,6 +424,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
return -ESRCH;
copy_out:
+ if (mask & PIDFD_INFO_SUPPORTED_MASK) {
+ kinfo.mask |= PIDFD_INFO_SUPPORTED_MASK;
+ kinfo.supported_mask = PIDFD_INFO_SUPPORTED;
+ }
+
+ /* Are there bits in the return mask not present in PIDFD_INFO_SUPPORTED? */
+ WARN_ON_ONCE(~PIDFD_INFO_SUPPORTED & kinfo.mask);
/*
* If userspace and the kernel have the same struct size it can just
* be copied. If userspace provides an older struct, only the bits that
@@ -454,7 +476,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct task_struct *task __free(put_task) = NULL;
struct nsproxy *nsp __free(put_nsproxy) = NULL;
struct ns_common *ns_common = NULL;
- struct pid_namespace *pid_ns;
if (!pidfs_ioctl_valid(cmd))
return -ENOIOCTLCMD;
@@ -496,66 +517,64 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang of nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
- if (IS_ENABLED(CONFIG_CGROUPS)) {
- get_cgroup_ns(nsp->cgroup_ns);
- ns_common = to_ns_common(nsp->cgroup_ns);
- }
+ if (!ns_ref_get(nsp->cgroup_ns))
+ break;
+ ns_common = to_ns_common(nsp->cgroup_ns);
break;
case PIDFD_GET_IPC_NAMESPACE:
- if (IS_ENABLED(CONFIG_IPC_NS)) {
- get_ipc_ns(nsp->ipc_ns);
- ns_common = to_ns_common(nsp->ipc_ns);
- }
+ if (!ns_ref_get(nsp->ipc_ns))
+ break;
+ ns_common = to_ns_common(nsp->ipc_ns);
break;
case PIDFD_GET_MNT_NAMESPACE:
- get_mnt_ns(nsp->mnt_ns);
+ if (!ns_ref_get(nsp->mnt_ns))
+ break;
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
- if (IS_ENABLED(CONFIG_NET_NS)) {
- ns_common = to_ns_common(nsp->net_ns);
- get_net_ns(ns_common);
- }
+ if (!ns_ref_get(nsp->net_ns))
+ break;
+ ns_common = to_ns_common(nsp->net_ns);
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- get_pid_ns(nsp->pid_ns_for_children);
- ns_common = to_ns_common(nsp->pid_ns_for_children);
- }
+ if (!ns_ref_get(nsp->pid_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
break;
case PIDFD_GET_TIME_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns);
- ns_common = to_ns_common(nsp->time_ns);
- }
+ if (!ns_ref_get(nsp->time_ns))
+ break;
+ ns_common = to_ns_common(nsp->time_ns);
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
- if (IS_ENABLED(CONFIG_TIME_NS)) {
- get_time_ns(nsp->time_ns_for_children);
- ns_common = to_ns_common(nsp->time_ns_for_children);
- }
+ if (!ns_ref_get(nsp->time_ns_for_children))
+ break;
+ ns_common = to_ns_common(nsp->time_ns_for_children);
break;
case PIDFD_GET_UTS_NAMESPACE:
- if (IS_ENABLED(CONFIG_UTS_NS)) {
- get_uts_ns(nsp->uts_ns);
- ns_common = to_ns_common(nsp->uts_ns);
- }
+ if (!ns_ref_get(nsp->uts_ns))
+ break;
+ ns_common = to_ns_common(nsp->uts_ns);
break;
/* Namespaces that don't hang of nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
- if (IS_ENABLED(CONFIG_USER_NS)) {
- rcu_read_lock();
- ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
- rcu_read_unlock();
+ scoped_guard(rcu) {
+ struct user_namespace *user_ns;
+
+ user_ns = task_cred_xxx(task, user_ns);
+ if (!ns_ref_get(user_ns))
+ break;
+ ns_common = to_ns_common(user_ns);
}
break;
case PIDFD_GET_PID_NAMESPACE:
- if (IS_ENABLED(CONFIG_PID_NS)) {
- rcu_read_lock();
+ scoped_guard(rcu) {
+ struct pid_namespace *pid_ns;
+
pid_ns = task_active_pid_ns(task);
- if (pid_ns)
- ns_common = to_ns_common(get_pid_ns(pid_ns));
- rcu_read_unlock();
+ if (!ns_ref_get(pid_ns))
+ break;
+ ns_common = to_ns_common(pid_ns);
}
break;
default:
@@ -606,24 +625,25 @@ void pidfs_exit(struct task_struct *tsk)
{
struct pid *pid = task_pid(tsk);
struct pidfs_attr *attr;
- struct pidfs_exit_info *exit_info;
#ifdef CONFIG_CGROUPS
struct cgroup *cgrp;
#endif
might_sleep();
- guard(spinlock_irq)(&pid->wait_pidfd.lock);
- attr = pid->attr;
- if (!attr) {
- /*
- * No one ever held a pidfd for this struct pid.
- * Mark it as dead so no one can add a pidfs
- * entry anymore. We're about to be reaped and
- * so no exit information would be available.
- */
- pid->attr = PIDFS_PID_DEAD;
- return;
+ /* Synchronize with pidfs_register_pid(). */
+ scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
+ attr = pid->attr;
+ if (!attr) {
+ /*
+ * No one ever held a pidfd for this struct pid.
+ * Mark it as dead so no one can add a pidfs
+ * entry anymore. We're about to be reaped and
+ * so no exit information would be available.
+ */
+ pid->attr = PIDFS_PID_DEAD;
+ return;
+ }
}
/*
@@ -634,41 +654,39 @@ void pidfs_exit(struct task_struct *tsk)
* is put
*/
- exit_info = &attr->__pei;
-
#ifdef CONFIG_CGROUPS
rcu_read_lock();
cgrp = task_dfl_cgroup(tsk);
- exit_info->cgroupid = cgroup_id(cgrp);
+ attr->cgroupid = cgroup_id(cgrp);
rcu_read_unlock();
#endif
- exit_info->exit_code = tsk->exit_code;
+ attr->exit_code = tsk->exit_code;
/* Ensure that PIDFD_GET_INFO sees either all or nothing. */
- smp_store_release(&attr->exit_info, &attr->__pei);
+ smp_wmb();
+ set_bit(PIDFS_ATTR_BIT_EXIT, &attr->attr_mask);
}
#ifdef CONFIG_COREDUMP
void pidfs_coredump(const struct coredump_params *cprm)
{
struct pid *pid = cprm->pid;
- struct pidfs_exit_info *exit_info;
struct pidfs_attr *attr;
- __u32 coredump_mask = 0;
attr = READ_ONCE(pid->attr);
VFS_WARN_ON_ONCE(!attr);
VFS_WARN_ON_ONCE(attr == PIDFS_PID_DEAD);
- exit_info = &attr->__pei;
- /* Note how we were coredumped. */
- coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
- /* Note that we actually did coredump. */
- coredump_mask |= PIDFD_COREDUMPED;
+ /* Note how we were coredumped and that we coredumped. */
+ attr->coredump_mask = pidfs_coredump_mask(cprm->mm_flags) |
+ PIDFD_COREDUMPED;
/* If coredumping is set to skip we should never end up here. */
- VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP);
- smp_store_release(&exit_info->coredump_mask, coredump_mask);
+ VFS_WARN_ON_ONCE(attr->coredump_mask & PIDFD_COREDUMP_SKIP);
+ /* Expose the signal number that caused the coredump. */
+ attr->coredump_signal = cprm->siginfo->si_signo;
+ smp_wmb();
+ set_bit(PIDFS_ATTR_BIT_COREDUMP, &attr->attr_mask);
}
#endif
@@ -1022,6 +1040,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
fc->s_iflags |= SB_I_NOEXEC;
fc->s_iflags |= SB_I_NODEV;
+ ctx->s_d_flags |= DCACHE_DONTCACHE;
ctx->ops = &pidfs_sops;
ctx->eops = &pidfs_export_operations;
ctx->dops = &pidfs_dentry_operations;