diff options
Diffstat (limited to 'kernel/cgroup')
-rw-r--r-- | kernel/cgroup/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 5 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 155 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 226 | ||||
-rw-r--r-- | kernel/cgroup/cpuset.c | 38 | ||||
-rw-r--r-- | kernel/cgroup/debug.c | 357 |
6 files changed, 545 insertions, 237 deletions
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 387348a40c64..ce693ccb8c58 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_CGROUP_FREEZER) += freezer.o obj-$(CONFIG_CGROUP_PIDS) += pids.o obj-$(CONFIG_CGROUP_RDMA) += rdma.o obj-$(CONFIG_CPUSETS) += cpuset.o +obj-$(CONFIG_CGROUP_DEBUG) += debug.o diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 00f4d6bf048f..8b4c3c2f2509 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -33,6 +33,9 @@ struct cgroup_taskset { struct list_head src_csets; struct list_head dst_csets; + /* the number of tasks in the set */ + int nr_tasks; + /* the subsys currently being processed */ int ssid; @@ -192,6 +195,8 @@ int cgroup_rmdir(struct kernfs_node *kn); int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root); +int cgroup_task_count(const struct cgroup *cgrp); + /* * namespace.c */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 85d75152402d..7bf4b1533f34 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -334,19 +334,15 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, /** * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question - * - * Return the number of tasks in the cgroup. The returned number can be - * higher than the actual number of tasks due to css_set references from - * namespace roots and temporary usages. */ -static int cgroup_task_count(const struct cgroup *cgrp) +int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; struct cgrp_cset_link *link; spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) - count += refcount_read(&link->cset->refcount); + count += link->cset->nr_tasks; spin_unlock_irq(&css_set_lock); return count; } @@ -1263,150 +1259,3 @@ static int __init cgroup_no_v1(char *str) return 1; } __setup("cgroup_no_v1=", cgroup_no_v1); - - -#ifdef CONFIG_CGROUP_DEBUG -static struct cgroup_subsys_state * -debug_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); - - if (!css) - return ERR_PTR(-ENOMEM); - - return css; -} - -static void debug_css_free(struct cgroup_subsys_state *css) -{ - kfree(css); -} - -static u64 debug_taskcount_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - return cgroup_task_count(css->cgroup); -} - -static u64 current_css_set_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - return (u64)(unsigned long)current->cgroups; -} - -static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - u64 count; - - rcu_read_lock(); - count = refcount_read(&task_css_set(current)->refcount); - rcu_read_unlock(); - return count; -} - -static int current_css_set_cg_links_read(struct seq_file *seq, void *v) -{ - struct cgrp_cset_link *link; - struct css_set *cset; - char *name_buf; - - name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!name_buf) - return -ENOMEM; - - spin_lock_irq(&css_set_lock); - rcu_read_lock(); - cset = rcu_dereference(current->cgroups); - list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { - struct cgroup *c = link->cgrp; - - cgroup_name(c, name_buf, NAME_MAX + 1); - seq_printf(seq, "Root %d group %s\n", - c->root->hierarchy_id, name_buf); - } - rcu_read_unlock(); - spin_unlock_irq(&css_set_lock); - kfree(name_buf); - return 0; -} - -#define MAX_TASKS_SHOWN_PER_CSS 25 -static int cgroup_css_links_read(struct seq_file *seq, void *v) -{ - struct cgroup_subsys_state *css = seq_css(seq); - struct cgrp_cset_link *link; - - spin_lock_irq(&css_set_lock); - list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { - struct css_set *cset = link->cset; - struct task_struct *task; - int count = 0; - - seq_printf(seq, "css_set %pK\n", cset); - - list_for_each_entry(task, &cset->tasks, cg_list) { - if (count++ > MAX_TASKS_SHOWN_PER_CSS) - goto overflow; - seq_printf(seq, " task %d\n", task_pid_vnr(task)); - } - - list_for_each_entry(task, &cset->mg_tasks, cg_list) { - if (count++ > MAX_TASKS_SHOWN_PER_CSS) - goto overflow; - seq_printf(seq, " task %d\n", task_pid_vnr(task)); - } - continue; - overflow: - seq_puts(seq, " ...\n"); - } - spin_unlock_irq(&css_set_lock); - return 0; -} - -static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) -{ - return (!cgroup_is_populated(css->cgroup) && - !css_has_online_children(&css->cgroup->self)); -} - -static struct cftype debug_files[] = { - { - .name = "taskcount", - .read_u64 = debug_taskcount_read, - }, - - { - .name = "current_css_set", - .read_u64 = current_css_set_read, - }, - - { - .name = "current_css_set_refcount", - .read_u64 = current_css_set_refcount_read, - }, - - { - .name = "current_css_set_cg_links", - .seq_show = current_css_set_cg_links_read, - }, - - { - .name = "cgroup_css_links", - .seq_show = cgroup_css_links_read, - }, - - { - .name = "releasable", - .read_u64 = releasable_read, - }, - - { } /* terminate */ -}; - -struct cgroup_subsys debug_cgrp_subsys = { - .css_alloc = debug_css_alloc, - .css_free = debug_css_free, - .legacy_cftypes = debug_files, -}; -#endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c3c9a0e1b3c9..df2e0f14a95d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -573,6 +573,11 @@ static int css_set_count = 1; /* 1 for init_css_set */ /** * css_set_populated - does a css_set contain any tasks? * @cset: target css_set + * + * css_set_populated() should be the same as !!cset->nr_tasks at steady + * state. However, css_set_populated() can be called while a task is being + * added to or removed from the linked list before the nr_tasks is + * properly updated. Hence, we can't just look at ->nr_tasks here. */ static bool css_set_populated(struct css_set *cset) { @@ -1542,10 +1547,56 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, return len; } +static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) +{ + char *token; + + *root_flags = 0; + + if (!data) + return 0; + + while ((token = strsep(&data, ",")) != NULL) { + if (!strcmp(token, "nsdelegate")) { + *root_flags |= CGRP_ROOT_NS_DELEGATE; + continue; + } + + pr_err("cgroup2: unknown option \"%s\"\n", token); + return -EINVAL; + } + + return 0; +} + +static void apply_cgroup_root_flags(unsigned int root_flags) +{ + if (current->nsproxy->cgroup_ns == &init_cgroup_ns) { + if (root_flags & CGRP_ROOT_NS_DELEGATE) + cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE; + else + cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE; + } +} + +static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root) +{ + if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) + seq_puts(seq, ",nsdelegate"); + return 0; +} + static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) { - pr_err("remount is not allowed\n"); - return -EINVAL; + unsigned int root_flags; + int ret; + + ret = parse_cgroup_root_flags(data, &root_flags); + if (ret) + return ret; + + apply_cgroup_root_flags(root_flags); + return 0; } /* @@ -1598,6 +1649,7 @@ static void cgroup_enable_task_cg_lists(void) css_set_update_populated(cset, true); list_add_tail(&p->cg_list, &cset->tasks); get_css_set(cset); + cset->nr_tasks++; } spin_unlock(&p->sighand->siglock); } while_each_thread(g, p); @@ -1784,6 +1836,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct dentry *dentry; + int ret; get_cgroup_ns(ns); @@ -1801,16 +1854,21 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, cgroup_enable_task_cg_lists(); if (fs_type == &cgroup2_fs_type) { - if (data) { - pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); + unsigned int root_flags; + + ret = parse_cgroup_root_flags(data, &root_flags); + if (ret) { put_cgroup_ns(ns); - return ERR_PTR(-EINVAL); + return ERR_PTR(ret); } + cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); + if (!IS_ERR(dentry)) + apply_cgroup_root_flags(root_flags); } else { dentry = cgroup1_mount(&cgroup_fs_type, flags, data, CGROUP_SUPER_MAGIC, ns); @@ -1948,6 +2006,8 @@ static void cgroup_migrate_add_task(struct task_struct *task, if (!cset->mg_src_cgrp) return; + mgctx->tset.nr_tasks++; + list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, @@ -2036,21 +2096,19 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) struct css_set *cset, *tmp_cset; int ssid, failed_ssid, ret; - /* methods shouldn't be called if no task is actually migrating */ - if (list_empty(&tset->src_csets)) - return 0; - /* check that we can legitimately attach to the cgroup */ - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ss->can_attach) { - tset->ssid = ssid; - ret = ss->can_attach(tset); - if (ret) { - failed_ssid = ssid; - goto out_cancel_attach; + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ss->can_attach) { + tset->ssid = ssid; + ret = ss->can_attach(tset); + if (ret) { + failed_ssid = ssid; + goto out_cancel_attach; + } } - } - } while_each_subsys_mask(); + } while_each_subsys_mask(); + } /* * Now that we're guaranteed success, proceed to move all tasks to @@ -2064,8 +2122,10 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) struct css_set *to_cset = cset->mg_dst_cset; get_css_set(to_cset); + to_cset->nr_tasks++; css_set_move_task(task, from_cset, to_cset, true); put_css_set_locked(from_cset); + from_cset->nr_tasks--; } } spin_unlock_irq(&css_set_lock); @@ -2077,25 +2137,29 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) */ tset->csets = &tset->dst_csets; - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ss->attach) { - tset->ssid = ssid; - ss->attach(tset); - } - } while_each_subsys_mask(); + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ss->attach) { + tset->ssid = ssid; + ss->attach(tset); + } + } while_each_subsys_mask(); + } ret = 0; goto out_release_tset; out_cancel_attach: - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ssid == failed_ssid) - break; - if (ss->cancel_attach) { - tset->ssid = ssid; - ss->cancel_attach(tset); - } - } while_each_subsys_mask(); + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ssid == failed_ssid) + break; + if (ss->cancel_attach) { + tset->ssid = ssid; + ss->cancel_attach(tset); + } + } while_each_subsys_mask(); + } out_release_tset: spin_lock_irq(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); @@ -2355,27 +2419,14 @@ static int cgroup_procs_write_permission(struct task_struct *task, struct cgroup *dst_cgrp, struct kernfs_open_file *of) { - int ret = 0; - - if (cgroup_on_dfl(dst_cgrp)) { - struct super_block *sb = of->file->f_path.dentry->d_sb; - struct cgroup *cgrp; - struct inode *inode; - - spin_lock_irq(&css_set_lock); - cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); - - while (!cgroup_is_descendant(dst_cgrp, cgrp)) - cgrp = cgroup_parent(cgrp); + struct super_block *sb = of->file->f_path.dentry->d_sb; + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup *root_cgrp = ns->root_cset->dfl_cgrp; + struct cgroup *src_cgrp, *com_cgrp; + struct inode *inode; + int ret; - ret = -ENOMEM; - inode = kernfs_get_inode(sb, cgrp->procs_file.kn); - if (inode) { - ret = inode_permission(inode, MAY_WRITE); - iput(inode); - } - } else { + if (!cgroup_on_dfl(dst_cgrp)) { const struct cred *cred = current_cred(); const struct cred *tcred = get_task_cred(task); @@ -2383,14 +2434,47 @@ static int cgroup_procs_write_permission(struct task_struct *task, * even if we're attaching all tasks in the thread group, * we only need to check permissions on one of them. */ - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + if (uid_eq(cred->euid, GLOBAL_ROOT_UID) || + uid_eq(cred->euid, tcred->uid) || + uid_eq(cred->euid, tcred->suid)) + ret = 0; + else ret = -EACCES; + put_cred(tcred); + return ret; } - return ret; + /* find the source cgroup */ + spin_lock_irq(&css_set_lock); + src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + spin_unlock_irq(&css_set_lock); + + /* and the common ancestor */ + com_cgrp = src_cgrp; + while (!cgroup_is_descendant(dst_cgrp, com_cgrp)) + com_cgrp = cgroup_parent(com_cgrp); + + /* %current should be authorized to migrate to the common ancestor */ + inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn); + if (!inode) + return -ENOMEM; + + ret = inode_permission(inode, MAY_WRITE); + iput(inode); + if (ret) + return ret; + + /* + * If namespaces are delegation boundaries, %current must be able + * to see both source and destination cgroups from its namespace. + */ + if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) && + (!cgroup_is_descendant(src_cgrp, root_cgrp) || + !cgroup_is_descendant(dst_cgrp, root_cgrp))) + return -ENOENT; + + return 0; } /* @@ -2917,11 +3001,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control &= ~disable; ret = cgroup_apply_control(cgrp); - cgroup_finalize_control(cgrp, ret); + if (ret) + goto out_unlock; kernfs_activate(cgrp->kn); - ret = 0; out_unlock: cgroup_kn_unlock(of->kn); return ret ?: nbytes; @@ -2954,11 +3038,23 @@ static void cgroup_file_release(struct kernfs_open_file *of) static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of->kn->priv; struct cgroup_subsys_state *css; int ret; + /* + * If namespaces are delegation boundaries, disallow writes to + * files in an non-init namespace root from inside the namespace + * except for the files explicitly marked delegatable - + * cgroup.procs and cgroup.subtree_control. + */ + if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && + !(cft->flags & CFTYPE_NS_DELEGATABLE) && + ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + return -EPERM; + if (cft->write) return cft->write(of, buf, nbytes, off); @@ -3792,6 +3888,7 @@ static int cgroup_procs_show(struct seq_file *s, void *v) static struct cftype cgroup_base_files[] = { { .name = "cgroup.procs", + .flags = CFTYPE_NS_DELEGATABLE, .file_offset = offsetof(struct cgroup, procs_file), .release = cgroup_procs_release, .seq_start = cgroup_procs_start, @@ -3805,6 +3902,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cgroup.subtree_control", + .flags = CFTYPE_NS_DELEGATABLE, .seq_show = cgroup_subtree_control_show, .write = cgroup_subtree_control_write, }, @@ -4265,6 +4363,11 @@ static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_mutex); + if (css->flags & CSS_DYING) + return; + + css->flags |= CSS_DYING; + /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. @@ -4388,6 +4491,7 @@ int cgroup_rmdir(struct kernfs_node *kn) } static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { + .show_options = cgroup_show_options, .remount_fs = cgroup_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, @@ -4569,6 +4673,10 @@ int __init cgroup_init(void) if (ss->bind) ss->bind(init_css_set.subsys[ssid]); + + mutex_lock(&cgroup_mutex); + css_populate_dir(init_css_set.subsys[ssid]); + mutex_unlock(&cgroup_mutex); } /* init_css_set.subsys[] has been updated, re-hash */ @@ -4784,6 +4892,7 @@ void cgroup_post_fork(struct task_struct *child) cset = task_css_set(current); if (list_empty(&child->cg_list)) { get_css_set(cset); + cset->nr_tasks++; css_set_move_task(child, NULL, cset, false); } spin_unlock_irq(&css_set_lock); @@ -4833,6 +4942,7 @@ void cgroup_exit(struct task_struct *tsk) if (!list_empty(&tsk->cg_list)) { spin_lock_irq(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); + cset->nr_tasks--; spin_unlock_irq(&css_set_lock); } else { get_css_set(cset); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f6501f4f6040..8d5151688504 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -63,6 +63,7 @@ #include <linux/cgroup.h> #include <linux/wait.h> +DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ @@ -176,9 +177,9 @@ typedef enum { } cpuset_flagbits_t; /* convenient tests for these bits */ -static inline bool is_cpuset_online(const struct cpuset *cs) +static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags); + return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) @@ -1038,40 +1039,25 @@ static void cpuset_post_attach(void) * @tsk: the task to change * @newmems: new nodes that the task will be set * - * In order to avoid seeing no nodes if the old and new nodes are disjoint, - * we structure updates as setting all new allowed nodes, then clearing newly - * disallowed ones. + * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed + * and rebind an eventual tasks' mempolicy. If the task is allocating in + * parallel, it might temporarily see an empty intersection, which results in + * a seqlock check and retry before OOM or allocation failure. */ static void cpuset_change_task_nodemask(struct task_struct *tsk, nodemask_t *newmems) { - bool need_loop; - task_lock(tsk); - /* - * Determine if a loop is necessary if another thread is doing - * read_mems_allowed_begin(). If at least one node remains unchanged and - * tsk does not have a mempolicy, then an empty nodemask will not be - * possible when mems_allowed is larger than a word. - */ - need_loop = task_has_mempolicy(tsk) || - !nodes_intersects(*newmems, tsk->mems_allowed); - if (need_loop) { - local_irq_disable(); - write_seqcount_begin(&tsk->mems_allowed_seq); - } + local_irq_disable(); + write_seqcount_begin(&tsk->mems_allowed_seq); nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); - mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); - - mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); + mpol_rebind_task(tsk, newmems); tsk->mems_allowed = *newmems; - if (need_loop) { - write_seqcount_end(&tsk->mems_allowed_seq); - local_irq_enable(); - } + write_seqcount_end(&tsk->mems_allowed_seq); + local_irq_enable(); task_unlock(tsk); } diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c new file mode 100644 index 000000000000..dac46af22782 --- /dev/null +++ b/kernel/cgroup/debug.c @@ -0,0 +1,357 @@ +/* + * Debug controller + * + * WARNING: This controller is for cgroup core debugging only. + * Its interfaces are unstable and subject to changes at any time. + */ +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include "cgroup-internal.h" + +static struct cgroup_subsys_state * +debug_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); + + if (!css) + return ERR_PTR(-ENOMEM); + + return css; +} + +static void debug_css_free(struct cgroup_subsys_state *css) +{ + kfree(css); +} + +/* + * debug_taskcount_read - return the number of tasks in a cgroup. + * @cgrp: the cgroup in question + */ +static u64 debug_taskcount_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return cgroup_task_count(css->cgroup); +} + +static int current_css_set_read(struct seq_file *seq, void *v) +{ + struct kernfs_open_file *of = seq->private; + struct css_set *cset; + struct cgroup_subsys *ss; + struct cgroup_subsys_state *css; + int i, refcnt; + + if (!cgroup_kn_lock_live(of->kn, false)) + return -ENODEV; + + spin_lock_irq(&css_set_lock); + rcu_read_lock(); + cset = rcu_dereference(current->cgroups); + refcnt = refcount_read(&cset->refcount); + seq_printf(seq, "css_set %pK %d", cset, refcnt); + if (refcnt > cset->nr_tasks) + seq_printf(seq, " +%d", refcnt - cset->nr_tasks); + seq_puts(seq, "\n"); + + /* + * Print the css'es stored in the current css_set. + */ + for_each_subsys(ss, i) { + css = cset->subsys[ss->id]; + if (!css) + continue; + seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name, + (unsigned long)css, css->id); + } + rcu_read_unlock(); + spin_unlock_irq(&css_set_lock); + cgroup_kn_unlock(of->kn); + return 0; +} + +static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + u64 count; + + rcu_read_lock(); + count = refcount_read(&task_css_set(current)->refcount); + rcu_read_unlock(); + return count; +} + +static int current_css_set_cg_links_read(struct seq_file *seq, void *v) +{ + struct cgrp_cset_link *link; + struct css_set *cset; + char *name_buf; + + name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!name_buf) + return -ENOMEM; + + spin_lock_irq(&css_set_lock); + rcu_read_lock(); + cset = rcu_dereference(current->cgroups); + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { + struct cgroup *c = link->cgrp; + + cgroup_name(c, name_buf, NAME_MAX + 1); + seq_printf(seq, "Root %d group %s\n", + c->root->hierarchy_id, name_buf); + } + rcu_read_unlock(); + spin_unlock_irq(&css_set_lock); + kfree(name_buf); + return 0; +} + +#define MAX_TASKS_SHOWN_PER_CSS 25 +static int cgroup_css_links_read(struct seq_file *seq, void *v) +{ + struct cgroup_subsys_state *css = seq_css(seq); + struct cgrp_cset_link *link; + int dead_cnt = 0, extra_refs = 0; + + spin_lock_irq(&css_set_lock); + list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { + struct css_set *cset = link->cset; + struct task_struct *task; + int count = 0; + int refcnt = refcount_read(&cset->refcount); + + seq_printf(seq, " %d", refcnt); + if (refcnt - cset->nr_tasks > 0) { + int extra = refcnt - cset->nr_tasks; + + seq_printf(seq, " +%d", extra); + /* + * Take out the one additional reference in + * init_css_set. + */ + if (cset == &init_css_set) + extra--; + extra_refs += extra; + } + seq_puts(seq, "\n"); + + list_for_each_entry(task, &cset->tasks, cg_list) { + if (count++ <= MAX_TASKS_SHOWN_PER_CSS) + seq_printf(seq, " task %d\n", + task_pid_vnr(task)); + } + + list_for_each_entry(task, &cset->mg_tasks, cg_list) { + if (count++ <= MAX_TASKS_SHOWN_PER_CSS) + seq_printf(seq, " task %d\n", + task_pid_vnr(task)); + } + /* show # of overflowed tasks */ + if (count > MAX_TASKS_SHOWN_PER_CSS) + seq_printf(seq, " ... (%d)\n", + count - MAX_TASKS_SHOWN_PER_CSS); + + if (cset->dead) { + seq_puts(seq, " [dead]\n"); + dead_cnt++; + } + + WARN_ON(count != cset->nr_tasks); + } + spin_unlock_irq(&css_set_lock); + + if (!dead_cnt && !extra_refs) + return 0; + + seq_puts(seq, "\n"); + if (extra_refs) + seq_printf(seq, "extra references = %d\n", extra_refs); + if (dead_cnt) + seq_printf(seq, "dead css_sets = %d\n", dead_cnt); + + return 0; +} + +static int cgroup_subsys_states_read(struct seq_file *seq, void *v) +{ + struct kernfs_open_file *of = seq->private; + struct cgroup *cgrp; + struct cgroup_subsys *ss; + struct cgroup_subsys_state *css; + char pbuf[16]; + int i; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENODEV; + + for_each_subsys(ss, i) { + css = rcu_dereference_check(cgrp->subsys[ss->id], true); + if (!css) + continue; + + pbuf[0] = '\0'; + + /* Show the parent CSS if applicable*/ + if (css->parent) + snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", + css->parent->id); + seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name, + (unsigned long)css, css->id, + atomic_read(&css->online_cnt), pbuf); + } + + cgroup_kn_unlock(of->kn); + return 0; +} + +static void cgroup_masks_read_one(struct seq_file *seq, const char *name, + u16 mask) +{ + struct cgroup_subsys *ss; + int ssid; + bool first = true; + + seq_printf(seq, "%-17s: ", name); + for_each_subsys(ss, ssid) { + if (!(mask & (1 << ssid))) + continue; + if (!first) + seq_puts(seq, ", "); + seq_puts(seq, ss->name); + first = false; + } + seq_putc(seq, '\n'); +} + +static int cgroup_masks_read(struct seq_file *seq, void *v) +{ + struct kernfs_open_file *of = seq->private; + struct cgroup *cgrp; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENODEV; + + cgroup_masks_read_one(seq, "subtree_control", cgrp->subtree_control); + cgroup_masks_read_one(seq, "subtree_ss_mask", cgrp->subtree_ss_mask); + + cgroup_kn_unlock(of->kn); + return 0; +} + +static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + return (!cgroup_is_populated(css->cgroup) && + !css_has_online_children(&css->cgroup->self)); +} + +static struct cftype debug_legacy_files[] = { + { + .name = "taskcount", + .read_u64 = debug_taskcount_read, + }, + + { + .name = "current_css_set", + .seq_show = current_css_set_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "current_css_set_refcount", + .read_u64 = current_css_set_refcount_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "current_css_set_cg_links", + .seq_show = current_css_set_cg_links_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "cgroup_css_links", + .seq_show = cgroup_css_links_read, + }, + + { + .name = "cgroup_subsys_states", + .seq_show = cgroup_subsys_states_read, + }, + + { + .name = "cgroup_masks", + .seq_show = cgroup_masks_read, + }, + + { + .name = "releasable", + .read_u64 = releasable_read, + }, + + { } /* terminate */ +}; + +static struct cftype debug_files[] = { + { + .name = "taskcount", + .read_u64 = debug_taskcount_read, + }, + + { + .name = "current_css_set", + .seq_show = current_css_set_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "current_css_set_refcount", + .read_u64 = current_css_set_refcount_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "current_css_set_cg_links", + .seq_show = current_css_set_cg_links_read, + .flags = CFTYPE_ONLY_ON_ROOT, + }, + + { + .name = "css_links", + .seq_show = cgroup_css_links_read, + }, + + { + .name = "csses", + .seq_show = cgroup_subsys_states_read, + }, + + { + .name = "masks", + .seq_show = cgroup_masks_read, + }, + + { } /* terminate */ +}; + +struct cgroup_subsys debug_cgrp_subsys = { + .css_alloc = debug_css_alloc, + .css_free = debug_css_free, + .legacy_cftypes = debug_legacy_files, +}; + +/* + * On v2, debug is an implicit controller enabled by "cgroup_debug" boot + * parameter. + */ +static int __init enable_cgroup_debug(char *str) +{ + debug_cgrp_subsys.dfl_cftypes = debug_files; + debug_cgrp_subsys.implicit_on_dfl = true; + return 1; +} +__setup("cgroup_debug", enable_cgroup_debug); |