From 251f8c0364f99fc21fcc7b07e4ec6b4f3250d841 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 25 Aug 2014 19:27:52 +0800 Subject: cgroup: fix a typo in comment. There is no function named cgroup_enable_task_cg_links(). Instead, the correct function name in this comment should be cgroup_enabled_task_cg_lists(). Signed-off-by: Dongsheng Yang Signed-off-by: Tejun Heo --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7dc8788cfd52..64bbb56496c2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5161,7 +5161,7 @@ void cgroup_post_fork(struct task_struct *child) int i; /* - * This may race against cgroup_enable_task_cg_links(). As that + * This may race against cgroup_enable_task_cg_lists(). As that * function sets use_task_css_set_links before grabbing * tasklist_lock and we just went through tasklist_lock to add * @child, it's guaranteed that either we see the set @@ -5176,7 +5176,7 @@ void cgroup_post_fork(struct task_struct *child) * when implementing operations which need to migrate all tasks of * a cgroup to another. * - * Note that if we lose to cgroup_enable_task_cg_links(), @child + * Note that if we lose to cgroup_enable_task_cg_lists(), @child * will remain in init_css_set. This is safe because all tasks are * in the init_css_set before cg_links is enabled and there's no * operation which transfers all tasks out of init_css_set. -- cgit v1.2.3 From 6213daab2547fdc0d02a86abf3ac209ac6881ae3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 17 Sep 2014 18:18:09 +0800 Subject: cgroup: remove some useless forward declarations Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 - kernel/cgroup.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b5223c570eba..f7898e0bce1e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,7 +27,6 @@ struct cgroup_root; struct cgroup_subsys; -struct inode; struct cgroup; extern int cgroup_init_early(void); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ebd4476c57de..619aae399a3a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -185,7 +185,6 @@ static int need_forkexit_callback __read_mostly; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; -static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); @@ -195,7 +194,6 @@ static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); -static void cgroup_pidlist_destroy_all(struct cgroup *cgrp); /* IDR wrappers which synchronize using cgroup_idr_lock */ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, -- cgit v1.2.3 From 244bb9a6336d2aa53526261ec35c593ebd5c1a33 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 17 Sep 2014 18:18:34 +0800 Subject: cgroup: remove redundant code in cgroup_rmdir() We no longer clear kn->priv in cgroup_rmdir(), so we don't need to get an extra refcnt. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 619aae399a3a..d739a732edb9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4841,13 +4841,10 @@ static int cgroup_rmdir(struct kernfs_node *kn) cgrp = cgroup_kn_lock_live(kn); if (!cgrp) return 0; - cgroup_get(cgrp); /* for @kn->priv clearing */ ret = cgroup_destroy_locked(cgrp); cgroup_kn_unlock(kn); - - cgroup_put(cgrp); return ret; } -- cgit v1.2.3 From 0c8fc2c1210556434835adfb2274f41704853e8a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 17 Sep 2014 18:19:24 +0800 Subject: cgroup: remove bogus comments We never grab cgroup mutex in fork and exit paths no matter whether notify_on_release is set or not. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d739a732edb9..4ddc75588983 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -967,14 +967,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * knows that the cgroup won't be removed, as cgroup_rmdir() * needs that mutex. * - * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't - * (usually) take cgroup_mutex. These are the two most performance - * critical pieces of code here. The exception occurs on cgroup_exit(), - * when a task in a notify_on_release cgroup exits. Then cgroup_mutex - * is taken, and if the cgroup count is zero, a usermode call made - * to the release agent with the name of the cgroup (path relative to - * the root of cgroup file system) as the argument. - * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all * tasks in the system use _some_ cgroup, and since there is always at -- cgit v1.2.3 From 971ff49355387fef41d1327434d8939721a4eb35 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Thu, 18 Sep 2014 16:06:19 +0800 Subject: cgroup: use a per-cgroup work for release agent Instead of using a global work to schedule release agent on removable cgroups, we change to use a per-cgroup work to do this, which makes the code much simpler. v2: use a dedicated work instead of reusing css->destroy_work. (Tejun) Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 10 ++--- kernel/cgroup.c | 108 +++++++++++++++---------------------------------- 2 files changed, 36 insertions(+), 82 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f7898e0bce1e..51958d0fb88f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -233,13 +233,6 @@ struct cgroup { */ struct list_head e_csets[CGROUP_SUBSYS_COUNT]; - /* - * Linked list running through all cgroups that can - * potentially be reaped by the release agent. Protected by - * release_list_lock - */ - struct list_head release_list; - /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. @@ -249,6 +242,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; }; #define MAX_CGROUP_ROOT_NAMELEN 64 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4ddc75588983..db19a4884a7f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -392,12 +392,7 @@ static int notify_on_release(const struct cgroup *cgrp) ; \ else -/* the list of cgroups eligible for automatic release. Protected by - * release_list_lock */ -static LIST_HEAD(release_list); -static DEFINE_RAW_SPINLOCK(release_list_lock); static void cgroup_release_agent(struct work_struct *work); -static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); /* @@ -1577,7 +1572,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->self.sibling); INIT_LIST_HEAD(&cgrp->self.children); INIT_LIST_HEAD(&cgrp->cset_links); - INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->self.cgroup = cgrp; @@ -1587,6 +1581,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); init_waitqueue_head(&cgrp->offline_waitq); + INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent); } static void init_cgroup_root(struct cgroup_root *root, @@ -4342,6 +4337,7 @@ static void css_free_work_fn(struct work_struct *work) /* cgroup free path */ atomic_dec(&cgrp->root->nr_cgrps); cgroup_pidlist_destroy_all(cgrp); + cancel_work_sync(&cgrp->release_agent_work); if (cgroup_parent(cgrp)) { /* @@ -4804,12 +4800,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) for_each_css(css, ssid, cgrp) kill_css(css); - /* CSS_ONLINE is clear, remove from ->release_list for the last time */ - raw_spin_lock(&release_list_lock); - if (!list_empty(&cgrp->release_list)) - list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); - /* * Remove @cgrp directory along with the base files. @cgrp has an * extra ref on its kn. @@ -5271,25 +5261,9 @@ void cgroup_exit(struct task_struct *tsk) static void check_for_release(struct cgroup *cgrp) { - if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) && - !css_has_online_children(&cgrp->self)) { - /* - * Control Group is currently removeable. If it's not - * already queued for a userspace notification, queue - * it now - */ - int need_schedule_work = 0; - - raw_spin_lock(&release_list_lock); - if (!cgroup_is_dead(cgrp) && - list_empty(&cgrp->release_list)) { - list_add(&cgrp->release_list, &release_list); - need_schedule_work = 1; - } - raw_spin_unlock(&release_list_lock); - if (need_schedule_work) - schedule_work(&release_agent_work); - } + if (cgroup_is_releasable(cgrp) && !cgroup_has_tasks(cgrp) && + !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) + schedule_work(&cgrp->release_agent_work); } /* @@ -5317,52 +5291,36 @@ static void check_for_release(struct cgroup *cgrp) */ static void cgroup_release_agent(struct work_struct *work) { - BUG_ON(work != &release_agent_work); + struct cgroup *cgrp = + container_of(work, struct cgroup, release_agent_work); + char *pathbuf = NULL, *agentbuf = NULL, *path; + char *argv[3], *envp[3]; + mutex_lock(&cgroup_mutex); - raw_spin_lock(&release_list_lock); - while (!list_empty(&release_list)) { - char *argv[3], *envp[3]; - int i; - char *pathbuf = NULL, *agentbuf = NULL, *path; - struct cgroup *cgrp = list_entry(release_list.next, - struct cgroup, - release_list); - list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!pathbuf) - goto continue_free; - path = cgroup_path(cgrp, pathbuf, PATH_MAX); - if (!path) - goto continue_free; - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!agentbuf) - goto continue_free; - - i = 0; - argv[i++] = agentbuf; - argv[i++] = path; - argv[i] = NULL; - - i = 0; - /* minimal command environment */ - envp[i++] = "HOME=/"; - envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[i] = NULL; - - /* Drop the lock while we invoke the usermode helper, - * since the exec could involve hitting disk and hence - * be a slow process */ - mutex_unlock(&cgroup_mutex); - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - mutex_lock(&cgroup_mutex); - continue_free: - kfree(pathbuf); - kfree(agentbuf); - raw_spin_lock(&release_list_lock); - } - raw_spin_unlock(&release_list_lock); + + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); + agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); + if (!pathbuf || !agentbuf) + goto out; + + path = cgroup_path(cgrp, pathbuf, PATH_MAX); + if (!path) + goto out; + + argv[0] = agentbuf; + argv[1] = path; + argv[2] = NULL; + + /* minimal command environment */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + mutex_unlock(&cgroup_mutex); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); +out: + kfree(agentbuf); + kfree(pathbuf); } static int __init cgroup_disable(char *str) -- cgit v1.2.3 From 006f4ac49742b5f70ef7e39176fd42a500144ccc Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Thu, 18 Sep 2014 16:03:15 +0800 Subject: cgroup: simplify proc_cgroup_show() Use the ONE macro instead of REG, and we can simplify proc_cgroup_show(). Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- fs/proc/base.c | 19 ++----------------- include/linux/cgroup.h | 3 ++- kernel/cgroup.c | 18 +++--------------- 3 files changed, 7 insertions(+), 33 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index baf852b648ad..6b96892015ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -376,21 +376,6 @@ static const struct file_operations proc_lstats_operations = { #endif -#ifdef CONFIG_CGROUPS -static int cgroup_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cgroup_show, pid); -} - -static const struct file_operations proc_cgroup_operations = { - .open = cgroup_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - #ifdef CONFIG_PROC_PID_CPUSET static int cpuset_open(struct inode *inode, struct file *file) @@ -2576,7 +2561,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), @@ -2922,7 +2907,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 51958d0fb88f..77a1d37b742b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -37,7 +37,8 @@ extern void cgroup_exit(struct task_struct *p); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); -extern int proc_cgroup_show(struct seq_file *, void *); +extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index db19a4884a7f..df7733b48d2e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5030,12 +5030,9 @@ core_initcall(cgroup_wq_init); * - Print task's cgroup paths into seq_file, one line for each hierarchy * - Used for /proc//cgroup. */ - -/* TODO: Use a proper seq_file iterator */ -int proc_cgroup_show(struct seq_file *m, void *v) +int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) { - struct pid *pid; - struct task_struct *tsk; char *buf, *path; int retval; struct cgroup_root *root; @@ -5045,14 +5042,6 @@ int proc_cgroup_show(struct seq_file *m, void *v) if (!buf) goto out; - retval = -ESRCH; - pid = m->private; - tsk = get_pid_task(pid, PIDTYPE_PID); - if (!tsk) - goto out_free; - - retval = 0; - mutex_lock(&cgroup_mutex); down_read(&css_set_rwsem); @@ -5082,11 +5071,10 @@ int proc_cgroup_show(struct seq_file *m, void *v) seq_putc(m, '\n'); } + retval = 0; out_unlock: up_read(&css_set_rwsem); mutex_unlock(&cgroup_mutex); - put_task_struct(tsk); -out_free: kfree(buf); out: return retval; -- cgit v1.2.3 From 52de4779f201758ddcf37360f09a16895756e708 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Thu, 18 Sep 2014 16:03:36 +0800 Subject: cpuset: simplify proc_cpuset_show() Use the ONE macro instead of REG, and we can simplify proc_cpuset_show(). Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- fs/proc/base.c | 20 ++------------------ include/linux/cpuset.h | 3 ++- kernel/cpuset.c | 15 +++------------ 3 files changed, 7 insertions(+), 31 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 6b96892015ec..4e8aa35fc3eb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -376,22 +376,6 @@ static const struct file_operations proc_lstats_operations = { #endif -#ifdef CONFIG_PROC_PID_CPUSET - -static int cpuset_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cpuset_show, pid); -} - -static const struct file_operations proc_cpuset_operations = { - .open = cpuset_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -2558,7 +2542,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), @@ -2904,7 +2888,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index ade2390ffe92..0d4e0675b318 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -86,7 +86,8 @@ extern void __cpuset_memory_pressure_bump(void); extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); -extern int proc_cpuset_show(struct seq_file *, void *); +extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); extern int cpuset_slab_spread_node(void); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 22874d7cf2c0..a37f4ed24867 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2729,10 +2729,9 @@ void __cpuset_memory_pressure_bump(void) * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ -int proc_cpuset_show(struct seq_file *m, void *unused_v) +int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) { - struct pid *pid; - struct task_struct *tsk; char *buf, *p; struct cgroup_subsys_state *css; int retval; @@ -2742,24 +2741,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) if (!buf) goto out; - retval = -ESRCH; - pid = m->private; - tsk = get_pid_task(pid, PIDTYPE_PID); - if (!tsk) - goto out_free; - retval = -ENAMETOOLONG; rcu_read_lock(); css = task_css(tsk, cpuset_cgrp_id); p = cgroup_path(css->cgroup, buf, PATH_MAX); rcu_read_unlock(); if (!p) - goto out_put_task; + goto out_free; seq_puts(m, p); seq_putc(m, '\n'); retval = 0; -out_put_task: - put_task_struct(tsk); out_free: kfree(buf); out: -- cgit v1.2.3 From f29374b146dd02f5f99742aedaddd6ef3512fc9c Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 19 Sep 2014 16:29:31 +0800 Subject: cgroup: remove redundant check in cgroup_ino() After we implemented default unified hierarchy, cgrp->kn can never be NULL. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 7 ++----- mm/memory-failure.c | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 77a1d37b742b..818a81fe7ccc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -532,13 +532,10 @@ static inline bool cgroup_has_tasks(struct cgroup *cgrp) return !list_empty(&cgrp->cset_links); } -/* returns ino associated with a cgroup, 0 indicates unmounted root */ +/* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { - if (cgrp->kn) - return cgrp->kn->ino; - else - return 0; + return cgrp->kn->ino; } /* cft/css accessors for cftype->write() operation */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 44c6bd201d3a..8639f6b28746 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -148,7 +148,7 @@ static int hwpoison_filter_task(struct page *p) ino = cgroup_ino(css->cgroup); css_put(css); - if (!ino || ino != hwpoison_filter_memcg) + if (ino != hwpoison_filter_memcg) return -EINVAL; return 0; -- cgit v1.2.3 From 4e2ba65068ac1d0e8c9df78a4ad787cf39640418 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 19 Sep 2014 16:53:14 +0800 Subject: perf/cgroup: Remove perf_put_cgroup() Commit 5a17f543ed68 ("cgroup: improve css_from_dir() into css_tryget_from_dir()") removed perf_tryget_cgroup(), so let's also remove perf_put_cgroup(). Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/events/core.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cf24b3e42ec..8be3e34274b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -391,14 +391,9 @@ perf_cgroup_match(struct perf_event *event) event->cgrp->css.cgroup); } -static inline void perf_put_cgroup(struct perf_event *event) -{ - css_put(&event->cgrp->css); -} - static inline void perf_detach_cgroup(struct perf_event *event) { - perf_put_cgroup(event); + css_put(&event->cgrp->css); event->cgrp = NULL; } -- cgit v1.2.3 From a25eb52e81a40e986179a790fbb5a1f02f482b7a Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 19 Sep 2014 16:51:00 +0800 Subject: cgroup: remove CGRP_RELEASABLE flag We call put_css_set() after setting CGRP_RELEASABLE flag in cgroup_task_migrate(), but in other places we call it without setting the flag. I don't see the necessity of this flag. Moreover once the flag is set, it will never be cleared, unless writing to the notify_on_release control file, so it can be quite confusing if we look at the output of debug.releasable. # mount -t cgroup -o debug xxx /cgroup # mkdir /cgroup/child # cat /cgroup/child/debug.releasable 0 <-- shows 0 though the cgroup is empty # echo $$ > /cgroup/child/tasks # cat /cgroup/child/debug.releasable 0 # echo $$ > /cgroup/tasks && echo $$ > /cgroup/child/tasks # cat /proc/child/debug.releasable 1 <-- shows 1 though the cgroup is not empty This patch removes the flag, and now debug.releasable shows if the cgroup is empty or not. Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 ----- kernel/cgroup.c | 40 +++++++++++++--------------------------- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 818a81fe7ccc..1d5196889048 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -161,11 +161,6 @@ static inline void css_put(struct cgroup_subsys_state *css) /* bits in struct cgroup flags field */ enum { - /* - * Control Group has previously had a child cgroup or a task, - * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) - */ - CGRP_RELEASABLE, /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index df7733b48d2e..16e3a4f5c9dc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -329,14 +329,6 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) return false; } -static int cgroup_is_releasable(const struct cgroup *cgrp) -{ - const int bits = - (1 << CGRP_RELEASABLE) | - (1 << CGRP_NOTIFY_ON_RELEASE); - return (cgrp->flags & bits) == bits; -} - static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -491,7 +483,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) return key; } -static void put_css_set_locked(struct css_set *cset, bool taskexit) +static void put_css_set_locked(struct css_set *cset) { struct cgrp_cset_link *link, *tmp_link; struct cgroup_subsys *ss; @@ -517,11 +509,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) /* @cgrp can't go away while we're holding css_set_rwsem */ if (list_empty(&cgrp->cset_links)) { cgroup_update_populated(cgrp, false); - if (notify_on_release(cgrp)) { - if (taskexit) - set_bit(CGRP_RELEASABLE, &cgrp->flags); - check_for_release(cgrp); - } + check_for_release(cgrp); } kfree(link); @@ -530,7 +518,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) kfree_rcu(cset, rcu_head); } -static void put_css_set(struct css_set *cset, bool taskexit) +static void put_css_set(struct css_set *cset) { /* * Ensure that the refcount doesn't hit zero while any readers @@ -541,7 +529,7 @@ static void put_css_set(struct css_set *cset, bool taskexit) return; down_write(&css_set_rwsem); - put_css_set_locked(cset, taskexit); + put_css_set_locked(cset); up_write(&css_set_rwsem); } @@ -2037,8 +2025,7 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, * task. As trading it for new_cset is protected by cgroup_mutex, * we're safe to drop it here; it will be freed under RCU. */ - set_bit(CGRP_RELEASABLE, &old_cgrp->flags); - put_css_set_locked(old_cset, false); + put_css_set_locked(old_cset); } /** @@ -2059,7 +2046,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) cset->mg_src_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); - put_css_set_locked(cset, false); + put_css_set_locked(cset); } up_write(&css_set_rwsem); } @@ -2153,8 +2140,8 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; list_del_init(&src_cset->mg_preload_node); - put_css_set(src_cset, false); - put_css_set(dst_cset, false); + put_css_set(src_cset); + put_css_set(dst_cset); continue; } @@ -2163,7 +2150,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, if (list_empty(&dst_cset->mg_preload_node)) list_add(&dst_cset->mg_preload_node, &csets); else - put_css_set(dst_cset, false); + put_css_set(dst_cset); } list_splice_tail(&csets, preloaded_csets); @@ -4159,7 +4146,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { - clear_bit(CGRP_RELEASABLE, &css->cgroup->flags); if (val) set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); else @@ -4806,7 +4792,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) */ kernfs_remove(cgrp->kn); - set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags); check_for_release(cgroup_parent(cgrp)); /* put the base reference */ @@ -5244,12 +5229,12 @@ void cgroup_exit(struct task_struct *tsk) } if (put_cset) - put_css_set(cset, true); + put_css_set(cset); } static void check_for_release(struct cgroup *cgrp) { - if (cgroup_is_releasable(cgrp) && !cgroup_has_tasks(cgrp) && + if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) && !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) schedule_work(&cgrp->release_agent_work); } @@ -5496,7 +5481,8 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) { - return test_bit(CGRP_RELEASABLE, &css->cgroup->flags); + return (!cgroup_has_tasks(css->cgroup) && + !css_has_online_children(&css->cgroup->self)); } static struct cftype debug_files[] = { -- cgit v1.2.3 From 3e2cd91ab92665148616a80dc0745c499d2746a7 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Sat, 20 Sep 2014 14:35:43 +0800 Subject: cgroup: fix missing unlock in cgroup_release_agent() The patch 971ff4935538: "cgroup: use a per-cgroup work for release agent" from Sep 18, 2014, leads to the following static checker warning: kernel/cgroup.c:5310 cgroup_release_agent() warn: 'mutex:&cgroup_mutex' is sometimes locked here and sometimes unlocked. Reported-by: Dan Carpenter Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 16e3a4f5c9dc..f873c4681316 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5291,7 +5291,10 @@ static void cgroup_release_agent(struct work_struct *work) mutex_unlock(&cgroup_mutex); call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + goto out_free; out: + mutex_unlock(&cgroup_mutex); +out_free: kfree(agentbuf); kfree(pathbuf); } -- cgit v1.2.3 From 0c7bf3e8cab7900e17ce7f97104c39927d835469 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Sat, 20 Sep 2014 14:49:10 +0800 Subject: cgroup: remove redundant variable in cgroup_mount() Both pinned_sb and new_sb indicate if a new superblock is needed, so we can just remove new_sb. Note now we must check if kernfs_tryget_sb() returns NULL, because when it returns NULL, kernfs_mount() may still re-use an existing superblock, which is just allocated by another concurent mount. Suggested-by: Tejun Heo Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f873c4681316..5eb20cd1709c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1694,7 +1694,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct dentry *dentry; int ret; int i; - bool new_sb; /* * The first time anyone tries to mount a cgroup, enable the list @@ -1785,7 +1784,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * path is super cold. Let's just sleep a bit and retry. */ pinned_sb = kernfs_pin_sb(root->kf_root, NULL); - if (IS_ERR(pinned_sb) || + if (IS_ERR_OR_NULL(pinned_sb) || !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); if (!IS_ERR_OR_NULL(pinned_sb)) @@ -1831,18 +1830,16 @@ out_free: return ERR_PTR(ret); dentry = kernfs_mount(fs_type, flags, root->kf_root, - CGROUP_SUPER_MAGIC, &new_sb); - if (IS_ERR(dentry) || !new_sb) + CGROUP_SUPER_MAGIC, NULL); + if (IS_ERR(dentry) || pinned_sb) cgroup_put(&root->cgrp); /* * If @pinned_sb, we're reusing an existing root and holding an * extra ref on its sb. Mount is complete. Put the extra ref. */ - if (pinned_sb) { - WARN_ON(new_sb); + if (pinned_sb) deactivate_super(pinned_sb); - } return dentry; } -- cgit v1.2.3 From e756c7b698604f11a979f2781d06eb7b80aba363 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 26 Sep 2014 12:03:25 +0800 Subject: Revert "cgroup: remove redundant variable in cgroup_mount()" This reverts commit 0c7bf3e8cab7900e17ce7f97104c39927d835469. If there are child cgroups in the cgroupfs and then we umount it, the superblock will be destroyed but the cgroup_root will be kept around. When we mount it again, cgroup_mount() will find this cgroup_root and allocate a new sb for it. So with this commit we will be trapped in a dead loop in the case described above, because kernfs_pin_sb() keeps returning NULL. Currently I don't see how we can avoid using both pinned_sb and new_sb, so just revert it. Cc: Al Viro Reported-by: Andrey Wagin Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5eb20cd1709c..f873c4681316 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1694,6 +1694,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct dentry *dentry; int ret; int i; + bool new_sb; /* * The first time anyone tries to mount a cgroup, enable the list @@ -1784,7 +1785,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * path is super cold. Let's just sleep a bit and retry. */ pinned_sb = kernfs_pin_sb(root->kf_root, NULL); - if (IS_ERR_OR_NULL(pinned_sb) || + if (IS_ERR(pinned_sb) || !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); if (!IS_ERR_OR_NULL(pinned_sb)) @@ -1830,16 +1831,18 @@ out_free: return ERR_PTR(ret); dentry = kernfs_mount(fs_type, flags, root->kf_root, - CGROUP_SUPER_MAGIC, NULL); - if (IS_ERR(dentry) || pinned_sb) + CGROUP_SUPER_MAGIC, &new_sb); + if (IS_ERR(dentry) || !new_sb) cgroup_put(&root->cgrp); /* * If @pinned_sb, we're reusing an existing root and holding an * extra ref on its sb. Mount is complete. Put the extra ref. */ - if (pinned_sb) + if (pinned_sb) { + WARN_ON(new_sb); deactivate_super(pinned_sb); + } return dentry; } -- cgit v1.2.3