From 79bd9814e5ec9a288d6599f53aeac0b548fdfe52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:42 -0500 Subject: cgroup, memcg: move cgroup_event implementation to memcg cgroup_event is way over-designed and tries to build a generic flexible event mechanism into cgroup - fully customizable event specification for each user of the interface. This is utterly unnecessary and overboard especially in the light of the planned unified hierarchy as there's gonna be single agent. Simply generating events at fixed points, or if that's too restrictive, configureable cadence or single set of configureable points should be enough. Thankfully, memcg is the only user and gets to keep it. Replacing it with something simpler on sane_behavior is strongly recommended. This patch moves cgroup_event and "cgroup.event_control" implementation to mm/memcontrol.c. Clearing of events on cgroup destruction is moved from cgroup_destroy_locked() to mem_cgroup_css_offline(), which shouldn't make any noticeable difference. cgroup_css() and __file_cft() are exported to enable the move; however, this will soon be reverted once the event code is updated to be memcg specific. Note that "cgroup.event_control" will now exist only on the hierarchy with memcg attached to it. While this change is visible to userland, it is unlikely to be noticeable as the file has never been meaningful outside memcg. Aside from the above change, this is pure code relocation. v2: Per Li Zefan's comments, init/Kconfig updated accordingly and poll.h inclusion moved from cgroup.c to memcontrol.c. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3561d305b1e0..40c2427806c9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -907,6 +907,11 @@ unsigned short css_id(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); +/* XXX: temporary */ +struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, + struct cgroup_subsys *ss); +struct cftype *__file_cft(struct file *file); + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } -- cgit v1.2.3 From fba94807837850e211f8975e1970e23e7804ff4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:43 -0500 Subject: cgroup, memcg: move cgroup->event_list[_lock] and event callbacks into memcg cgroup_event is being moved from cgroup core to memcg and the implementation is already moved by the previous patch. This patch moves the data fields and callbacks. * cgroup->event_list[_lock] are moved to mem_cgroup. * cftype->[un]register_event() are moved to cgroup_event. This makes it impossible for individual cftype definitions to specify their event callbacks. This is worked around by simply hard-coding filename to event callback mapping in cgroup_write_event_control(). This is awkward and inflexible, which is actually desirable given that we don't want to grow more usages of this feature. * eventfd_ctx declaration is removed from cgroup.h, which makes vmpressure.h miss eventfd_ctx declaration. Include eventfd.h from vmpressure.h. v2: Use file name from dentry instead of cftype. This will allow removing all cftype handling in the function. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh --- include/linux/cgroup.h | 24 ------------- include/linux/vmpressure.h | 1 + kernel/cgroup.c | 2 -- mm/memcontrol.c | 87 ++++++++++++++++++++++++++++++++-------------- 4 files changed, 61 insertions(+), 53 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 40c2427806c9..612adc5b87c5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -29,7 +29,6 @@ struct cgroup_subsys; struct inode; struct cgroup; struct css_id; -struct eventfd_ctx; extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -239,10 +238,6 @@ struct cgroup { struct rcu_head rcu_head; struct work_struct destroy_work; - /* List of events which userspace want to receive */ - struct list_head event_list; - spinlock_t event_list_lock; - /* directory xattrs */ struct simple_xattrs xattrs; }; @@ -506,25 +501,6 @@ struct cftype { int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); int (*release)(struct inode *inode, struct file *file); - - /* - * register_event() callback will be used to add new userspace - * waiter for changes related to the cftype. Implement it if - * you want to provide this functionality. Use eventfd_signal() - * on eventfd to send notification to userspace. - */ - int (*register_event)(struct cgroup_subsys_state *css, - struct cftype *cft, struct eventfd_ctx *eventfd, - const char *args); - /* - * unregister_event() callback will be called when userspace - * closes the eventfd or on cgroup removing. - * This callback must be implemented, if you want provide - * notification functionality. - */ - void (*unregister_event)(struct cgroup_subsys_state *css, - struct cftype *cft, - struct eventfd_ctx *eventfd); }; /* diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 3f3788d49362..9dd1914f1a6c 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -7,6 +7,7 @@ #include #include #include +#include struct vmpressure { unsigned long scanned; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4bccaa7dda35..feda7c54fa6b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1352,8 +1352,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->dummy_css.cgroup = cgrp; - INIT_LIST_HEAD(&cgrp->event_list); - spin_lock_init(&cgrp->event_list_lock); simple_xattrs_init(&cgrp->xattrs); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d00368110b08..2fcacb18404b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -248,6 +248,22 @@ struct cgroup_event { * Each of these stored in a list by the cgroup. */ struct list_head list; + /* + * register_event() callback will be used to add new userspace + * waiter for changes related to this event. Use eventfd_signal() + * on eventfd to send notification to userspace. + */ + int (*register_event)(struct cgroup_subsys_state *css, + struct cftype *cft, struct eventfd_ctx *eventfd, + const char *args); + /* + * unregister_event() callback will be called when userspace closes + * the eventfd or on cgroup removing. This callback must be set, + * if you want provide notification functionality. + */ + void (*unregister_event)(struct cgroup_subsys_state *css, + struct cftype *cft, + struct eventfd_ctx *eventfd); /* * All fields below needed to unregister event when * userspace closes eventfd. @@ -362,6 +378,10 @@ struct mem_cgroup { atomic_t numainfo_updating; #endif + /* List of events which userspace want to receive */ + struct list_head event_list; + spinlock_t event_list_lock; + struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; @@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work) remove_wait_queue(event->wqh, &event->wait); - event->cft->unregister_event(css, event->cft, event->eventfd); + event->unregister_event(css, event->cft, event->eventfd); /* Notify userspace the event is going away. */ eventfd_signal(event->eventfd, 1); @@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, { struct cgroup_event *event = container_of(wait, struct cgroup_event, wait); - struct cgroup *cgrp = event->css->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_css(event->css); unsigned long flags = (unsigned long)key; if (flags & POLLHUP) { @@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, * side will require wqh->lock via remove_wait_queue(), * which we hold. */ - spin_lock(&cgrp->event_list_lock); + spin_lock(&memcg->event_list_lock); if (!list_empty(&event->list)) { list_del_init(&event->list); /* @@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, */ schedule_work(&event->remove); } - spin_unlock(&cgrp->event_list_lock); + spin_unlock(&memcg->event_list_lock); } return 0; @@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file, static int cgroup_write_event_control(struct cgroup_subsys_state *css, struct cftype *cft, const char *buffer) { - struct cgroup *cgrp = css->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; struct fd efile; struct fd cfile; + const char *name; char *endp; int ret; @@ -6118,6 +6139,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, goto out_put_cfile; } + /* + * Determine the event callbacks and set them in @event. This used + * to be done via struct cftype but cgroup core no longer knows + * about these events. The following is crude but the whole thing + * is for compatibility anyway. + */ + name = cfile.file->f_dentry->d_name.name; + + if (!strcmp(name, "memory.usage_in_bytes")) { + event->register_event = mem_cgroup_usage_register_event; + event->unregister_event = mem_cgroup_usage_unregister_event; + } else if (!strcmp(name, "memory.oom_control")) { + event->register_event = mem_cgroup_oom_register_event; + event->unregister_event = mem_cgroup_oom_unregister_event; + } else if (!strcmp(name, "memory.pressure_level")) { + event->register_event = vmpressure_register_event; + event->unregister_event = vmpressure_unregister_event; + } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { + event->register_event = mem_cgroup_usage_register_event; + event->unregister_event = mem_cgroup_usage_unregister_event; + } else { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Verify @cfile should belong to @css. Also, remaining events are * automatically removed on cgroup destruction but the removal is @@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, if (ret) goto out_put_cfile; - if (!event->cft->register_event || !event->cft->unregister_event) { - ret = -EINVAL; - goto out_put_css; - } - - ret = event->cft->register_event(css, event->cft, - event->eventfd, buffer); + ret = event->register_event(css, event->cft, event->eventfd, buffer); if (ret) goto out_put_css; efile.file->f_op->poll(efile.file, &event->pt); - spin_lock(&cgrp->event_list_lock); - list_add(&event->list, &cgrp->event_list); - spin_unlock(&cgrp->event_list_lock); + spin_lock(&memcg->event_list_lock); + list_add(&event->list, &memcg->event_list); + spin_unlock(&memcg->event_list_lock); fdput(cfile); fdput(efile); @@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), .read = mem_cgroup_read, - .register_event = mem_cgroup_usage_register_event, - .unregister_event = mem_cgroup_usage_unregister_event, }, { .name = "max_usage_in_bytes", @@ -6236,14 +6274,10 @@ static struct cftype mem_cgroup_files[] = { .name = "oom_control", .read_map = mem_cgroup_oom_control_read, .write_u64 = mem_cgroup_oom_control_write, - .register_event = mem_cgroup_oom_register_event, - .unregister_event = mem_cgroup_oom_unregister_event, .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), }, { .name = "pressure_level", - .register_event = vmpressure_register_event, - .unregister_event = vmpressure_unregister_event, }, #ifdef CONFIG_NUMA { @@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), .read = mem_cgroup_read, - .register_event = mem_cgroup_usage_register_event, - .unregister_event = mem_cgroup_usage_unregister_event, }, { .name = "memsw.max_usage_in_bytes", @@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); vmpressure_init(&memcg->vmpressure); + INIT_LIST_HEAD(&memcg->event_list); + spin_lock_init(&memcg->event_list_lock); return &memcg->css; @@ -6555,7 +6589,6 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cgroup *cgrp = css->cgroup; struct cgroup_event *event, *tmp; /* @@ -6563,12 +6596,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) * Notify userspace about cgroup removing only after rmdir of cgroup * directory to avoid race between userspace and kernelspace. */ - spin_lock(&cgrp->event_list_lock); - list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { + spin_lock(&memcg->event_list_lock); + list_for_each_entry_safe(event, tmp, &memcg->event_list, list) { list_del_init(&event->list); schedule_work(&event->remove); } - spin_unlock(&cgrp->event_list_lock); + spin_unlock(&memcg->event_list_lock); kmem_cgroup_css_offline(memcg); -- cgit v1.2.3 From b36824c75c7855585d6476eef2b234f6e0e68872 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 Nov 2013 18:20:44 -0500 Subject: cgroup: unexport cgroup_css() and remove __file_cft() Now that cgroup_event is made memcg specific, the temporarily exported functions are no longer necessary. Unexport cgroup_css() and remove __file_cft() which doesn't have any user left. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Kirill A. Shutemov --- include/linux/cgroup.h | 5 ----- kernel/cgroup.c | 14 ++------------ 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 612adc5b87c5..8d9fa8967c9e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -883,11 +883,6 @@ unsigned short css_id(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); -/* XXX: temporary */ -struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, - struct cgroup_subsys *ss); -struct cftype *__file_cft(struct file *file); - #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index feda7c54fa6b..c0248e16461d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -202,8 +202,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], * keep accessing it outside the said locks. This function may return * %NULL if @cgrp doesn't have @subsys_id enabled. */ -struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) { if (ss) return rcu_dereference_check(cgrp->subsys[ss->subsys_id], @@ -2625,16 +2625,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { .removexattr = cgroup_removexattr, }; -/* - * Check if a file is a control file - */ -struct cftype *__file_cft(struct file *file) -{ - if (file_inode(file)->i_fop != &cgroup_file_operations) - return ERR_PTR(-EINVAL); - return __d_cft(file->f_dentry); -} - static int cgroup_create_file(struct dentry *dentry, umode_t mode, struct super_block *sb) { -- cgit v1.2.3