From 7bcc1bb1232de6efc0b85e0c7fe38e90b2436318 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 29 Jan 2009 14:25:11 -0800 Subject: memcg: get/put parents at create/free The lifetime of struct cgroup and struct mem_cgroup is different and mem_cgroup has its own reference count for handling references from swap_cgroup. This causes strange problem that the parent mem_cgroup dies while child mem_cgroup alive, and this problem causes a bug in case of use_hierarchy==1 because res_counter_uncharge climbs up the tree. This patch is for avoiding it by getting the parent at create, and putting it at freeing. Signed-off-by: Daisuke Nishimura Reviewed-by; KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4d0ea3ceba6d..76feccd26dc3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = { static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, struct page_cgroup *pc, @@ -2193,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem) static void mem_cgroup_put(struct mem_cgroup *mem) { - if (atomic_dec_and_test(&mem->refcnt)) + if (atomic_dec_and_test(&mem->refcnt)) { + struct mem_cgroup *parent = parent_mem_cgroup(mem); __mem_cgroup_free(mem); + if (parent) + mem_cgroup_put(parent); + } } +/* + * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. + */ +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) +{ + if (!mem->res.parent) + return NULL; + return mem_cgroup_from_res_counter(mem->res.parent, res); +} #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP static void __init enable_swap_cgroup(void) @@ -2235,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) if (parent && parent->use_hierarchy) { res_counter_init(&mem->res, &parent->res); res_counter_init(&mem->memsw, &parent->memsw); + /* + * We increment refcnt of the parent to ensure that we can + * safely access it on res_counter_charge/uncharge. + * This refcnt will be decremented when freeing this + * mem_cgroup(see mem_cgroup_put). + */ + mem_cgroup_get(parent); } else { res_counter_init(&mem->res, NULL); res_counter_init(&mem->memsw, NULL); -- cgit v1.2.3 From 299b4eaa302138426d5a9ecd954de1f565d76c94 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 29 Jan 2009 14:25:17 -0800 Subject: memcg: NULL pointer dereference at rmdir on some NUMA systems N_POSSIBLE doesn't means there is memory...and force_empty can visit invalid node which have no pgdat. To visit all valid nodes, N_HIGH_MEMORY should be used. Reported-by: Li Zefan Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Li Zefan Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76feccd26dc3..8e4be9cb2a6a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1685,7 +1685,7 @@ move_account: /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); ret = 0; - for_each_node_state(node, N_POSSIBLE) { + for_each_node_state(node, N_HIGH_MEMORY) { for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { enum lru_list l; for_each_lru(l) { -- cgit v1.2.3