From 925b7673cce39116ce61e7a06683a4a0dad1e72a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:15 -0800 Subject: mm: make per-memcg LRU lists exclusive Now that all code that operated on global per-zone LRU lists is converted to operate on per-memory cgroup LRU lists instead, there is no reason to keep the double-LRU scheme around any longer. The pc->lru member is removed and page->lru is linked directly to the per-memory cgroup LRU lists, which removes two pointers from a descriptor that exists for every page frame in the system. Signed-off-by: Johannes Weiner Signed-off-by: Hugh Dickins Signed-off-by: Ying Han Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Reviewed-by: Kirill A. Shutemov Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Greg Thelen Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/page_cgroup.c') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 2d123f94a8df..f59405a8d752 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -16,7 +16,6 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) pc->flags = 0; set_page_cgroup_array_id(pc, id); pc->mem_cgroup = NULL; - INIT_LIST_HEAD(&pc->lru); } static unsigned long total_usage; -- cgit v1.2.3 From 6b208e3f6e35aa76d254c395bdcd984b17c6b626 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:18 -0800 Subject: mm: memcg: remove unused node/section info from pc->flags To find the page corresponding to a certain page_cgroup, the pc->flags encoded the node or section ID with the base array to compare the pc pointer to. Now that the per-memory cgroup LRU lists link page descriptors directly, there is no longer any code that knows the struct page_cgroup of a PFN but not the struct page. 
[hughd@google.com: remove unused node/section info from pc->flags fix] Signed-off-by: Johannes Weiner Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Michal Hocko Reviewed-by: Kirill A. Shutemov Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Daisuke Nishimura Cc: Balbir Singh Cc: Ying Han Cc: Greg Thelen Cc: Michel Lespinasse Cc: Rik van Riel Cc: Minchan Kim Cc: Christoph Hellwig Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 33 ------------------------- mm/page_cgroup.c | 59 ++++++--------------------------------------- 2 files changed, 7 insertions(+), 85 deletions(-) (limited to 'mm/page_cgroup.c') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 5bae7535c202..aaa60da8783c 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -121,39 +121,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc, local_irq_restore(*flags); } -#ifdef CONFIG_SPARSEMEM -#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT -#else -#define PCG_ARRAYID_WIDTH NODES_SHIFT -#endif - -#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS) -#error Not enough space left in pc->flags to store page_cgroup array IDs -#endif - -/* pc->flags: ARRAY-ID | FLAGS */ - -#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1) - -#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH) -/* - * Zero the shift count for non-existent fields, to prevent compiler - * warnings and ensure references are optimized away. 
- */ -#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0)) - -static inline void set_page_cgroup_array_id(struct page_cgroup *pc, - unsigned long id) -{ - pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT); - pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT; -} - -static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc) -{ - return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK; -} - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59405a8d752..f0559e049e00 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -11,12 +11,6 @@ #include #include -static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) -{ - pc->flags = 0; - set_page_cgroup_array_id(pc, id); - pc->mem_cgroup = NULL; -} static unsigned long total_usage; #if !defined(CONFIG_SPARSEMEM) @@ -41,28 +35,13 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return base + offset; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - unsigned long pfn; - struct page *page; - pg_data_t *pgdat; - - pgdat = NODE_DATA(page_cgroup_array_id(pc)); - pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; - page = pfn_to_page(pfn); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static int __init alloc_node_page_cgroup(int nid) { - struct page_cgroup *base, *pc; + struct page_cgroup *base; unsigned long table_size; - unsigned long start_pfn, nr_pages, index; + unsigned long nr_pages; - start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; - if (!nr_pages) return 0; @@ -72,10 +51,6 @@ static int __init alloc_node_page_cgroup(int nid) table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) return -ENOMEM; - for (index = 0; index < nr_pages; index++) { - pc = base + index; - init_page_cgroup(pc, nid); - } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; return 0; @@ -116,23 +91,10 
@@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } -struct page *lookup_cgroup_page(struct page_cgroup *pc) -{ - struct mem_section *section; - struct page *page; - unsigned long nr; - - nr = page_cgroup_array_id(pc); - section = __nr_to_section(nr); - page = pfn_to_page(pc - section->page_cgroup); - VM_BUG_ON(pc != lookup_page_cgroup(page)); - return page; -} - static void *__meminit alloc_page_cgroup(size_t size, int nid) { + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; - gfp_t flags = GFP_KERNEL | __GFP_NOWARN; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) { @@ -141,9 +103,9 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid) } if (node_state(nid, N_HIGH_MEMORY)) - addr = vmalloc_node(size, nid); + addr = vzalloc_node(size, nid); else - addr = vmalloc(size); + addr = vzalloc(size); return addr; } @@ -166,14 +128,11 @@ static void free_page_cgroup(void *addr) static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) { - struct page_cgroup *base, *pc; struct mem_section *section; + struct page_cgroup *base; unsigned long table_size; - unsigned long nr; - int index; - nr = pfn_to_section_nr(pfn); - section = __nr_to_section(nr); + section = __pfn_to_section(pfn); if (section->page_cgroup) return 0; @@ -193,10 +152,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) return -ENOMEM; } - for (index = 0; index < PAGES_PER_SECTION; index++) { - pc = base + index; - init_page_cgroup(pc, nr); - } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. -- cgit v1.2.3 From 00c54c0bac24bb02d2460c516da76651a7451286 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:40 -0800 Subject: mm: page_cgroup: check page_cgroup arrays in lookup_page_cgroup() only when necessary lookup_page_cgroup() is usually used only against pages that are used in userspace. 
The exception is the CONFIG_DEBUG_VM-only memcg check from the page allocator: it can run on pages without page_cgroup descriptors allocated when the pages are fed into the page allocator for the first time during boot or memory hotplug. Include the array check only when CONFIG_DEBUG_VM is set and save the unnecessary check in production kernels. Signed-off-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Cc: Balbir Singh Cc: David Rientjes Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'mm/page_cgroup.c') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f0559e049e00..e910524e5a08 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -28,9 +28,16 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) struct page_cgroup *base; base = NODE_DATA(page_to_nid(page))->node_page_cgroup; +#ifdef CONFIG_DEBUG_VM + /* + * The sanity checks the page allocator does upon freeing a + * page can reach here before the page_cgroup arrays are + * allocated when feeding a range of pages to the allocator + * for the first time during bootup or memory hotplug. + */ if (unlikely(!base)) return NULL; - +#endif offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn; return base + offset; } @@ -85,9 +92,16 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); - +#ifdef CONFIG_DEBUG_VM + /* + * The sanity checks the page allocator does upon freeing a + * page can reach here before the page_cgroup arrays are + * allocated when feeding a range of pages to the allocator + * for the first time during bootup or memory hotplug. 
+ */ if (!section->page_cgroup) return NULL; +#endif return section->page_cgroup + pfn; } -- cgit v1.2.3 From 9fb4b7cc0724f178d4b24a2a566ea1e7cb120b82 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Thu, 12 Jan 2012 17:18:48 -0800 Subject: page_cgroup: add helper function to get swap_cgroup There are multiple places which need to get the swap_cgroup address, so add a helper function: static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, struct swap_cgroup_ctrl **ctrlp); to simplify the code. The exported lookup function is renamed from lookup_swap_cgroup() to lookup_swap_cgroup_id() to free up the name for the helper, and its callers in mm/memcontrol.c are updated accordingly. Signed-off-by: Bob Liu Acked-by: Michal Hocko Acked-by: KAMEZAWA Hiroyuki Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 4 ++-- mm/memcontrol.c | 4 ++-- mm/page_cgroup.c | 56 ++++++++++++++++----------------------------- 3 files changed, 24 insertions(+), 40 deletions(-) (limited to 'mm/page_cgroup.c') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index aaa60da8783c..1153095ee457 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -149,7 +149,7 @@ static inline void __init page_cgroup_init_flatmem(void) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); -extern unsigned short lookup_swap_cgroup(swp_entry_t ent); +extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); #else @@ -161,7 +161,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } static inline -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71a9774e6ead..4c53e971749e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2474,7 +2474,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) memcg =
NULL; } else if (PageSwapCache(page)) { ent.val = page_private(page); - id = lookup_swap_cgroup(ent); + id = lookup_swap_cgroup_id(ent); rcu_read_lock(); memcg = mem_cgroup_lookup(id); if (memcg && !css_tryget(&memcg->css)) @@ -5264,7 +5264,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma, } /* There is a swap entry and a page doesn't exist or isn't charged */ if (ent.val && !ret && - css_id(&mc.from->css) == lookup_swap_cgroup(ent)) { + css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) { ret = MC_TARGET_SWAP; if (target) target->ent = ent; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index e910524e5a08..b99d19edf89b 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -334,7 +334,6 @@ struct swap_cgroup { unsigned short id; }; #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) -#define SC_POS_MASK (SC_PER_PAGE - 1) /* * SwapCgroup implements "lookup" and "exchange" operations. @@ -376,6 +375,21 @@ not_enough_page: return -ENOMEM; } +static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, + struct swap_cgroup_ctrl **ctrlp) +{ + pgoff_t offset = swp_offset(ent); + struct swap_cgroup_ctrl *ctrl; + struct page *mappage; + + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; + if (ctrlp) + *ctrlp = ctrl; + + mappage = ctrl->map[offset / SC_PER_PAGE]; + return page_address(mappage) + offset % SC_PER_PAGE; +} + /** * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. 
* @end: swap entry to be cmpxchged @@ -388,21 +402,13 @@ not_enough_page: unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned long flags; unsigned short retval; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); retval = sc->id; if (retval == old) @@ -423,21 +429,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, */ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; struct swap_cgroup_ctrl *ctrl; - struct page *mappage; struct swap_cgroup *sc; unsigned short old; unsigned long flags; - ctrl = &swap_cgroup_ctrl[type]; + sc = lookup_swap_cgroup(ent, &ctrl); - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; spin_lock_irqsave(&ctrl->lock, flags); old = sc->id; sc->id = id; @@ -447,28 +445,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) } /** - * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry + * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry * @ent: swap entry to be looked up. * * Returns CSS ID of mem_cgroup at success. 0 at failure. 
(0 is invalid ID) */ -unsigned short lookup_swap_cgroup(swp_entry_t ent) +unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { - int type = swp_type(ent); - unsigned long offset = swp_offset(ent); - unsigned long idx = offset / SC_PER_PAGE; - unsigned long pos = offset & SC_POS_MASK; - struct swap_cgroup_ctrl *ctrl; - struct page *mappage; - struct swap_cgroup *sc; - unsigned short ret; - - ctrl = &swap_cgroup_ctrl[type]; - mappage = ctrl->map[idx]; - sc = page_address(mappage); - sc += pos; - ret = sc->id; - return ret; + return lookup_swap_cgroup(ent, NULL)->id; } int swap_cgroup_swapon(int type, unsigned long max_pages) -- cgit v1.2.3 From 0efc8eb9c6a177836dac88b2cbb8815f9e4f8d5a Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Thu, 12 Jan 2012 17:19:08 -0800 Subject: page_cgroup: drop multi CONFIG_MEMORY_HOTPLUG No need for two CONFIG_MEMORY_HOTPLUG blocks. Signed-off-by: Bob Liu Acked-by: Michal Hocko Cc: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_cgroup.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'mm/page_cgroup.c') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index b99d19edf89b..de1616aa9b1e 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -124,22 +124,6 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid) return addr; } -#ifdef CONFIG_MEMORY_HOTPLUG -static void free_page_cgroup(void *addr) -{ - if (is_vmalloc_addr(addr)) { - vfree(addr); - } else { - struct page *page = virt_to_page(addr); - size_t table_size = - sizeof(struct page_cgroup) * PAGES_PER_SECTION; - - BUG_ON(PageReserved(page)); - free_pages_exact(addr, table_size); - } -} -#endif - static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) { struct mem_section *section; @@ -176,6 +160,20 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) return 0; } #ifdef CONFIG_MEMORY_HOTPLUG 
+static void free_page_cgroup(void *addr) +{ + if (is_vmalloc_addr(addr)) { + vfree(addr); + } else { + struct page *page = virt_to_page(addr); + size_t table_size = + sizeof(struct page_cgroup) * PAGES_PER_SECTION; + + BUG_ON(PageReserved(page)); + free_pages_exact(addr, table_size); + } +} + void __free_page_cgroup(unsigned long pfn) { struct mem_section *ms; -- cgit v1.2.3