From 894bc310419ac95f4fa4142dc364401a7e607f65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:39 -0700 Subject: Unevictable LRU Infrastructure When the system contains lots of mlocked or otherwise unevictable pages, the pageout code (kswapd) can spend lots of time scanning over these pages. Worse still, the presence of lots of unevictable pages can confuse kswapd into thinking that more aggressive pageout modes are required, resulting in all kinds of bad behaviour. Infrastructure to manage pages excluded from reclaim--i.e., hidden from vmscan. Based on a patch by Larry Woodman of Red Hat. Reworked to maintain "unevictable" pages on a separate per-zone LRU list, to "hide" them from vmscan. Kosaki Motohiro added the support for the memory controller unevictable lru list. Pages on the unevictable list have both PG_unevictable and PG_lru set. Thus, PG_unevictable is analogous to and mutually exclusive with PG_active--it specifies which LRU list the page is on. The unevictable infrastructure is enabled by a new mm Kconfig option [CONFIG_]UNEVICTABLE_LRU. A new function 'page_evictable(page, vma)' in vmscan.c tests whether or not a page may be evictable. Subsequent patches will add the various !evictable tests. We'll want to keep these tests light-weight for use in shrink_active_list() and, possibly, the fault path. To avoid races between tasks putting pages [back] onto an LRU list and tasks that might be moving the page from non-evictable to evictable state, the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()' -- tests the "evictability" of a page after placing it on the LRU, before dropping the reference. If the page has become unevictable, putback_lru_page() will redo the 'putback', thus moving the page to the unevictable list. This way, we avoid "stranding" evictable pages on the unevictable list. [akpm@linux-foundation.org: fix fallout from out-of-order merge] [riel@redhat.com: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build] [nishimura@mxp.nes.nec.co.jp: remove redundant mapping check] [kosaki.motohiro@jp.fujitsu.com: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework] [kosaki.motohiro@jp.fujitsu.com: kill unnecessary lock_page() in vmscan.c] [kosaki.motohiro@jp.fujitsu.com: revert migration change of unevictable lru infrastructure] [kosaki.motohiro@jp.fujitsu.com: revert to unevictable-lru-infrastructure-kconfig-fix.patch] [kosaki.motohiro@jp.fujitsu.com: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Debugged-by: Benjamin Kidwell Signed-off-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- include/linux/mm_inline.h | 23 ++++++++++++++++------- include/linux/mmzone.h | 24 +++++++++++++++++++++++- include/linux/page-flags.h | 22 +++++++++++++++++++++- include/linux/pagevec.h | 1 - include/linux/swap.h | 12 ++++++++++++ 6 files changed, 73 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8d8f05c1515a..ee1b2fcb4410 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern void mem_cgroup_move_lists(struct page *page, bool active); extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f451fedd1e75..67d7697fd019 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page) enum lru_list l = LRU_BASE; list_del(&page->lru); - if (PageActive(page)) { - __ClearPageActive(page); - l += LRU_ACTIVE; + if (PageUnevictable(page)) { + __ClearPageUnevictable(page); + l = LRU_UNEVICTABLE; + } else { + if (PageActive(page)) { + __ClearPageActive(page); + l += LRU_ACTIVE; + } + l += page_is_file_cache(page); } - l += page_is_file_cache(page); __dec_zone_state(zone, NR_LRU_BASE + l); } @@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page) { enum lru_list lru = LRU_BASE; - if (PageActive(page)) - lru += LRU_ACTIVE; - lru += page_is_file_cache(page); + if (PageUnevictable(page)) + lru = LRU_UNEVICTABLE; + else { + if (PageActive(page)) + lru += LRU_ACTIVE; + lru += page_is_file_cache(page); + } return lru; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9c5111f49a32..d1f60d5fe2ea 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -86,6 +86,11 @@ enum zone_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ +#ifdef CONFIG_UNEVICTABLE_LRU + NR_UNEVICTABLE, /* " " " " " */ +#else + NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */ +#endif NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ @@ -128,10 +133,18 @@ enum lru_list { LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, - NR_LRU_LISTS }; +#ifdef CONFIG_UNEVICTABLE_LRU + LRU_UNEVICTABLE, +#else + LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */ +#endif + NR_LRU_LISTS +}; #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) +#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++) + static inline int is_file_lru(enum lru_list l) { return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); @@ -142,6 +155,15 @@ static inline int is_active_lru(enum lru_list l) return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); } +static inline int is_unevictable_lru(enum lru_list l) +{ +#ifdef CONFIG_UNEVICTABLE_LRU + return (l == LRU_UNEVICTABLE); +#else + return 0; +#endif +} + struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 3d31616dcd23..ec1a1baad348 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -94,6 +94,9 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_buddy, /* Page is free, on buddy lists */ PG_swapbacked, /* Page is backed by RAM/swap */ +#ifdef CONFIG_UNEVICTABLE_LRU + PG_unevictable, /* Page is "unevictable" */ +#endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PG_uncached, /* Page has been mapped as uncached */ #endif @@ -182,6 +185,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) + TESTCLEARFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ @@ -225,6 +229,15 @@ PAGEFLAG(SwapCache, swapcache) PAGEFLAG_FALSE(SwapCache) #endif +#ifdef CONFIG_UNEVICTABLE_LRU +PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) + TESTCLEARFLAG(Unevictable, unevictable) +#else +PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) + SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) + __CLEARPAGEFLAG_NOOP(Unevictable) +#endif + #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PAGEFLAG(Uncached, uncached) #else @@ -340,9 +353,16 @@ static inline void __ClearPageTail(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ +#ifdef CONFIG_UNEVICTABLE_LRU +#define __PG_UNEVICTABLE (1 << PG_unevictable) +#else +#define __PG_UNEVICTABLE 0 +#endif + #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 1 << PG_buddy | 1 << PG_writeback | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active) + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ + __PG_UNEVICTABLE) /* * Flags checked in bad_page(). Pages on the free list should not have diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5fc96a4e760f..e90a2cb02915 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -101,7 +101,6 @@ static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); } - static inline void pagevec_lru_add_file(struct pagevec *pvec) { if (pagevec_count(pvec)) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7d09d79997a4..a2113044d20a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -180,6 +180,8 @@ extern int lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void swap_setup(void); +extern void add_page_to_unevictable_list(struct page *page); + /** * lru_cache_add: add a page to the page lists * @page: the page to add @@ -228,6 +230,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) } #endif +#ifdef CONFIG_UNEVICTABLE_LRU +extern int page_evictable(struct page *page, struct vm_area_struct *vma); +#else +static inline int page_evictable(struct page *page, + struct vm_area_struct *vma) +{ + return 1; +} +#endif + extern int kswapd_run(int nid); #ifdef CONFIG_MMU -- cgit v1.2.3