From 3d48ae45e72390ddf8cc5256ac32ed6f7a19cbea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 May 2011 17:12:06 -0700 Subject: mm: Convert i_mmap_lock to a mutex Straightforward conversion of i_mmap_lock to a mutex. Signed-off-by: Peter Zijlstra Acked-by: Hugh Dickins Cc: Benjamin Herrenschmidt Cc: David Miller Cc: Martin Schwidefsky Cc: Russell King Cc: Paul Mundt Cc: Jeff Dike Cc: Richard Weinberger Cc: Tony Luck Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2b9a5eef39e0..12178ec32ab5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -429,7 +429,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, */ read_lock(&tasklist_lock); - spin_lock(&mapping->i_mmap_lock); + mutex_lock(&mapping->i_mmap_mutex); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -449,7 +449,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill(tsk, page, vma, to_kill, tkc); } } - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); read_unlock(&tasklist_lock); } -- cgit v1.2.3 From bd486285f24ac2fd1ff64688fb0729712c5712c4 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 24 May 2011 17:12:20 -0700 Subject: mem-hwpoison: fix page refcount around isolate_lru_page() Drop first page reference only after calling isolate_lru_page() to keep page stable reference while isolating. Signed-off-by: Konstantin Khlebnikov Cc: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Lee Schermerhorn Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 12178ec32ab5..369b80e81416 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1440,16 +1440,12 @@ int soft_offline_page(struct page *page, int flags) */ ret = invalidate_inode_page(page); unlock_page(page); - /* - * Drop count because page migration doesn't like raised - * counts. The page could get re-allocated, but if it becomes - * LRU the isolation will just fail. * RED-PEN would be better to keep it isolated here, but we * would need to fix isolation locking first. */ - put_page(page); if (ret == 1) { + put_page(page); ret = 0; pr_info("soft_offline: %#lx: invalidated\n", pfn); goto done; @@ -1461,6 +1457,11 @@ int soft_offline_page(struct page *page, int flags) * handles a large number of cases for us. */ ret = isolate_lru_page(page); + /* + * Drop page reference which is came from get_any_page() + * successful isolate_lru_page() already took another one. + */ + put_page(page); if (!ret) { LIST_HEAD(pagelist); -- cgit v1.2.3 From a09ed5e00084448453c8bada4dcd31e5fbfc2f21 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:26 -0700 Subject: vmscan: change shrink_slab() interfaces by passing shrink_control Consolidate the existing parameters to shrink_slab() into a new shrink_control struct. This is needed later to pass the same struct to shrinkers. Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 6 +++++- include/linux/mm.h | 13 +++++++++++-- mm/memory-failure.c | 7 ++++++- mm/vmscan.c | 46 +++++++++++++++++++++++++++++++++------------- 4 files changed, 55 insertions(+), 17 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 98b77c89494c..440999c24353 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -40,9 +40,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) static void drop_slab(void) { int nr_objects; + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; do { - nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + nr_objects = shrink_slab(&shrink, 1000); } while (nr_objects > 10); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e2f2cd821f7..32cfa9602d00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1161,6 +1161,15 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) } #endif +/* + * This struct is used to pass information from page reclaim to the shrinkers. + * We consolidate the values for easier extention later. + */ +struct shrink_control { + unsigned long nr_scanned; + gfp_t gfp_mask; +}; + /* * A callback you can register to apply pressure to ageable caches. * @@ -1630,8 +1639,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages); +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 369b80e81416..341341b2b47b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -239,7 +239,12 @@ void shake_page(struct page *p, int access) if (access) { int nr; do { - nr = shrink_slab(1000, GFP_KERNEL, 1000); + struct shrink_control shrink = { + .gfp_mask = GFP_KERNEL, + .nr_scanned = 1000, + }; + + nr = shrink_slab(&shrink, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index 890f54184d9a..e4e245ed1a5b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -222,11 +222,13 @@ EXPORT_SYMBOL(unregister_shrinker); * * Returns the number of slab objects which we shrunk. */ -unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages) +unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; + unsigned long scanned = shrink->nr_scanned; + gfp_t gfp_mask = shrink->gfp_mask; if (scanned == 0) scanned = SWAP_CLUSTER_MAX; @@ -2035,7 +2037,8 @@ static bool all_unreclaimable(struct zonelist *zonelist, * else, the number of pages reclaimed */ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, - struct scan_control *sc) + struct scan_control *sc, + struct shrink_control *shrink) { int priority; unsigned long total_scanned = 0; @@ -2069,7 +2072,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages); + shrink->nr_scanned = sc->nr_scanned; + shrink_slab(shrink, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2141,12 +2145,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .mem_cgroup = NULL, .nodemask = nodemask, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; trace_mm_vmscan_direct_reclaim_begin(order, sc.may_writepage, gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); @@ -2206,17 +2213,20 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .order = 0, .mem_cgroup = mem_cont, .nodemask = NULL, /* we don't care the placement */ + .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), + }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, }; - sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | - (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); zonelist = NODE_DATA(numa_node_id())->node_zonelists; trace_mm_vmscan_memcg_reclaim_begin(0, sc.may_writepage, sc.gfp_mask); - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); @@ -2344,6 +2354,9 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, .order = order, .mem_cgroup = NULL, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; loop_again: total_scanned = 0; sc.nr_reclaimed = 0; @@ -2443,8 +2456,8 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, - lru_pages); + shrink.nr_scanned = sc.nr_scanned; + nr_slab = shrink_slab(&shrink, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -2796,7 +2809,10 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .swappiness = vm_swappiness, .order = 0, }; - struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; + struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); struct task_struct *p = current; unsigned long nr_reclaimed; @@ -2805,7 +2821,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2980,6 +2996,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .swappiness = vm_swappiness, .order = order, }; + struct shrink_control shrink = { + .gfp_mask = sc.gfp_mask, + }; unsigned long nr_slab_pages0, nr_slab_pages1; cond_resched(); @@ -3006,6 +3025,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); + shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3021,7 +3041,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages)) + if (!shrink_slab(&shrink, lru_pages)) break; /* Freed enough memory */ -- cgit v1.2.3 From 1495f230fa7750479c79e3656286b9183d662077 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 24 May 2011 17:12:27 -0700 Subject: vmscan: change shrinker API by passing shrink_control struct Change each shrinker's API by consolidating the existing parameters into shrink_control struct. This will simplify any further features added w/o touching each file of shrinker. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: fix warning] [kosaki.motohiro@jp.fujitsu.com: fix up new shrinker API] [akpm@linux-foundation.org: fix xfs warning] [akpm@linux-foundation.org: update gfs2] Signed-off-by: Ying Han Cc: KOSAKI Motohiro Cc: Minchan Kim Acked-by: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Hugh Dickins Cc: Dave Hansen Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 9 +++------ drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 +++- drivers/staging/zcache/zcache.c | 5 ++++- fs/dcache.c | 8 ++++++-- fs/drop_caches.c | 3 +-- fs/gfs2/glock.c | 5 ++++- fs/gfs2/quota.c | 12 +++++++----- fs/gfs2/quota.h | 4 +++- fs/inode.c | 6 +++++- fs/mbcache.c | 10 ++++++---- fs/nfs/dir.c | 5 ++++- fs/nfs/internal.h | 2 +- fs/quota/dquot.c | 5 ++++- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- fs/xfs/linux-2.6/xfs_sync.c | 5 +++-- fs/xfs/quota/xfs_qm.c | 6 +++--- include/linux/mm.h | 19 +++++++++++-------- mm/memory-failure.c | 3 +-- mm/vmscan.c | 34 +++++++++++++++++++--------------- net/sunrpc/auth.c | 4 +++- 21 files changed, 95 insertions(+), 61 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 28418054b880..bd14bb4c8594 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3545,10 +3545,11 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); } -static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; struct kvm *kvm_freed = NULL; + int nr_to_scan = sc->nr_to_scan; if (nr_to_scan == 0) goto out; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c6289034e29a..0b2e167d2bce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -56,9 +56,7 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); static int i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask); - + struct shrink_control *sc); /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -4092,9 +4090,7 @@ i915_gpu_is_active(struct drm_device *dev) } static int -i915_gem_inactive_shrink(struct shrinker *shrinker, - int nr_to_scan, - gfp_t gfp_mask) +i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, @@ -4102,6 +4098,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj, *next; + int nr_to_scan = sc->nr_to_scan; int cnt; if (!mutex_trylock(&dev->struct_mutex)) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 9d9d92945f8c..d948575717bf 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -395,12 +395,14 @@ static int ttm_pool_get_num_unused_pages(void) /** * Callback for mm to request pool to reduce number of page held. */ -static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask) +static int ttm_pool_mm_shrink(struct shrinker *shrink, + struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned i; unsigned pool_offset = atomic_add_return(1, &start_pool); struct ttm_page_pool *pool; + int shrink_pages = sc->nr_to_scan; pool_offset = pool_offset % NUM_POOLS; /* select start pool in round robin fashion */ diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c index b8a2b30a1572..77ac2d4d3ef1 100644 --- a/drivers/staging/zcache/zcache.c +++ b/drivers/staging/zcache/zcache.c @@ -1181,9 +1181,12 @@ static bool zcache_freeze; /* * zcache shrinker interface (only useful for ephemeral pages, so zbud only) */ -static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_zcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { int ret = -1; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if (nr >= 0) { if (!(gfp_mask & __GFP_FS)) diff --git a/fs/dcache.c b/fs/dcache.c index 18b2a1f10ed8..37f72ee5bf7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent) EXPORT_SYMBOL(shrink_dcache_parent); /* - * Scan `nr' dentries and return the number which remain. + * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. * * We need to avoid reentering the filesystem if the caller is performing a * GFP_NOFS allocation attempt. One example deadlock is: @@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent); * * In this case we return -1 to tell the caller that we baled. */ -static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { if (!(gfp_mask & __GFP_FS)) return -1; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 440999c24353..c00e055b6282 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -42,11 +42,10 @@ static void drop_slab(void) int nr_objects; struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; do { - nr_objects = shrink_slab(&shrink, 1000); + nr_objects = shrink_slab(&shrink, 1000, 1000); } while (nr_objects > 10); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..2792a790e50b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int gfs2_shrink_glock_memory(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_glock *gl; int may_demote; int nr_skipped = 0; + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD(skipped); if (nr == 0) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e23d9864c418..42e8d23bc047 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; + int nr_to_scan = sc->nr_to_scan; - if (nr == 0) + if (nr_to_scan == 0) goto out; - if (!(gfp_mask & __GFP_FS)) + if (!(sc->gfp_mask & __GFP_FS)) return -1; spin_lock(&qd_lru_lock); - while (nr && !list_empty(&qd_lru_list)) { + while (nr_to_scan && !list_empty(&qd_lru_list)) { qd = list_entry(qd_lru_list.next, struct gfs2_quota_data, qd_reclaim); sdp = qd->qd_gl->gl_sbd; @@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) spin_unlock(&qd_lru_lock); kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); - nr--; + nr_to_scan--; } spin_unlock(&qd_lru_lock); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48bd..90bf1c302a98 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -12,6 +12,7 @@ struct gfs2_inode; struct gfs2_sbd; +struct shrink_control; #define NO_QUOTA_CHANGE ((u32)-1) @@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); +extern int gfs2_shrink_qd_memory(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/inode.c b/fs/inode.c index 88aa3bcc7681..990d284877a1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan) * This function is passed the number of inodes to scan, and it returns the * total number of remaining possibly-reclaimable inodes. */ -static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_icache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + if (nr) { /* * Nasty deadlock avoidance. We may hold various FS locks, diff --git a/fs/mbcache.c b/fs/mbcache.c index 2f174be06555..8c32ef3ba88e 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock); * What the mbcache registers as to get shrunk dynamically. */ -static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +static int mb_cache_shrink_fn(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker mb_cache_shrinker = { .shrink = mb_cache_shrink_fn, @@ -156,18 +157,19 @@ forget: * gets low. * * @shrink: (ignored) - * @nr_to_scan: Number of objects to scan - * @gfp_mask: (ignored) + * @sc: shrink_control passed from reclaim * * Returns the number of objects which are present in the cache. */ static int -mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; int count = 0; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..424e47773a84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +int nfs_access_cache_shrinker(struct shrinker *shrink, + struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..2df6ca7b5898 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, - int nr_to_scan, gfp_t gfp_mask); + struct shrink_control *sc); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d3c032f5fa0a..5b572c89e6c4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -691,8 +691,11 @@ static void prune_dqcache(int count) * This is called from kswapd when we think we need some * more memory */ -static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +static int shrink_dqcache_memory(struct shrinker *shrink, + struct shrink_control *sc) { + int nr = sc->nr_to_scan; + if (nr) { spin_lock(&dq_list_lock); prune_dqcache(nr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 52b2b5da566e..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1422,12 +1422,12 @@ restart: int xfs_buftarg_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t mask) + struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); if (!nr_to_scan) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cb1bb2080e44..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -1032,13 +1032,14 @@ xfs_reclaim_inodes( static int xfs_reclaim_inode_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { struct xfs_mount *mp; struct xfs_perag *pag; xfs_agnumber_t ag; int reclaimable; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 69228aa8605a..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist( STATIC int xfs_qm_shake( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; if (!kmem_shake_allow(gfp_mask)) return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 32cfa9602d00..5cbbf78eaac7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1166,18 +1166,20 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) * We consolidate the values for easier extention later. */ struct shrink_control { - unsigned long nr_scanned; gfp_t gfp_mask; + + /* How many slab objects shrinker() should scan and try to reclaim */ + unsigned long nr_to_scan; }; /* * A callback you can register to apply pressure to ageable caches. * - * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should - * look through the least-recently-used 'nr_to_scan' entries and - * attempt to free them up. It should return the number of objects - * which remain in the cache. If it returns -1, it means it cannot do - * any scanning at this time (eg. there is a risk of deadlock). + * 'sc' is passed shrink_control which includes a count 'nr_to_scan' + * and a 'gfpmask'. It should look through the least-recently-used + * 'nr_to_scan' entries and attempt to free them up. It should return + * the number of objects which remain in the cache. If it returns -1, it means + * it cannot do any scanning at this time (eg. there is a risk of deadlock). * * The 'gfpmask' refers to the allocation we are currently trying to * fulfil. @@ -1186,7 +1188,7 @@ struct shrink_control { * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, struct shrink_control *sc); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ @@ -1640,7 +1642,8 @@ int in_gate_area_no_mm(unsigned long addr); int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(struct shrink_control *shrink, - unsigned long lru_pages); + unsigned long nr_pages_scanned, + unsigned long lru_pages); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 341341b2b47b..5c8f7e08928d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -241,10 +241,9 @@ void shake_page(struct page *p, int access) do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, - .nr_scanned = 1000, }; - nr = shrink_slab(&shrink, 1000); + nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) break; } while (nr > 10); diff --git a/mm/vmscan.c b/mm/vmscan.c index e4e245ed1a5b..7e0116150dc7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -202,6 +202,14 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); +static inline int do_shrinker_shrink(struct shrinker *shrinker, + struct shrink_control *sc, + unsigned long nr_to_scan) +{ + sc->nr_to_scan = nr_to_scan; + return (*shrinker->shrink)(shrinker, sc); +} + #define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches @@ -223,15 +231,14 @@ EXPORT_SYMBOL(unregister_shrinker); * Returns the number of slab objects which we shrunk. */ unsigned long shrink_slab(struct shrink_control *shrink, + unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; unsigned long ret = 0; - unsigned long scanned = shrink->nr_scanned; - gfp_t gfp_mask = shrink->gfp_mask; - if (scanned == 0) - scanned = SWAP_CLUSTER_MAX; + if (nr_pages_scanned == 0) + nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { /* Assume we'll be able to shrink next time */ @@ -244,8 +251,8 @@ unsigned long shrink_slab(struct shrink_control *shrink, unsigned long total_scan; unsigned long max_pass; - max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask); - delta = (4 * scanned) / shrinker->seeks; + max_pass = do_shrinker_shrink(shrinker, shrink, 0); + delta = (4 * nr_pages_scanned) / shrinker->seeks; delta *= max_pass; do_div(delta, lru_pages + 1); shrinker->nr += delta; @@ -272,9 +279,9 @@ unsigned long shrink_slab(struct shrink_control *shrink, int shrink_ret; int nr_before; - nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask); - shrink_ret = (*shrinker->shrink)(shrinker, this_scan, - gfp_mask); + nr_before = do_shrinker_shrink(shrinker, shrink, 0); + shrink_ret = do_shrinker_shrink(shrinker, shrink, + this_scan); if (shrink_ret == -1) break; if (shrink_ret < nr_before) @@ -2072,8 +2079,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, lru_pages += zone_reclaimable_pages(zone); } - shrink->nr_scanned = sc->nr_scanned; - shrink_slab(shrink, lru_pages); + shrink_slab(shrink, sc->nr_scanned, lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; reclaim_state->reclaimed_slab = 0; @@ -2456,8 +2462,7 @@ loop_again: end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; - shrink.nr_scanned = sc.nr_scanned; - nr_slab = shrink_slab(&shrink, lru_pages); + nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; @@ -3025,7 +3030,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); - shrink.nr_scanned = sc.nr_scanned; if (nr_slab_pages0 > zone->min_slab_pages) { /* * shrink_slab() does not currently allow us to determine how @@ -3041,7 +3045,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) unsigned long lru_pages = zone_reclaimable_pages(zone); /* No reclaimable slab or very low memory pressure */ - if (!shrink_slab(&shrink, lru_pages)) + if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages)) break; /* Freed enough memory */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 67e31276682a..cd6e4aa19dbf 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free); int res; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; -- cgit v1.2.3 From 5db8a73a8d7cc6a66afbf25ed7fda338caa8f5f9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Jun 2011 15:08:48 -0700 Subject: mm/memory-failure.c: fix page isolated count mismatch Pages isolated for migration are accounted with the vmstat counters NR_ISOLATE_[ANON|FILE]. Callers of migrate_pages() are expected to increment these counters when pages are isolated from the LRU. Once the pages have been migrated, they are put back on the LRU or freed and the isolated count is decremented. Memory failure is not properly accounting for pages it isolates causing the NR_ISOLATED counters to be negative. On SMP builds, this goes unnoticed as negative counters are treated as 0 due to expected per-cpu drift. On UP builds, the counter is treated by too_many_isolated() as a large value causing processes to enter D state during page reclaim or compaction. This patch accounts for pages isolated by memory failure correctly. [mel@csn.ul.ie: rewrote changelog] Reviewed-by: Andrea Arcangeli Signed-off-by: Minchan Kim Cc: Andi Kleen Acked-by: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5c8f7e08928d..eac0ba561491 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1468,7 +1469,8 @@ int soft_offline_page(struct page *page, int flags) put_page(page); if (!ret) { LIST_HEAD(pagelist); - + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, true); -- cgit v1.2.3