locking/lockdep: Rework FS_RECLAIM annotation

A while ago someone, and I cannot find the email just now, asked if we could not implement the RECLAIM_FS inversion stuff with a 'fake' lock like we use for other things like workqueues etc. I think this should be possible which allows reducing the 'irq' states and will reduce the amount of __bfs() lookups we do. Removing the 1 IRQ state results in 4 less __bfs() walks per dependency, improving lockdep performance. And by moving this annotation out of the lockdep code it becomes easier for the mm people to extend. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Byungchul Park <byungchul.park@lge.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Michal Hocko <mhocko@kernel.org> Cc: Nikolay Borisov <nborisov@suse.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: iamjoonsoo.kim@lge.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Peter Zijlstra <peterz@infradead.org> 2017-03-03 12:13:38 +0300
committer: Ingo Molnar <mingo@kernel.org> 2017-08-10 13:29:03 +0300
commit: d92a8cfcb37ecd1315269dab741f073b63b3a8b6 (patch)
tree: bd081c6309a48d23ae074e2035105be13622b005 /mm
parent: a9668cd6ee288c4838bc668880ac085be551cac2 (diff)
download: linux-d92a8cfcb37ecd1315269dab741f073b63b3a8b6.tar.xz
4 files changed, 61 insertions, 13 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc32aa81f359..c20d89601802 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/lockdep.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -3490,6 +3491,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
 }
 #endif /* CONFIG_COMPACTION */
 
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __fs_reclaim_map =
+	STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
+
+static bool __need_fs_reclaim(gfp_t gfp_mask)
+{
+	gfp_mask = current_gfp_context(gfp_mask);
+
+	/* no reclaim without waiting on it */
+	if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
+		return false;
+
+	/* this guy won't enter reclaim */
+	if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+		return false;
+
+	/* We're only interested __GFP_FS allocations for now */
+	if (!(gfp_mask & __GFP_FS))
+		return false;
+
+	if (gfp_mask & __GFP_NOLOCKDEP)
+		return false;
+
+	return true;
+}
+
+void fs_reclaim_acquire(gfp_t gfp_mask)
+{
+	if (__need_fs_reclaim(gfp_mask))
+		lock_map_acquire(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
+
+void fs_reclaim_release(gfp_t gfp_mask)
+{
+	if (__need_fs_reclaim(gfp_mask))
+		lock_map_release(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_release);
+#endif
+
 /* Perform direct synchronous page reclaim */
 static int
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -3504,7 +3546,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
 	noreclaim_flag = memalloc_noreclaim_save();
-	lockdep_set_current_reclaim_state(gfp_mask);
+	fs_reclaim_acquire(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	current->reclaim_state = &reclaim_state;
 
@@ -3512,7 +3554,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 								ac->nodemask);
 
 	current->reclaim_state = NULL;
-	lockdep_clear_current_reclaim_state();
+	fs_reclaim_release(gfp_mask);
 	memalloc_noreclaim_restore(noreclaim_flag);
 
 	cond_resched();
@@ -4041,7 +4083,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 			*alloc_flags |= ALLOC_CPUSET;
 	}
 
-	lockdep_trace_alloc(gfp_mask);
+	fs_reclaim_acquire(gfp_mask);
+	fs_reclaim_release(gfp_mask);
 
 	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
diff --git a/mm/slab.h b/mm/slab.h
index 6885e1192ec5..073362816acc 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,6 +43,7 @@ struct kmem_cache {
 #include <linux/kasan.h>
 #include <linux/kmemleak.h>
 #include <linux/random.h>
+#include <linux/sched/mm.h>
 
 /*
  * State of the slab allocator.
@@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 						     gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
-	lockdep_trace_alloc(flags);
+
+	fs_reclaim_acquire(flags);
+	fs_reclaim_release(flags);
+
 	might_sleep_if(gfpflags_allow_blocking(flags));
 
 	if (should_failslab(s, flags))
diff --git a/mm/slob.c b/mm/slob.c
index 1bae78d71096..a8bd6fa11a66 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 
 	gfp &= gfp_allowed_mask;
 
-	lockdep_trace_alloc(gfp);
+	fs_reclaim_acquire(gfp);
+	fs_reclaim_release(gfp);
 
 	if (size < PAGE_SIZE - align) {
 		if (!size)
@@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	flags &= gfp_allowed_mask;
 
-	lockdep_trace_alloc(flags);
+	fs_reclaim_acquire(flags);
+	fs_reclaim_release(flags);
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1af041930a6..f957afe900ec 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3525,8 +3525,6 @@ static int kswapd(void *p)
 	};
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
-	lockdep_set_current_reclaim_state(GFP_KERNEL);
-
 	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
 	current->reclaim_state = &reclaim_state;
@@ -3585,14 +3583,15 @@ kswapd_try_sleep:
 		 */
 		trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
 						alloc_order);
+		fs_reclaim_acquire(GFP_KERNEL);
 		reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
+		fs_reclaim_release(GFP_KERNEL);
 		if (reclaim_order < alloc_order)
 			goto kswapd_try_sleep;
 	}
 
 	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
 	current->reclaim_state = NULL;
-	lockdep_clear_current_reclaim_state();
 
 	return 0;
 }
@@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 	unsigned int noreclaim_flag;
 
 	noreclaim_flag = memalloc_noreclaim_save();
-	lockdep_set_current_reclaim_state(sc.gfp_mask);
+	fs_reclaim_acquire(sc.gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
 	p->reclaim_state = NULL;
-	lockdep_clear_current_reclaim_state();
+	fs_reclaim_release(sc.gfp_mask);
 	memalloc_noreclaim_restore(noreclaim_flag);
 
 	return nr_reclaimed;
@@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	 */
 	noreclaim_flag = memalloc_noreclaim_save();
 	p->flags |= PF_SWAPWRITE;
-	lockdep_set_current_reclaim_state(sc.gfp_mask);
+	fs_reclaim_acquire(sc.gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
@@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	}
 
 	p->reclaim_state = NULL;
+	fs_reclaim_release(gfp_mask);
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
-	lockdep_clear_current_reclaim_state();
 	return sc.nr_reclaimed >= nr_pages;
 }
author	Peter Zijlstra <peterz@infradead.org>	2017-03-03 12:13:38 +0300
committer	Ingo Molnar <mingo@kernel.org>	2017-08-10 13:29:03 +0300
commit	d92a8cfcb37ecd1315269dab741f073b63b3a8b6 (patch)
tree	bd081c6309a48d23ae074e2035105be13622b005 /mm
parent	a9668cd6ee288c4838bc668880ac085be551cac2 (diff)
download	linux-d92a8cfcb37ecd1315269dab741f073b63b3a8b6.tar.xz