From 20b5c30398639b458371c228abfda829854b61c5 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:08 -0800 Subject: Revert "gfp: add __GFP_NOACCOUNT" This reverts commit 8f4fc071b192 ("gfp: add __GFP_NOACCOUNT"). Black-list kmem accounting policy (aka __GFP_NOACCOUNT) turned out to be fragile and difficult to maintain, because there seem to be many more allocations that should not be accounted than those that should be. Besides, falsely accounting an allocation might result in much worse consequences than not accounting at all, namely increased memory consumption due to pinned dead kmem caches. So it was decided to switch to the white-list policy. This patch reverts bits introducing the black-list policy. The white-list policy will be introduced later in the series. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/gfp.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8942af0813e3..075b014448f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,7 +30,6 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u -#define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -104,7 +103,6 @@ struct vm_area_struct; #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) -#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* * Reclaim modifiers -- cgit v1.2.3 From a9bb7e620efdfd29b6d1c238041173e411670996 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:12 
-0800 Subject: memcg: only account kmem allocations marked as __GFP_ACCOUNT Black-list kmem accounting policy (aka __GFP_NOACCOUNT) turned out to be fragile and difficult to maintain, because there seem to be many more allocations that should not be accounted than those that should be. Besides, falsely accounting an allocation might result in much worse consequences than not accounting at all, namely increased memory consumption due to pinned dead kmem caches. So this patch switches kmem accounting to the white-list policy: now only those kmem allocations that are marked as __GFP_ACCOUNT are accounted to memcg. Currently, no kmem allocations are marked like this. The following patches will mark several kmem allocations that are known to be easily triggered from userspace and therefore should be accounted to memcg. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 +++++++++ include/linux/memcontrol.h | 2 ++ mm/page_alloc.c | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux/gfp.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 075b014448f5..1dd59abe541d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,6 +30,7 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u +#define ___GFP_ACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -72,11 +73,15 @@ struct vm_area_struct; * * __GFP_THISNODE forces the allocation to be satisified from the requested * node with no fallbacks or placement policy enforcements. + * + * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant + * to kmem allocations). 
*/ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) /* * Watermark modifiers -- controls access to emergency reserves @@ -195,6 +200,9 @@ struct vm_area_struct; * GFP_KERNEL is typical for kernel-internal allocations. The caller requires * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. * + * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * * GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. * @@ -234,6 +242,7 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2103f36b3bd3..c9d9a8e7b45f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -773,6 +773,8 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; + if (!(gfp & __GFP_ACCOUNT)) + return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d666df5ef95..ca58bfcdadac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3402,7 +3402,8 @@ EXPORT_SYMBOL(__free_page_frag); /* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter - * of the current memory cgroup. + * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is + * equivalent to alloc_pages. 
* * It should be used when the caller would like to use kmalloc, but since the * allocation is large, it has to fall back to the page allocator. -- cgit v1.2.3 From c00eb15a8914b8ba84032a36044a5aaf7f71709d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 14 Jan 2016 15:19:00 -0800 Subject: mm/zonelist: enumerate zonelists array index Hardcoding index to zonelists array in gfp_zonelist() is not a good idea, let's enumerate it to improve readability. No functional change. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_NUMA=n build] [n-horiguchi@ah.jp.nec.com: fix warning in comparing enumerator] Signed-off-by: Yaowei Bai Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 +++++---- include/linux/mmzone.h | 20 +++++++++----------- mm/page_alloc.c | 9 ++++----- 3 files changed, 18 insertions(+), 20 deletions(-) (limited to 'include/linux/gfp.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 1dd59abe541d..91f74e741aa2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -384,10 +384,11 @@ static inline enum zone_type gfp_zone(gfp_t flags) static inline int gfp_zonelist(gfp_t flags) { - if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE)) - return 1; - - return 0; +#ifdef CONFIG_NUMA + if (unlikely(flags & __GFP_THISNODE)) + return ZONELIST_NOFALLBACK; +#endif + return ZONELIST_FALLBACK; } /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 996384672c73..12c98dfc31b1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -571,19 +571,17 @@ static inline bool zone_is_empty(struct zone *zone) /* Maximum number of zones on a zonelist */ #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) +enum { + ZONELIST_FALLBACK, /* zonelist with fallback */ #ifdef CONFIG_NUMA - -/* - * The NUMA zonelists are doubled because we need zonelists that restrict the - * 
allocations to a single node for __GFP_THISNODE. - * - * [0] : Zonelist with fallback - * [1] : No fallback (__GFP_THISNODE) - */ -#define MAX_ZONELISTS 2 -#else -#define MAX_ZONELISTS 1 + /* + * The NUMA zonelists are doubled because we need zonelists that + * restrict the allocations to a single node for __GFP_THISNODE. + */ + ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */ #endif + MAX_ZONELISTS +}; /* * This struct contains information about a zone in a zonelist. It is stored diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2f6d30db4c94..e40e702ce919 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4148,8 +4148,7 @@ static void set_zonelist_order(void) static void build_zonelists(pg_data_t *pgdat) { - int j, node, load; - enum zone_type i; + int i, node, load; nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; @@ -4169,7 +4168,7 @@ static void build_zonelists(pg_data_t *pgdat) nodes_clear(used_mask); memset(node_order, 0, sizeof(node_order)); - j = 0; + i = 0; while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { /* @@ -4186,12 +4185,12 @@ static void build_zonelists(pg_data_t *pgdat) if (order == ZONELIST_ORDER_NODE) build_zonelists_in_node_order(pgdat, node); else - node_order[j++] = node; /* remember order */ + node_order[i++] = node; /* remember order */ } if (order == ZONELIST_ORDER_ZONE) { /* calculate node order -- i.e., DMA last! */ - build_zonelists_in_zone_order(pgdat, j); + build_zonelists_in_zone_order(pgdat, i); } build_thisnode_zonelists(pgdat); -- cgit v1.2.3 From 543dfb2df8ebb3eb0b499eae1d63de1701a99b40 Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Thu, 14 Jan 2016 15:22:10 -0800 Subject: mm: fix noisy sparse warning in LIBCFS_ALLOC_PRE() Running sparse on drivers/staging/lustre results in dozens of warnings: include/linux/gfp.h:281:41: warning: odd constant _Bool cast (400000 becomes 1) Use "!!" to explicitly convert to bool and get rid of the warning. 
Signed-off-by: Joshua Clayton Cc: Mel Gorman Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/gfp.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 91f74e741aa2..28ad5f6494b0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -278,7 +278,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); + return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3