From 2262185c5b287f2758afda79c149b7cf6bee165c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 6 Jul 2017 15:40:25 -0700 Subject: mm: per-cgroup memory reclaim stats Track the following reclaim counters for every memory cgroup: PGREFILL, PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED. These values are exposed using the memory.stats interface of cgroup v2. The meaning of each value is the same as for global counters, available using /proc/vmstat. Also, for consistency, rename mem_cgroup_count_vm_event() to count_memcg_event_mm(). Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com Signed-off-by: Roman Gushchin Suggested-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 48 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 899949bbb2f9..b2a5b1cd4e55 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -357,6 +357,17 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_id(unsigned short id); +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + struct mem_cgroup_per_node *mz; + + if (mem_cgroup_disabled()) + return NULL; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + return mz->memcg; +} + /** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find @@ -546,8 +557,23 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); -static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->events[idx], count); +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ + if (page->mem_cgroup) + count_memcg_events(page->mem_cgroup, idx, 1); +} + +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) { struct mem_cgroup *memcg; @@ -675,6 +701,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return NULL; } +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + return NULL; +} + static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { return true; @@ -789,8 +820,19 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head) { } +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ +} + static inline -void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) +void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } #endif /* CONFIG_MEMCG */ -- cgit v1.2.3 From 8e675f7af50747e1e9e96538e8706767e4f80e2c Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 6 Jul 2017 15:40:28 -0700 Subject: mm/oom_kill: count global and memory cgroup oom kills Show count of oom killer invocations in /proc/vmstat and count of processes killed in memory cgroup in knob "memory.events" (in memory.oom_control for v1 cgroup). Also describe difference between "oom" and "oom_kill" in memory cgroup documentation. Currently oom in memory cgroup kills tasks iff shortage has happened inside page fault. These counters helps in monitoring oom kills - for now the only way is grepping for magic words in kernel log. [akpm@linux-foundation.org: fix for mem_cgroup_count_vm_event() rename] [akpm@linux-foundation.org: fix comment, per Konstantin] Link: http://lkml.kernel.org/r/149570810989.203600.9492483715840752937.stgit@buzz Signed-off-by: Konstantin Khlebnikov Cc: Michal Hocko Cc: Tetsuo Handa Cc: Roman Guschin Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroup-v2.txt | 20 ++++++++++++++++---- include/linux/memcontrol.h | 5 ++++- include/linux/vm_event_item.h | 1 + mm/memcontrol.c | 2 ++ mm/oom_kill.c | 5 +++++ mm/vmstat.c | 1 + 6 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 5ac2fbde97e6..e6101976e0f1 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -852,13 +852,25 @@ PAGE_SIZE multiple when read back. The number of times the cgroup's memory usage was about to go over the max boundary. If direct reclaim - fails to bring it down, the OOM killer is invoked. + fails to bring it down, the cgroup goes to OOM state. oom - The number of times the OOM killer has been invoked in - the cgroup. This may not exactly match the number of - processes killed but should generally be close. + The number of time the cgroup's memory usage was + reached the limit and allocation was about to fail. + + Depending on context result could be invocation of OOM + killer and retrying allocation or failing alloction. + + Failed allocation in its turn could be returned into + userspace as -ENOMEM or siletly ignored in cases like + disk readahead. For now OOM in memory cgroup kills + tasks iff shortage has happened inside page fault. + + oom_kill + + The number of processes belonging to this cgroup + killed by any kind of OOM killer. memory.stat diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b2a5b1cd4e55..72d0853beb31 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -582,8 +582,11 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (likely(memcg)) + if (likely(memcg)) { this_cpu_inc(memcg->stat->events[idx]); + if (idx == OOM_KILL) + cgroup_file_notify(&memcg->events_file); + } rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index be3ab2d13adf..37e8d31a4632 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, PAGEOUTRUN, PGROTATED, DROP_PAGECACHE, DROP_SLAB, + OOM_KILL, #ifdef CONFIG_NUMA_BALANCING NUMA_PTE_UPDATES, NUMA_HUGE_PTE_UPDATES, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3e2f8cf85b4c..4f686fc1c5fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3573,6 +3573,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); + seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); return 0; } @@ -5164,6 +5165,7 @@ static int memory_events_show(struct seq_file *m, void *v) seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH)); seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX)); seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM)); + seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); return 0; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 04c9143a8625..0e2c925e7826 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -876,6 +876,11 @@ static void oom_kill_process(struct oom_control *oc, const char *message) /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; mmgrab(mm); + + /* Raise event before sending signal: task reaper must see this */ + count_vm_event(OOM_KILL); + count_memcg_event_mm(mm, OOM_KILL); + /* * We should send SIGKILL before setting TIF_MEMDIE in order to prevent * the OOM victim from depleting the memory reserves from the user diff --git a/mm/vmstat.c b/mm/vmstat.c index 6dae6b240b21..46281825c710 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1018,6 +1018,7 @@ const char * const vmstat_text[] = { "drop_pagecache", "drop_slab", + "oom_kill", #ifdef CONFIG_NUMA_BALANCING "numa_pte_updates", -- cgit v1.2.3 From 320492961c1cf21da5547b00c23e525851c1d16f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:46 -0700 Subject: mm: memcontrol: use the node-native slab memory counters Now that the slab counters are moved from the zone to the node level we can drop the private memcg node stats and use the official ones. Link: http://lkml.kernel.org/r/20170530181724.27197-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 -- mm/memcontrol.c | 8 ++++---- mm/slab.h | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 72d0853beb31..fa506ae61d66 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -44,8 +44,6 @@ enum memcg_stat_item { MEMCG_SOCK, /* XXX: why are these zone and not node counters? */ MEMCG_KERNEL_STACK_KB, - MEMCG_SLAB_RECLAIMABLE, - MEMCG_SLAB_UNRECLAIMABLE, MEMCG_NR_STAT, }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4f686fc1c5fa..dceb0deb8d5e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5198,8 +5198,8 @@ static int memory_stat_show(struct seq_file *m, void *v) seq_printf(m, "kernel_stack %llu\n", (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024); seq_printf(m, "slab %llu\n", - (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + - stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); + (u64)(stat[NR_SLAB_RECLAIMABLE] + + stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); seq_printf(m, "sock %llu\n", (u64)stat[MEMCG_SOCK] * PAGE_SIZE); @@ -5223,9 +5223,9 @@ static int memory_stat_show(struct seq_file *m, void *v) } seq_printf(m, "slab_reclaimable %llu\n", - (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE); + (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE); seq_printf(m, "slab_unreclaimable %llu\n", - (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE); + (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE); /* Accumulated memory events */ diff --git a/mm/slab.h b/mm/slab.h index 9cfcf099709c..69f0579cb5aa 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -287,7 +287,7 @@ static __always_inline int memcg_charge_slab(struct page *page, memcg_kmem_update_page_stat(page, (s->flags & SLAB_RECLAIM_ACCOUNT) ? - MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE, + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1 << order); return 0; } @@ -300,7 +300,7 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order, memcg_kmem_update_page_stat(page, (s->flags & SLAB_RECLAIM_ACCOUNT) ? - MEMCG_SLAB_RECLAIMABLE : MEMCG_SLAB_UNRECLAIMABLE, + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -(1 << order)); memcg_kmem_uncharge(page, order); } -- cgit v1.2.3 From ed52be7bfd45533b194b429f43361493d24599a7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:49 -0700 Subject: mm: memcontrol: use generic mod_memcg_page_state for kmem pages The kmem-specific functions do the same thing. Switch and drop. Link: http://lkml.kernel.org/r/20170530181724.27197-5-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 17 ----------------- kernel/fork.c | 8 ++++---- mm/slab.h | 16 ++++++++-------- 3 files changed, 12 insertions(+), 29 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fa506ae61d66..5a72d8377942 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -929,19 +929,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -/** - * memcg_kmem_update_page_stat - update kmem page state statistics - * @page: the page - * @idx: page state item to account - * @val: number of pages (positive or negative) - */ -static inline void memcg_kmem_update_page_stat(struct page *page, - enum memcg_stat_item idx, int val) -{ - if (memcg_kmem_enabled() && page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); -} - #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -964,10 +951,6 @@ static inline void memcg_put_cache_ids(void) { } -static inline void memcg_kmem_update_page_stat(struct page *page, - enum memcg_stat_item idx, int val) -{ -} #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index e53770d2bf95..aa01b810c0bd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -326,8 +326,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account) } /* All stack pages belong to the same memcg. */ - memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } else { /* * All stack pages are in the same zone and belong to the @@ -338,8 +338,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account) mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); - memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } } diff --git a/mm/slab.h b/mm/slab.h index 69f0579cb5aa..7b84e3839dfe 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -285,10 +285,10 @@ static __always_inline int memcg_charge_slab(struct page *page, if (ret) return ret; - memcg_kmem_update_page_stat(page, - (s->flags & SLAB_RECLAIM_ACCOUNT) ? - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, - 1 << order); + mod_memcg_page_state(page, + (s->flags & SLAB_RECLAIM_ACCOUNT) ? + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, + 1 << order); return 0; } @@ -298,10 +298,10 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order, if (!memcg_kmem_enabled()) return; - memcg_kmem_update_page_stat(page, - (s->flags & SLAB_RECLAIM_ACCOUNT) ? - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, - -(1 << order)); + mod_memcg_page_state(page, + (s->flags & SLAB_RECLAIM_ACCOUNT) ? + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, + -(1 << order)); memcg_kmem_uncharge(page, order); } -- cgit v1.2.3 From 00f3ca2c2d6635d85108571c4dd9a29088668662 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:52 -0700 Subject: mm: memcontrol: per-lruvec stats infrastructure lruvecs are at the intersection of the NUMA node and memcg, which is the scope for most paging activity. Introduce a convenient accounting infrastructure that maintains statistics per node, per memcg, and the lruvec itself. Then convert over accounting sites for statistics that are already tracked in both nodes and memcgs and can be easily switched. [hannes@cmpxchg.org: fix crash in the new cgroup stat keeping code] Link: http://lkml.kernel.org/r/20170531171450.GA10481@cmpxchg.org [hannes@cmpxchg.org: don't track uncharged pages at all Link: http://lkml.kernel.org/r/20170605175254.GA8547@cmpxchg.org [hannes@cmpxchg.org: add missing free_percpu()] Link: http://lkml.kernel.org/r/20170605175354.GB8547@cmpxchg.org [linux@roeck-us.net: hexagon: fix build error caused by include file order] Link: http://lkml.kernel.org/r/20170617153721.GA4382@roeck-us.net Link: http://lkml.kernel.org/r/20170530181724.27197-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Guenter Roeck Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/hexagon/include/asm/pgtable.h | 1 - arch/hexagon/kernel/asm-offsets.c | 1 - arch/hexagon/mm/vm_tlb.c | 1 + include/linux/memcontrol.h | 246 ++++++++++++++++++++++++++++++++----- include/linux/vmstat.h | 1 - mm/memcontrol.c | 11 +- mm/page-writeback.c | 15 +-- mm/rmap.c | 8 +- mm/workingset.c | 9 +- 9 files changed, 238 insertions(+), 55 deletions(-) (limited to 'include/linux/memcontrol.h') diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 24a9177fb897..aef02f7ca8aa 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -24,7 +24,6 @@ /* * Page table definitions for Qualcomm Hexagon processor. */ -#include #include #define __ARCH_USE_5LEVEL_HACK #include diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c index 308be68d4fb3..3980c0407aa1 100644 --- a/arch/hexagon/kernel/asm-offsets.c +++ b/arch/hexagon/kernel/asm-offsets.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/hexagon/mm/vm_tlb.c b/arch/hexagon/mm/vm_tlb.c index 9647d00cb761..b474065533ce 100644 --- a/arch/hexagon/mm/vm_tlb.c +++ b/arch/hexagon/mm/vm_tlb.c @@ -24,6 +24,7 @@ * be instantiated for it, differently from a native build. */ #include +#include #include #include diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a72d8377942..3914e3dd6168 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -26,7 +26,8 @@ #include #include #include -#include +#include +#include #include #include @@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; +struct lruvec_stat { + long count[NR_VM_NODE_STAT_ITEMS]; +}; + /* * per-zone information in memory controller. */ struct mem_cgroup_per_node { struct lruvec lruvec; + struct lruvec_stat __percpu *lruvec_stat; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; @@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, return val; } -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) +static inline void __mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) { if (!mem_cgroup_disabled()) - this_cpu_add(memcg->stat->count[idx], val); -} - -static inline void inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) -{ - mod_memcg_state(memcg, idx, 1); + __this_cpu_add(memcg->stat->count[idx], val); } -static inline void dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) { - mod_memcg_state(memcg, idx, -1); + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->count[idx], val); } /** @@ -532,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg, * * Kernel pages are an exception to this, since they'll never move. */ +static inline void __mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, int val) +{ + if (page->mem_cgroup) + __mod_memcg_state(page->mem_cgroup, idx, val); +} + static inline void mod_memcg_page_state(struct page *page, enum memcg_stat_item idx, int val) { @@ -539,16 +547,76 @@ static inline void mod_memcg_page_state(struct page *page, mod_memcg_state(page->mem_cgroup, idx, val); } -static inline void inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) { - mod_memcg_page_state(page, idx, 1); + struct mem_cgroup_per_node *pn; + long val = 0; + int cpu; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + for_each_possible_cpu(cpu) + val += per_cpu(pn->lruvec_stat->count[idx], cpu); + + if (val < 0) + val = 0; + + return val; } -static inline void dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) { - mod_memcg_page_state(page, idx, -1); + struct mem_cgroup_per_node *pn; + + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); + if (mem_cgroup_disabled()) + return; + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + __mod_memcg_state(pn->memcg, idx, val); + __this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + struct mem_cgroup_per_node *pn; + + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); + if (mem_cgroup_disabled()) + return; + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + mod_memcg_state(pn->memcg, idx, val); + this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + struct mem_cgroup_per_node *pn; + + __mod_node_page_state(page_pgdat(page), idx, val); + if (mem_cgroup_disabled() || !page->mem_cgroup) + return; + __mod_memcg_state(page->mem_cgroup, idx, val); + pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; + __this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + struct mem_cgroup_per_node *pn; + + mod_node_page_state(page_pgdat(page), idx, val); + if (mem_cgroup_disabled() || !page->mem_cgroup) + return; + mod_memcg_state(page->mem_cgroup, idx, val); + pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; + this_cpu_add(pn->lruvec_stat->count[idx], val); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -777,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, return 0; } -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, - int nr) +static inline void __mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) { } -static inline void inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) { } -static inline void dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void __mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, + int nr) { } @@ -799,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page, { } -static inline void inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) { + return node_page_state(lruvec_pgdat(lruvec), idx); } -static inline void dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) { + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(page_pgdat(page), idx, val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + mod_node_page_state(page_pgdat(page), idx, val); } static inline @@ -838,6 +928,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ +static inline void __inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + __mod_memcg_state(memcg, idx, 1); +} + +static inline void __dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + __mod_memcg_state(memcg, idx, -1); +} + +static inline void __inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + __mod_memcg_page_state(page, idx, 1); +} + +static inline void __dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + __mod_memcg_page_state(page, idx, -1); +} + +static inline void __inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 1); +} + +static inline void __dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, -1); +} + +static inline void __inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, 1); +} + +static inline void __dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, -1); +} + +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + mod_memcg_state(memcg, idx, 1); +} + +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + mod_memcg_state(memcg, idx, -1); +} + +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + mod_memcg_page_state(page, idx, 1); +} + +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + mod_memcg_page_state(page, idx, -1); +} + +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, 1); +} + +static inline void dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, -1); +} + +static inline void inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, 1); +} + +static inline void dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, -1); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 613771909b6e..b3d85f30d424 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dceb0deb8d5e..425aa0caa712 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; + pn->lruvec_stat = alloc_percpu(struct lruvec_stat); + if (!pn->lruvec_stat) { + kfree(pn); + return 1; + } + lruvec_init(&pn->lruvec); pn->usage_in_excess = 0; pn->on_tree = false; @@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) { - kfree(memcg->nodeinfo[node]); + struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; + + free_percpu(pn->lruvec_stat); + kfree(pn); } static void __mem_cgroup_free(struct mem_cgroup *memcg) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 143c1c25d680..8989eada0ef7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) inode_attach_wb(inode, page); wb = inode_to_wb(inode); - inc_memcg_page_state(page, NR_FILE_DIRTY); - __inc_node_page_state(page, NR_FILE_DIRTY); + __inc_lruvec_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); __inc_node_page_state(page, NR_DIRTIED); __inc_wb_stat(wb, WB_RECLAIMABLE); @@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_cap_account_dirty(mapping)) { - dec_memcg_page_state(page, NR_FILE_DIRTY); - dec_node_page_state(page, NR_FILE_DIRTY); + dec_lruvec_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); task_io_account_cancelled_write(PAGE_SIZE); @@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page) */ wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) { - dec_memcg_page_state(page, NR_FILE_DIRTY); - dec_node_page_state(page, NR_FILE_DIRTY); + dec_lruvec_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; @@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page) ret = TestClearPageWriteback(page); } if (ret) { - dec_memcg_page_state(page, NR_WRITEBACK); - dec_node_page_state(page, NR_WRITEBACK); + dec_lruvec_page_state(page, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } @@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) ret = TestSetPageWriteback(page); } if (!ret) { - inc_memcg_page_state(page, NR_WRITEBACK); - inc_node_page_state(page, NR_WRITEBACK); + inc_lruvec_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); } unlock_page_memcg(page); diff --git a/mm/rmap.c b/mm/rmap.c index b255743351e5..ced14f1af6dc 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1145,8 +1145,7 @@ void page_add_file_rmap(struct page *page, bool compound) if (!atomic_inc_and_test(&page->_mapcount)) goto out; } - __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); - mod_memcg_page_state(page, NR_FILE_MAPPED, nr); + __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr); out: unlock_page_memcg(page); } @@ -1181,12 +1180,11 @@ static void page_remove_file_rmap(struct page *page, bool compound) } /* - * We use the irq-unsafe __{inc|mod}_zone_page_state because + * We use the irq-unsafe __{inc|mod}_lruvec_page_state because * these counters are not modified in interrupt context, and * pte lock(a spinlock) is held, which implies preemption disabled. */ - __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); - mod_memcg_page_state(page, NR_FILE_MAPPED, -nr); + __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); diff --git a/mm/workingset.c b/mm/workingset.c index b8c9ab678479..7119cd745ace 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -288,12 +288,10 @@ bool workingset_refault(void *shadow) */ refault_distance = (refault - eviction) & EVICTION_MASK; - inc_node_state(pgdat, WORKINGSET_REFAULT); - inc_memcg_state(memcg, WORKINGSET_REFAULT); + inc_lruvec_state(lruvec, WORKINGSET_REFAULT); if (refault_distance <= active_file) { - inc_node_state(pgdat, WORKINGSET_ACTIVATE); - inc_memcg_state(memcg, WORKINGSET_ACTIVATE); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE); rcu_read_unlock(); return true; } @@ -474,8 +472,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, } if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; - inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); - inc_memcg_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); + inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->page_tree, node, workingset_update_node, mapping); -- cgit v1.2.3