diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2017-11-16 04:35:40 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-16 05:21:04 +0300 |
commit | af5b0f6a09e42c9f4fa87735f2a366748767b686 (patch) | |
tree | ac9a7992f92ae5a7902805e980bb31e8edc4555b /mm | |
parent | c4812909f5d5a9b7f1c85a2d95be388a066cda52 (diff) | |
download | linux-af5b0f6a09e42c9f4fa87735f2a366748767b686.tar.xz |
mm: consolidate page table accounting
Currently, we account page tables separately for each page table level,
but that's redundant -- we only make use of total memory allocated to
page tables for oom_badness calculation. We also provide the
information to userspace, but it has dubious value there too.
This patch switches page table accounting to single counter.
mm->pgtables_bytes is now used to account all page table levels. We use
bytes, because page table size for different levels of page table tree
may be different.
The change has user-visible effect: we don't have VmPMD and VmPUD
reported in /proc/[pid]/status. Not sure if anybody uses them. (As
alternative, we can always report 0 kB for them.)
OOM-killer report is also slightly changed: we now report pgtables_bytes
instead of nr_ptes, nr_pmd, nr_puds.
Apart from reducing number of counters per-mm, the benefit is that we
now calculate oom_badness() more correctly for machines which have
different size of page tables depending on level or where page tables
are less than a page in size.
The only downside can be debuggability because we do not know which page
table level could leak. But I do not remember many bugs that would be
caught by separate counters so I wouldn't lose sleep over this.
[akpm@linux-foundation.org: fix mm/huge_memory.c]
Link: http://lkml.kernel.org/r/20171006100651.44742-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
[kirill.shutemov@linux.intel.com: fix build]
Link: http://lkml.kernel.org/r/20171016150113.ikfxy3e7zzfvsr4w@black.fi.intel.com
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/debug.c | 7 | ||||
-rw-r--r-- | mm/huge_memory.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 14 |
3 files changed, 9 insertions, 14 deletions
diff --git a/mm/debug.c b/mm/debug.c index c9888a6d7875..d947f3e03b0d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -105,8 +105,7 @@ void dump_mm(const struct mm_struct *mm) "get_unmapped_area %p\n" #endif "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" - "pgd %p mm_users %d mm_count %d\n" - "nr_ptes %lu nr_pmds %lu nr_puds %lu map_count %d\n" + "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" "start_code %lx end_code %lx start_data %lx end_data %lx\n" @@ -136,9 +135,7 @@ void dump_mm(const struct mm_struct *mm) mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end, mm->pgd, atomic_read(&mm->mm_users), atomic_read(&mm->mm_count), - mm_nr_ptes(mm), - mm_nr_pmds(mm), - mm_nr_puds(mm), + mm_pgtables_bytes(mm), mm->map_count, mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3610d81c062a..86fe697e8bfb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -942,7 +942,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, set_pmd_at(src_mm, addr, src_pmd, pmd); } add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); - atomic_long_inc(&dst_mm->nr_ptes); + mm_inc_nr_ptes(dst_mm); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); set_pmd_at(dst_mm, addr, dst_pmd, pmd); ret = 0; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f9300141480e..26add8a0d1f7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -221,7 +221,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * task's rss, pagetable and swap space use. */ points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + - mm_nr_ptes(p->mm) + mm_nr_pmds(p->mm) + mm_nr_puds(p->mm); + mm_pgtables_bytes(p->mm) / PAGE_SIZE; task_unlock(p); /* @@ -389,15 +389,15 @@ static void select_bad_process(struct oom_control *oc) * Dumps the current memory state of all eligible tasks. Tasks not in the same * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes * are not shown. - * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, - * swapents, oom_score_adj value, and name. + * State information includes task's pid, uid, tgid, vm size, rss, + * pgtables_bytes, swapents, oom_score_adj value, and name. */ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) { struct task_struct *p; struct task_struct *task; - pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds nr_puds swapents oom_score_adj name\n"); + pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); rcu_read_lock(); for_each_process(p) { if (oom_unkillable_task(p, memcg, nodemask)) @@ -413,12 +413,10 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) continue; } - pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %7ld %8lu %5hd %s\n", + pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n", task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), - mm_nr_ptes(task->mm), - mm_nr_pmds(task->mm), - mm_nr_puds(task->mm), + mm_pgtables_bytes(task->mm), get_mm_counter(task->mm, MM_SWAPENTS), task->signal->oom_score_adj, task->comm); task_unlock(task); |