diff options
Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r-- | fs/proc/task_mmu.c | 377 |
1 files changed, 307 insertions, 70 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 60b914860f81..25b6a887adb9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,5 +1,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +#include <linux/huge_mm.h> #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/highmem.h> @@ -7,6 +8,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> +#include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -119,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = get_pid_task(priv->pid, PIDTYPE_PID); if (!priv->task) - return NULL; + return ERR_PTR(-ESRCH); mm = mm_for_maps(priv->task); - if (!mm) - return NULL; + if (!mm || IS_ERR(mm)) + return mm; down_read(&mm->mmap_sem); - tail_vma = get_gate_vma(priv->task); + tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; /* Start with last addr hint */ @@ -180,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v) struct proc_maps_private *priv = m->private; struct vm_area_struct *vma = v; - vma_stop(priv, vma); + if (!IS_ERR(vma)) + vma_stop(priv, vma); if (priv->task) put_task_struct(priv->task); } @@ -208,10 +211,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; - int flags = vma->vm_flags; + vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; - unsigned long start; + unsigned long start, end; dev_t dev = 0; int len; @@ -224,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) - if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) - start += PAGE_SIZE; + if (stack_guard_page_start(vma, start)) + start += PAGE_SIZE; + end = vma->vm_end; + if (stack_guard_page_end(vma, end)) + end -= PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, - vma->vm_end, + end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', @@ -249,8 +254,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) const char *name = arch_vma_name(vma); if (!name) { if (mm) { - if (vma->vm_start <= mm->start_brk && - vma->vm_end >= mm->brk) { + if (vma->vm_start <= mm->brk && + vma->vm_end >= mm->start_brk) { name = "[heap]"; } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { @@ -277,7 +282,8 @@ static int show_map(struct seq_file *m, void *v) show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -329,58 +335,86 @@ struct mem_size_stats { unsigned long private_dirty; unsigned long referenced; unsigned long anonymous; + unsigned long anonymous_thp; unsigned long swap; u64 pss; }; -static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - struct mm_walk *walk) + +static void smaps_pte_entry(pte_t ptent, unsigned long addr, + unsigned long ptent_size, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; - pte_t *pte, ptent; - spinlock_t *ptl; struct page *page; int mapcount; - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - for (; addr != end; pte++, addr += PAGE_SIZE) { - ptent = *pte; - - if (is_swap_pte(ptent)) { - mss->swap += PAGE_SIZE; - continue; - } + if (is_swap_pte(ptent)) { + mss->swap += ptent_size; + return; + } - if (!pte_present(ptent)) - continue; + if (!pte_present(ptent)) + return; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + return; + + if (PageAnon(page)) + mss->anonymous += ptent_size; + + mss->resident += ptent_size; + /* Accumulate the size in pages that have been accessed. */ + if (pte_young(ptent) || PageReferenced(page)) + mss->referenced += ptent_size; + mapcount = page_mapcount(page); + if (mapcount >= 2) { + if (pte_dirty(ptent) || PageDirty(page)) + mss->shared_dirty += ptent_size; + else + mss->shared_clean += ptent_size; + mss->pss += (ptent_size << PSS_SHIFT) / mapcount; + } else { + if (pte_dirty(ptent) || PageDirty(page)) + mss->private_dirty += ptent_size; + else + mss->private_clean += ptent_size; + mss->pss += (ptent_size << PSS_SHIFT); + } +} - page = vm_normal_page(vma, addr, ptent); - if (!page) - continue; +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = mss->vma; + pte_t *pte; + spinlock_t *ptl; - if (PageAnon(page)) - mss->anonymous += PAGE_SIZE; - - mss->resident += PAGE_SIZE; - /* Accumulate the size in pages that have been accessed. */ - if (pte_young(ptent) || PageReferenced(page)) - mss->referenced += PAGE_SIZE; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - if (pte_dirty(ptent) || PageDirty(page)) - mss->shared_dirty += PAGE_SIZE; - else - mss->shared_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge(*pmd)) { + if (pmd_trans_splitting(*pmd)) { + spin_unlock(&walk->mm->page_table_lock); + wait_split_huge_page(vma->anon_vma, pmd); } else { - if (pte_dirty(ptent) || PageDirty(page)) - mss->private_dirty += PAGE_SIZE; - else - mss->private_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT); + smaps_pte_entry(*(pte_t *)pmd, addr, + HPAGE_PMD_SIZE, walk); + spin_unlock(&walk->mm->page_table_lock); + mss->anonymous_thp += HPAGE_PMD_SIZE; + return 0; } + } else { + spin_unlock(&walk->mm->page_table_lock); } + /* + * The mmap_sem held all the way back in m_start() is what + * keeps khugepaged out of here and from collapsing things + * in here. + */ + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) + smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; @@ -416,6 +450,7 @@ static int show_smap(struct seq_file *m, void *v) "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" "Anonymous: %8lu kB\n" + "AnonHugePages: %8lu kB\n" "Swap: %8lu kB\n" "KernelPageSize: %8lu kB\n" "MMUPageSize: %8lu kB\n" @@ -429,6 +464,7 @@ static int show_smap(struct seq_file *m, void *v) mss.private_dirty >> 10, mss.referenced >> 10, mss.anonymous >> 10, + mss.anonymous_thp >> 10, mss.swap >> 10, vma_kernel_pagesize(vma) >> 10, vma_mmu_pagesize(vma) >> 10, @@ -436,7 +472,8 @@ static int show_smap(struct seq_file *m, void *v) (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -467,6 +504,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; + split_huge_page_pmd(walk->mm, pmd); + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; @@ -497,15 +536,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; - long type; + int type; + int rv; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (strict_strtol(strstrip(buffer), 10, &type)) - return -EINVAL; + rv = kstrtoint(strstrip(buffer), 10, &type); + if (rv < 0) + return rv; if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; task = get_proc_task(file->f_path.dentry->d_inode); @@ -623,6 +664,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; int err = 0; + split_huge_page_pmd(walk->mm, pmd); + /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { @@ -728,29 +771,25 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!task) goto out; - ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_task; - ret = -EINVAL; /* file position must be aligned */ if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_task; ret = 0; - if (!count) goto out_task; - mm = get_task_mm(task); - if (!mm) - goto out_task; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; if (!pm.buffer) - goto out_mm; + goto out_task; + + mm = mm_for_maps(task); + ret = PTR_ERR(mm); + if (!mm || IS_ERR(mm)) + goto out_free; pagemap_walk.pmd_entry = pagemap_pte_range; pagemap_walk.pte_hole = pagemap_pte_hole; @@ -793,7 +832,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, len = min(count, PM_ENTRY_BYTES * pm.pos); if (copy_to_user(buf, pm.buffer, len)) { ret = -EFAULT; - goto out_free; + goto out_mm; } copied += len; buf += len; @@ -803,10 +842,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!ret || ret == PM_END_OF_BUFFER) ret = copied; -out_free: - kfree(pm.buffer); out_mm: mmput(mm); +out_free: + kfree(pm.buffer); out_task: put_task_struct(task); out: @@ -820,7 +859,192 @@ const struct file_operations proc_pagemap_operations = { #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA -extern int show_numa_map(struct seq_file *m, void *v); + +struct numa_maps { + struct vm_area_struct *vma; + unsigned long pages; + unsigned long anon; + unsigned long active; + unsigned long writeback; + unsigned long mapcount_max; + unsigned long dirty; + unsigned long swapcache; + unsigned long node[MAX_NUMNODES]; +}; + +struct numa_maps_private { + struct proc_maps_private proc_maps; + struct numa_maps md; +}; + +static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) +{ + int count = page_mapcount(page); + + md->pages++; + if (pte_dirty || PageDirty(page)) + md->dirty++; + + if (PageSwapCache(page)) + md->swapcache++; + + if (PageActive(page) || PageUnevictable(page)) + md->active++; + + if (PageWriteback(page)) + md->writeback++; + + if (PageAnon(page)) + md->anon++; + + if (count > md->mapcount_max) + md->mapcount_max = count; + + md->node[page_to_nid(page)]++; +} + +static int gather_pte_stats(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct numa_maps *md; + spinlock_t *ptl; + pte_t *orig_pte; + pte_t *pte; + + md = walk->private; + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + do { + struct page *page; + int nid; + + if (!pte_present(*pte)) + continue; + + page = vm_normal_page(md->vma, addr, *pte); + if (!page) + continue; + + if (PageReserved(page)) + continue; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_HIGH_MEMORY])) + continue; + + gather_stats(page, md, pte_dirty(*pte)); + + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(orig_pte, ptl); + return 0; +} +#ifdef CONFIG_HUGETLB_PAGE +static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, struct mm_walk *walk) +{ + struct numa_maps *md; + struct page *page; + + if (pte_none(*pte)) + return 0; + + page = pte_page(*pte); + if (!page) + return 0; + + md = walk->private; + gather_stats(page, md, pte_dirty(*pte)); + return 0; +} + +#else +static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, struct mm_walk *walk) +{ + return 0; +} +#endif + +/* + * Display pages allocated per node and memory policy via /proc. + */ +static int show_numa_map(struct seq_file *m, void *v) +{ + struct numa_maps_private *numa_priv = m->private; + struct proc_maps_private *proc_priv = &numa_priv->proc_maps; + struct vm_area_struct *vma = v; + struct numa_maps *md = &numa_priv->md; + struct file *file = vma->vm_file; + struct mm_struct *mm = vma->vm_mm; + struct mm_walk walk = {}; + struct mempolicy *pol; + int n; + char buffer[50]; + + if (!mm) + return 0; + + /* Ensure we start with an empty set of numa_maps statistics. */ + memset(md, 0, sizeof(*md)); + + md->vma = vma; + + walk.hugetlb_entry = gather_hugetbl_stats; + walk.pmd_entry = gather_pte_stats; + walk.private = md; + walk.mm = mm; + + pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); + mpol_to_str(buffer, sizeof(buffer), pol, 0); + mpol_cond_put(pol); + + seq_printf(m, "%08lx %s", vma->vm_start, buffer); + + if (file) { + seq_printf(m, " file="); + seq_path(m, &file->f_path, "\n\t= "); + } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { + seq_printf(m, " heap"); + } else if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { + seq_printf(m, " stack"); + } + + walk_page_range(vma->vm_start, vma->vm_end, &walk); + + if (!md->pages) + goto out; + + if (md->anon) + seq_printf(m, " anon=%lu", md->anon); + + if (md->dirty) + seq_printf(m, " dirty=%lu", md->dirty); + + if (md->pages != md->anon && md->pages != md->dirty) + seq_printf(m, " mapped=%lu", md->pages); + + if (md->mapcount_max > 1) + seq_printf(m, " mapmax=%lu", md->mapcount_max); + + if (md->swapcache) + seq_printf(m, " swapcache=%lu", md->swapcache); + + if (md->active < md->pages && !is_vm_hugetlb_page(vma)) + seq_printf(m, " active=%lu", md->active); + + if (md->writeback) + seq_printf(m, " writeback=%lu", md->writeback); + + for_each_node_state(n, N_HIGH_MEMORY) + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); +out: + seq_putc(m, '\n'); + + if (m->count < m->size) + m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; + return 0; +} static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, @@ -831,7 +1055,20 @@ static const struct seq_operations proc_pid_numa_maps_op = { static int numa_maps_open(struct inode *inode, struct file *file) { - return do_maps_open(inode, file, &proc_pid_numa_maps_op); + struct numa_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->proc_maps.pid = proc_pid(inode); + ret = seq_open(file, &proc_pid_numa_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; } const struct file_operations proc_numa_maps_operations = { @@ -840,4 +1077,4 @@ const struct file_operations proc_numa_maps_operations = { .llseek = seq_lseek, .release = seq_release_private, }; -#endif +#endif /* CONFIG_NUMA */ |