summaryrefslogtreecommitdiff
path: root/fs/proc/task_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r--fs/proc/task_mmu.c357
1 files changed, 261 insertions, 96 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7dcd2a250495..9694cc283511 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file,
return ret;
}
-static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
+static void
+show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
+ struct proc_maps_private *priv = m->private;
+ struct task_struct *task = priv->task;
vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
unsigned long start, end;
dev_t dev = 0;
int len;
+ const char *name = NULL;
if (file) {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
if (file) {
pad_len_spaces(m, len);
seq_path(m, &file->f_path, "\n");
- } else {
- const char *name = arch_vma_name(vma);
- if (!name) {
- if (mm) {
- if (vma->vm_start <= mm->brk &&
- vma->vm_end >= mm->start_brk) {
- name = "[heap]";
- } else if (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack) {
- name = "[stack]";
- }
+ goto done;
+ }
+
+ name = arch_vma_name(vma);
+ if (!name) {
+ pid_t tid;
+
+ if (!mm) {
+ name = "[vdso]";
+ goto done;
+ }
+
+ if (vma->vm_start <= mm->brk &&
+ vma->vm_end >= mm->start_brk) {
+ name = "[heap]";
+ goto done;
+ }
+
+ tid = vm_is_stack(task, vma, is_pid);
+
+ if (tid != 0) {
+ /*
+ * Thread stack in /proc/PID/task/TID/maps or
+ * the main process stack.
+ */
+ if (!is_pid || (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack)) {
+ name = "[stack]";
} else {
- name = "[vdso]";
+ /* Thread stack in /proc/PID/maps */
+ pad_len_spaces(m, len);
+ seq_printf(m, "[stack:%d]", tid);
}
}
- if (name) {
- pad_len_spaces(m, len);
- seq_puts(m, name);
- }
+ }
+
+done:
+ if (name) {
+ pad_len_spaces(m, len);
+ seq_puts(m, name);
}
seq_putc(m, '\n');
}
-static int show_map(struct seq_file *m, void *v)
+static int show_map(struct seq_file *m, void *v, int is_pid)
{
struct vm_area_struct *vma = v;
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
- show_map_vma(m, vma);
+ show_map_vma(m, vma, is_pid);
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
@@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v)
return 0;
}
+static int show_pid_map(struct seq_file *m, void *v)
+{
+ return show_map(m, v, 1);
+}
+
+static int show_tid_map(struct seq_file *m, void *v)
+{
+ return show_map(m, v, 0);
+}
+
static const struct seq_operations proc_pid_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_map
+ .show = show_pid_map
};
-static int maps_open(struct inode *inode, struct file *file)
+static const struct seq_operations proc_tid_maps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_tid_map
+};
+
+static int pid_maps_open(struct inode *inode, struct file *file)
{
return do_maps_open(inode, file, &proc_pid_maps_op);
}
-const struct file_operations proc_maps_operations = {
- .open = maps_open,
+static int tid_maps_open(struct inode *inode, struct file *file)
+{
+ return do_maps_open(inode, file, &proc_tid_maps_op);
+}
+
+const struct file_operations proc_pid_maps_operations = {
+ .open = pid_maps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+const struct file_operations proc_tid_maps_operations = {
+ .open = tid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
@@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- spin_lock(&walk->mm->page_table_lock);
- if (pmd_trans_huge(*pmd)) {
- if (pmd_trans_splitting(*pmd)) {
- spin_unlock(&walk->mm->page_table_lock);
- wait_split_huge_page(vma->anon_vma, pmd);
- } else {
- smaps_pte_entry(*(pte_t *)pmd, addr,
- HPAGE_PMD_SIZE, walk);
- spin_unlock(&walk->mm->page_table_lock);
- mss->anonymous_thp += HPAGE_PMD_SIZE;
- return 0;
- }
- } else {
+ if (pmd_trans_huge_lock(pmd, vma) == 1) {
+ smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
spin_unlock(&walk->mm->page_table_lock);
+ mss->anonymous_thp += HPAGE_PMD_SIZE;
+ return 0;
}
+
+ if (pmd_trans_unstable(pmd))
+ return 0;
/*
* The mmap_sem held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
@@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return 0;
}
-static int show_smap(struct seq_file *m, void *v)
+static int show_smap(struct seq_file *m, void *v, int is_pid)
{
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
@@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v)
if (vma->vm_mm && !is_vm_hugetlb_page(vma))
walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
- show_map_vma(m, vma);
+ show_map_vma(m, vma, is_pid);
seq_printf(m,
"Size: %8lu kB\n"
@@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v)
return 0;
}
+static int show_pid_smap(struct seq_file *m, void *v)
+{
+ return show_smap(m, v, 1);
+}
+
+static int show_tid_smap(struct seq_file *m, void *v)
+{
+ return show_smap(m, v, 0);
+}
+
static const struct seq_operations proc_pid_smaps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_smap
+ .show = show_pid_smap
+};
+
+static const struct seq_operations proc_tid_smaps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_tid_smap
};
-static int smaps_open(struct inode *inode, struct file *file)
+static int pid_smaps_open(struct inode *inode, struct file *file)
{
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-const struct file_operations proc_smaps_operations = {
- .open = smaps_open,
+static int tid_smaps_open(struct inode *inode, struct file *file)
+{
+ return do_maps_open(inode, file, &proc_tid_smaps_op);
+}
+
+const struct file_operations proc_pid_smaps_operations = {
+ .open = pid_smaps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+const struct file_operations proc_tid_smaps_operations = {
+ .open = tid_smaps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
@@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
struct page *page;
split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -598,11 +677,18 @@ const struct file_operations proc_clear_refs_operations = {
.llseek = noop_llseek,
};
+typedef struct {
+ u64 pme;
+} pagemap_entry_t;
+
struct pagemapread {
int pos, len;
- u64 *buffer;
+ pagemap_entry_t *buffer;
};
+#define PAGEMAP_WALK_SIZE (PMD_SIZE)
+#define PAGEMAP_WALK_MASK (PMD_MASK)
+
#define PM_ENTRY_BYTES sizeof(u64)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
@@ -620,10 +706,15 @@ struct pagemapread {
#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
#define PM_END_OF_BUFFER 1
-static int add_to_pagemap(unsigned long addr, u64 pfn,
+static inline pagemap_entry_t make_pme(u64 val)
+{
+ return (pagemap_entry_t) { .pme = val };
+}
+
+static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
struct pagemapread *pm)
{
- pm->buffer[pm->pos++] = pfn;
+ pm->buffer[pm->pos++] = *pme;
if (pm->pos >= pm->len)
return PM_END_OF_BUFFER;
return 0;
@@ -635,8 +726,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
struct pagemapread *pm = walk->private;
unsigned long addr;
int err = 0;
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+
for (addr = start; addr < end; addr += PAGE_SIZE) {
- err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+ err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
}
@@ -649,17 +742,35 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
}
-static u64 pte_to_pagemap_entry(pte_t pte)
+static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
{
- u64 pme = 0;
if (is_swap_pte(pte))
- pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+ *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte))
+ | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP);
else if (pte_present(pte))
- pme = PM_PFRAME(pte_pfn(pte))
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
- return pme;
+ *pme = make_pme(PM_PFRAME(pte_pfn(pte))
+ | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
+ pmd_t pmd, int offset)
+{
+ /*
+ * Currently pmd for thp is always present because thp can not be
+ * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+ * This if-check is just to prepare for future implementation.
+ */
+ if (pmd_present(pmd))
+ *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
+ | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
}
+#else
+static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
+ pmd_t pmd, int offset)
+{
+}
+#endif
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk)
@@ -668,13 +779,30 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct pagemapread *pm = walk->private;
pte_t *pte;
int err = 0;
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
- split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
+ spin_lock(&walk->mm->page_table_lock);
+ if (pmd_trans_huge_lock(pmd, vma) == 1) {
+ for (; addr != end; addr += PAGE_SIZE) {
+ unsigned long offset;
+
+ offset = (addr & ~PAGEMAP_WALK_MASK) >>
+ PAGE_SHIFT;
+ thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
+ err = add_to_pagemap(addr, &pme, pm);
+ if (err)
+ break;
+ }
+ spin_unlock(&walk->mm->page_table_lock);
+ return err;
+ }
+
for (; addr != end; addr += PAGE_SIZE) {
- u64 pfn = PM_NOT_PRESENT;
/* check to see if we've left 'vma' behind
* and need a new, higher one */
@@ -686,11 +814,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
- pfn = pte_to_pagemap_entry(*pte);
+ pte_to_pagemap_entry(&pme, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
- err = add_to_pagemap(addr, pfn, pm);
+ err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
}
@@ -701,13 +829,12 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
#ifdef CONFIG_HUGETLB_PAGE
-static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
+static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
+ pte_t pte, int offset)
{
- u64 pme = 0;
if (pte_present(pte))
- pme = PM_PFRAME(pte_pfn(pte) + offset)
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
- return pme;
+ *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
+ | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
}
/* This function walks within one hugetlb entry in the single call */
@@ -717,12 +844,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct pagemapread *pm = walk->private;
int err = 0;
- u64 pfn;
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
- pfn = huge_pte_to_pagemap_entry(*pte, offset);
- err = add_to_pagemap(addr, pfn, pm);
+ huge_pte_to_pagemap_entry(&pme, *pte, offset);
+ err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
}
@@ -757,8 +884,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions.
*/
-#define PAGEMAP_WALK_SIZE (PMD_SIZE)
-#define PAGEMAP_WALK_MASK (PMD_MASK)
static ssize_t pagemap_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -941,26 +1066,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *pte;
md = walk->private;
- spin_lock(&walk->mm->page_table_lock);
- if (pmd_trans_huge(*pmd)) {
- if (pmd_trans_splitting(*pmd)) {
- spin_unlock(&walk->mm->page_table_lock);
- wait_split_huge_page(md->vma->anon_vma, pmd);
- } else {
- pte_t huge_pte = *(pte_t *)pmd;
- struct page *page;
-
- page = can_gather_numa_stats(huge_pte, md->vma, addr);
- if (page)
- gather_stats(page, md, pte_dirty(huge_pte),
- HPAGE_PMD_SIZE/PAGE_SIZE);
- spin_unlock(&walk->mm->page_table_lock);
- return 0;
- }
- } else {
+
+ if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+ pte_t huge_pte = *(pte_t *)pmd;
+ struct page *page;
+
+ page = can_gather_numa_stats(huge_pte, md->vma, addr);
+ if (page)
+ gather_stats(page, md, pte_dirty(huge_pte),
+ HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(&walk->mm->page_table_lock);
+ return 0;
}
+ if (pmd_trans_unstable(pmd))
+ return 0;
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
do {
struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
@@ -1002,7 +1122,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
/*
* Display pages allocated per node and memory policy via /proc.
*/
-static int show_numa_map(struct seq_file *m, void *v)
+static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
struct numa_maps_private *numa_priv = m->private;
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1039,9 +1159,19 @@ static int show_numa_map(struct seq_file *m, void *v)
seq_path(m, &file->f_path, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
- } else if (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack) {
- seq_printf(m, " stack");
+ } else {
+ pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
+ if (tid != 0) {
+ /*
+ * Thread stack in /proc/PID/task/TID/maps or
+ * the main process stack.
+ */
+ if (!is_pid || (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack))
+ seq_printf(m, " stack");
+ else
+ seq_printf(m, " stack:%d", tid);
+ }
}
if (is_vm_hugetlb_page(vma))
@@ -1084,21 +1214,39 @@ out:
return 0;
}
+static int show_pid_numa_map(struct seq_file *m, void *v)
+{
+ return show_numa_map(m, v, 1);
+}
+
+static int show_tid_numa_map(struct seq_file *m, void *v)
+{
+ return show_numa_map(m, v, 0);
+}
+
static const struct seq_operations proc_pid_numa_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_numa_map,
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_pid_numa_map,
+};
+
+static const struct seq_operations proc_tid_numa_maps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_tid_numa_map,
};
-static int numa_maps_open(struct inode *inode, struct file *file)
+static int numa_maps_open(struct inode *inode, struct file *file,
+ const struct seq_operations *ops)
{
struct numa_maps_private *priv;
int ret = -ENOMEM;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (priv) {
priv->proc_maps.pid = proc_pid(inode);
- ret = seq_open(file, &proc_pid_numa_maps_op);
+ ret = seq_open(file, ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = priv;
@@ -1109,8 +1257,25 @@ static int numa_maps_open(struct inode *inode, struct file *file)
return ret;
}
-const struct file_operations proc_numa_maps_operations = {
- .open = numa_maps_open,
+static int pid_numa_maps_open(struct inode *inode, struct file *file)
+{
+ return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
+}
+
+static int tid_numa_maps_open(struct inode *inode, struct file *file)
+{
+ return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
+}
+
+const struct file_operations proc_pid_numa_maps_operations = {
+ .open = pid_numa_maps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+const struct file_operations proc_tid_numa_maps_operations = {
+ .open = tid_numa_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,