diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/base.c | 41 | ||||
-rw-r--r-- | fs/proc/fd.c | 1 | ||||
-rw-r--r-- | fs/proc/inode.c | 24 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 5 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 10 | ||||
-rw-r--r-- | fs/proc/page.c | 4 | ||||
-rw-r--r-- | fs/proc/self.c | 18 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 133 | ||||
-rw-r--r-- | fs/proc/thread_self.c | 19 |
9 files changed, 171 insertions, 84 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 4bd5d3118acd..2cf5d7e37375 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1564,12 +1564,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) return -ENOENT; } -static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie) +static const char *proc_pid_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { - struct inode *inode = d_inode(dentry); struct path path; int error = -EACCES; + if (!dentry) + return ERR_PTR(-ECHILD); + /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; @@ -1630,7 +1634,7 @@ out: const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, - .follow_link = proc_pid_follow_link, + .get_link = proc_pid_get_link, .setattr = proc_setattr, }; @@ -1895,7 +1899,7 @@ static const struct dentry_operations tid_map_files_dentry_operations = { .d_delete = pid_delete_dentry, }; -static int proc_map_files_get_link(struct dentry *dentry, struct path *path) +static int map_files_get_link(struct dentry *dentry, struct path *path) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; @@ -1945,20 +1949,22 @@ struct map_files_info { * path to the file in question. */ static const char * -proc_map_files_follow_link(struct dentry *dentry, void **cookie) +proc_map_files_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - return proc_pid_follow_link(dentry, NULL); + return proc_pid_get_link(dentry, inode, done); } /* - * Identical to proc_pid_link_inode_operations except for follow_link() + * Identical to proc_pid_link_inode_operations except for get_link() */ static const struct inode_operations proc_map_files_link_inode_operations = { .readlink = proc_pid_readlink, - .follow_link = proc_map_files_follow_link, + .get_link = proc_map_files_get_link, .setattr = proc_setattr, }; @@ -1975,7 +1981,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, return -ENOENT; ei = PROC_I(inode); - ei->op.proc_get_link = proc_map_files_get_link; + ei->op.proc_get_link = map_files_get_link; inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; @@ -2359,7 +2365,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); - char *page; + void *page; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -2374,14 +2380,11 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (*ppos != 0) goto out; - length = -ENOMEM; - page = (char*)__get_free_page(GFP_TEMPORARY); - if (!page) + page = memdup_user(buf, count); + if (IS_ERR(page)) { + length = PTR_ERR(page); goto out; - - length = -EFAULT; - if (copy_from_user(page, buf, count)) - goto out_free; + } /* Guard against adverse ptrace interaction */ length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); @@ -2390,10 +2393,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, - (void*)page, count); + page, count); mutex_unlock(&task->signal->cred_guard_mutex); out_free: - free_page((unsigned long) page); + kfree(page); out: put_task_struct(task); out_no_task: diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 3c2a915c695a..56afa5ef08f2 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -258,6 +258,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, name, len, instantiate, p, (void *)(unsigned long)fd)) goto out_fd_loop; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index bd95b9fdebb0..42305ddcbaa0 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -95,7 +95,8 @@ void __init proc_init_inodecache(void) proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_PANIC), + SLAB_MEM_SPREAD|SLAB_ACCOUNT| + SLAB_PANIC), init_once); } @@ -393,24 +394,25 @@ static const struct file_operations proc_reg_file_ops_no_compat = { }; #endif -static const char *proc_follow_link(struct dentry *dentry, void **cookie) +static void proc_put_link(void *p) { - struct proc_dir_entry *pde = PDE(d_inode(dentry)); - if (unlikely(!use_pde(pde))) - return ERR_PTR(-EINVAL); - *cookie = pde; - return pde->data; + unuse_pde(p); } -static void proc_put_link(struct inode *unused, void *p) +static const char *proc_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { - unuse_pde(p); + struct proc_dir_entry *pde = PDE(inode); + if (unlikely(!use_pde(pde))) + return ERR_PTR(-EINVAL); + set_delayed_call(done, proc_put_link, pde); + return pde->data; } const struct inode_operations proc_link_inode_operations = { .readlink = generic_readlink, - .follow_link = proc_follow_link, - .put_link = proc_put_link, + .get_link = proc_get_link, }; struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 9155a5a0d3b9..df4661abadc4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -57,11 +57,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) /* * Estimate the amount of memory available for userspace allocations, * without causing swapping. - * - * Free memory cannot be taken below the low watermark, before the - * system starts swapping. */ - available = i.freeram - wmark_low; + available = i.freeram - totalreserve_pages; /* * Not all the page cache can be freed, otherwise the system will diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6e8354b8cea..1dece8781f91 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -30,14 +30,18 @@ static const struct proc_ns_operations *ns_entries[] = { &mntns_operations, }; -static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) +static const char *proc_ns_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { - struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; struct task_struct *task; struct path ns_path; void *error = ERR_PTR(-EACCES); + if (!dentry) + return ERR_PTR(-ECHILD); + task = get_proc_task(inode); if (!task) return error; @@ -74,7 +78,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl static const struct inode_operations proc_ns_link_inode_operations = { .readlink = proc_ns_readlink, - .follow_link = proc_ns_follow_link, + .get_link = proc_ns_get_link, .setattr = proc_setattr, }; diff --git a/fs/proc/page.c b/fs/proc/page.c index 93484034a03d..b2855eea5405 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -103,9 +103,9 @@ u64 stable_page_flags(struct page *page) * pseudo flags for the well known (anonymous) memory mapped pages * * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the - * simple test in page_mapped() is not enough. + * simple test in page_mapcount() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (!PageSlab(page) && page_mapcount(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; diff --git a/fs/proc/self.c b/fs/proc/self.c index 113b8d061fc0..67e8db442cf0 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -18,26 +18,28 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static const char *proc_self_follow_link(struct dentry *dentry, void **cookie) +static const char *proc_self_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { - struct pid_namespace *ns = dentry->d_sb->s_fs_info; + struct pid_namespace *ns = inode->i_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); char *name; if (!tgid) return ERR_PTR(-ENOENT); /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, GFP_KERNEL); - if (!name) - return ERR_PTR(-ENOMEM); + name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC); + if (unlikely(!name)) + return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); sprintf(name, "%d", tgid); - return *cookie = name; + set_delayed_call(done, kfree_link, name); + return name; } static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, - .put_link = kfree_put_link, + .get_link = proc_self_get_link, }; static unsigned self_inum; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 187b3b5f242e..65a1b6c69c11 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,6 +14,7 @@ #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> +#include <linux/shmem_fs.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -22,9 +23,13 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { - unsigned long data, text, lib, swap, ptes, pmds; + unsigned long text, lib, swap, ptes, pmds, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; + anon = get_mm_counter(mm, MM_ANONPAGES); + file = get_mm_counter(mm, MM_FILEPAGES); + shmem = get_mm_counter(mm, MM_SHMEMPAGES); + /* * Note: to minimize their overhead, mm maintains hiwater_vm and * hiwater_rss only when about to *lower* total_vm or rss. Any @@ -35,11 +40,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) hiwater_vm = total_vm = mm->total_vm; if (hiwater_vm < mm->hiwater_vm) hiwater_vm = mm->hiwater_vm; - hiwater_rss = total_rss = get_mm_rss(mm); + hiwater_rss = total_rss = anon + file + shmem; if (hiwater_rss < mm->hiwater_rss) hiwater_rss = mm->hiwater_rss; - data = mm->total_vm - mm->shared_vm - mm->stack_vm; text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; swap = get_mm_counter(mm, MM_SWAPENTS); @@ -52,6 +56,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmPin:\t%8lu kB\n" "VmHWM:\t%8lu kB\n" "VmRSS:\t%8lu kB\n" + "RssAnon:\t%8lu kB\n" + "RssFile:\t%8lu kB\n" + "RssShmem:\t%8lu kB\n" "VmData:\t%8lu kB\n" "VmStk:\t%8lu kB\n" "VmExe:\t%8lu kB\n" @@ -65,7 +72,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) mm->pinned_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), total_rss << (PAGE_SHIFT-10), - data << (PAGE_SHIFT-10), + anon << (PAGE_SHIFT-10), + file << (PAGE_SHIFT-10), + shmem << (PAGE_SHIFT-10), + mm->data_vm << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, ptes >> 10, pmds >> 10, @@ -82,10 +92,11 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES); + *shared = get_mm_counter(mm, MM_FILEPAGES) + + get_mm_counter(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; - *data = mm->total_vm - mm->shared_vm; + *data = mm->data_vm + mm->stack_vm; *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); return mm->total_vm; } @@ -451,12 +462,14 @@ struct mem_size_stats { unsigned long private_hugetlb; u64 pss; u64 swap_pss; + bool check_shmem_swap; }; static void smaps_account(struct mem_size_stats *mss, struct page *page, - unsigned long size, bool young, bool dirty) + bool compound, bool young, bool dirty) { - int mapcount; + int i, nr = compound ? HPAGE_PMD_NR : 1; + unsigned long size = nr * PAGE_SIZE; if (PageAnon(page)) mss->anonymous += size; @@ -465,26 +478,53 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, /* Accumulate the size in pages that have been accessed. */ if (young || page_is_young(page) || PageReferenced(page)) mss->referenced += size; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - u64 pss_delta; - if (dirty || PageDirty(page)) - mss->shared_dirty += size; - else - mss->shared_clean += size; - pss_delta = (u64)size << PSS_SHIFT; - do_div(pss_delta, mapcount); - mss->pss += pss_delta; - } else { + /* + * page_count(page) == 1 guarantees the page is mapped exactly once. + * If any subpage of the compound page mapped with PTE it would elevate + * page_count(). + */ + if (page_count(page) == 1) { if (dirty || PageDirty(page)) mss->private_dirty += size; else mss->private_clean += size; mss->pss += (u64)size << PSS_SHIFT; + return; + } + + for (i = 0; i < nr; i++, page++) { + int mapcount = page_mapcount(page); + + if (mapcount >= 2) { + if (dirty || PageDirty(page)) + mss->shared_dirty += PAGE_SIZE; + else + mss->shared_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + } else { + if (dirty || PageDirty(page)) + mss->private_dirty += PAGE_SIZE; + else + mss->private_clean += PAGE_SIZE; + mss->pss += PAGE_SIZE << PSS_SHIFT; + } } } +#ifdef CONFIG_SHMEM +static int smaps_pte_hole(unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + + mss->swap += shmem_partial_swap_usage( + walk->vma->vm_file->f_mapping, addr, end); + + return 0; +} +#endif + static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { @@ -512,11 +552,25 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } } else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); + } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap + && pte_none(*pte))) { + page = find_get_entry(vma->vm_file->f_mapping, + linear_page_index(vma, addr)); + if (!page) + return; + + if (radix_tree_exceptional_entry(page)) + mss->swap += PAGE_SIZE; + else + page_cache_release(page); + + return; } if (!page) return; - smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); + + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte)); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -532,8 +586,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, if (IS_ERR_OR_NULL(page)) return; mss->anonymous_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, HPAGE_PMD_SIZE, - pmd_young(*pmd), pmd_dirty(*pmd)); + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd)); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -549,7 +602,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl)) { smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); return 0; @@ -671,6 +724,31 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) }; memset(&mss, 0, sizeof mss); + +#ifdef CONFIG_SHMEM + if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { + /* + * For shared or readonly shmem mappings we know that all + * swapped out pages belong to the shmem object, and we can + * obtain the swap value much more efficiently. For private + * writable mappings, we might have COW pages that are + * not affected by the parent swapped out pages of the shmem + * object, so we have to distinguish them during the page walk. + * Unless we know that the shmem object (or the part mapped by + * our VMA) has no swapped out pages at all. + */ + unsigned long shmem_swapped = shmem_swap_usage(vma); + + if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || + !(vma->vm_flags & VM_WRITE)) { + mss.swap = shmem_swapped; + } else { + mss.check_shmem_swap = true; + smaps_walk.pte_hole = smaps_pte_hole; + } + } +#endif + /* mmap_sem is held in m_start */ walk_page_vma(vma, &smaps_walk); @@ -817,9 +895,6 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); - if (vma->vm_flags & VM_SOFTDIRTY) - vma->vm_flags &= ~VM_SOFTDIRTY; - set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } #else @@ -838,7 +913,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; - if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl)) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty_pmd(vma, addr, pmd); goto out; @@ -1112,7 +1187,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, int err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmdp, vma, &ptl)) { u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; @@ -1444,7 +1519,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *orig_pte; pte_t *pte; - if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl)) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 947b0f4fd0a1..9eacd59e0360 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -19,26 +19,29 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie) +static const char *proc_thread_self_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { - struct pid_namespace *ns = dentry->d_sb->s_fs_info; + struct pid_namespace *ns = inode->i_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); char *name; if (!pid) return ERR_PTR(-ENOENT); - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); - if (!name) - return ERR_PTR(-ENOMEM); + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, + dentry ? GFP_KERNEL : GFP_ATOMIC); + if (unlikely(!name)) + return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); sprintf(name, "%d/task/%d", tgid, pid); - return *cookie = name; + set_delayed_call(done, kfree_link, name); + return name; } static const struct inode_operations proc_thread_self_inode_operations = { .readlink = proc_thread_self_readlink, - .follow_link = proc_thread_self_follow_link, - .put_link = kfree_put_link, + .get_link = proc_thread_self_get_link, }; static unsigned thread_self_inum; |