diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 04:19:42 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 04:19:42 +0300 |
commit | 33e247c7e58d335d70ecb84fd869091e2e4b8dcb (patch) | |
tree | e8561e1993dff03f8e56d10a5795fe9d379a3390 /mm/page_idle.c | |
parent | d71fc239b6915a8b750e9a447311029ff45b6580 (diff) | |
parent | 452e06af1f0149b01201f94264d452cd7a95db7a (diff) | |
download | linux-33e247c7e58d335d70ecb84fd869091e2e4b8dcb.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge third patch-bomb from Andrew Morton:
- even more of the rest of MM
- lib/ updates
- checkpatch updates
- small changes to a few scruffy filesystems
- kmod fixes/cleanups
- kexec updates
- a dma-mapping cleanup series from hch
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (81 commits)
dma-mapping: consolidate dma_set_mask
dma-mapping: consolidate dma_supported
dma-mapping: cosolidate dma_mapping_error
dma-mapping: consolidate dma_{alloc,free}_noncoherent
dma-mapping: consolidate dma_{alloc,free}_{attrs,coherent}
mm: use vma_is_anonymous() in create_huge_pmd() and wp_huge_pmd()
mm: make sure all file VMAs have ->vm_ops set
mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()
mm: mark most vm_operations_struct const
namei: fix warning while make xmldocs caused by namei.c
ipc: convert invalid scenarios to use WARN_ON
zlib_deflate/deftree: remove bi_reverse()
lib/decompress_unlzma: Do a NULL check for pointer
lib/decompressors: use real out buf size for gunzip with kernel
fs/affs: make root lookup from blkdev logical size
sysctl: fix int -> unsigned long assignments in INT_MIN case
kexec: export KERNEL_IMAGE_SIZE to vmcoreinfo
kexec: align crash_notes allocation to make it be inside one physical page
kexec: remove unnecessary test in kimage_alloc_crash_control_pages()
kexec: split kexec_load syscall from kexec core code
...
Diffstat (limited to 'mm/page_idle.c')
-rw-r--r-- | mm/page_idle.c | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/mm/page_idle.c b/mm/page_idle.c new file mode 100644 index 000000000000..d5dd79041484 --- /dev/null +++ b/mm/page_idle.c @@ -0,0 +1,232 @@ +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/kobject.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/rmap.h> +#include <linux/mmu_notifier.h> +#include <linux/page_ext.h> +#include <linux/page_idle.h> + +#define BITMAP_CHUNK_SIZE sizeof(u64) +#define BITMAP_CHUNK_BITS (BITMAP_CHUNK_SIZE * BITS_PER_BYTE) + +/* + * Idle page tracking only considers user memory pages, for other types of + * pages the idle flag is always unset and an attempt to set it is silently + * ignored. + * + * We treat a page as a user memory page if it is on an LRU list, because it is + * always safe to pass such a page to rmap_walk(), which is essential for idle + * page tracking. With such an indicator of user pages we can skip isolated + * pages, but since there are not usually many of them, it will hardly affect + * the overall result. + * + * This function tries to get a user memory page by pfn as described above. + */ +static struct page *page_idle_get_page(unsigned long pfn) +{ + struct page *page; + struct zone *zone; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + if (!page || !PageLRU(page) || + !get_page_unless_zero(page)) + return NULL; + + zone = page_zone(page); + spin_lock_irq(&zone->lru_lock); + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + spin_unlock_irq(&zone->lru_lock); + return page; +} + +static int page_idle_clear_pte_refs_one(struct page *page, + struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; + pmd_t *pmd; + pte_t *pte; + bool referenced = false; + + if (unlikely(PageTransHuge(page))) { + pmd = page_check_address_pmd(page, mm, addr, + PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); + if (pmd) { + referenced = pmdp_clear_young_notify(vma, addr, pmd); + spin_unlock(ptl); + } + } else { + pte = page_check_address(page, mm, addr, &ptl, 0); + if (pte) { + referenced = ptep_clear_young_notify(vma, addr, pte); + pte_unmap_unlock(pte, ptl); + } + } + if (referenced) { + clear_page_idle(page); + /* + * We cleared the referenced bit in a mapping to this page. To + * avoid interference with page reclaim, mark it young so that + * page_referenced() will return > 0. + */ + set_page_young(page); + } + return SWAP_AGAIN; +} + +static void page_idle_clear_pte_refs(struct page *page) +{ + /* + * Since rwc.arg is unused, rwc is effectively immutable, so we + * can make it static const to save some cycles and stack. + */ + static const struct rmap_walk_control rwc = { + .rmap_one = page_idle_clear_pte_refs_one, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page_mapped(page) || + !page_rmapping(page)) + return; + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) + return; + + rmap_walk(page, (struct rmap_walk_control *)&rwc); + + if (need_lock) + unlock_page(page); +} + +static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t pos, size_t count) +{ + u64 *out = (u64 *)buf; + struct page *page; + unsigned long pfn, end_pfn; + int bit; + + if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE) + return -EINVAL; + + pfn = pos * BITS_PER_BYTE; + if (pfn >= max_pfn) + return 0; + + end_pfn = pfn + count * BITS_PER_BYTE; + if (end_pfn > max_pfn) + end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + + for (; pfn < end_pfn; pfn++) { + bit = pfn % BITMAP_CHUNK_BITS; + if (!bit) + *out = 0ULL; + page = page_idle_get_page(pfn); + if (page) { + if (page_is_idle(page)) { + /* + * The page might have been referenced via a + * pte, in which case it is not idle. Clear + * refs and recheck. + */ + page_idle_clear_pte_refs(page); + if (page_is_idle(page)) + *out |= 1ULL << bit; + } + put_page(page); + } + if (bit == BITMAP_CHUNK_BITS - 1) + out++; + cond_resched(); + } + return (char *)out - buf; +} + +static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t pos, size_t count) +{ + const u64 *in = (u64 *)buf; + struct page *page; + unsigned long pfn, end_pfn; + int bit; + + if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE) + return -EINVAL; + + pfn = pos * BITS_PER_BYTE; + if (pfn >= max_pfn) + return -ENXIO; + + end_pfn = pfn + count * BITS_PER_BYTE; + if (end_pfn > max_pfn) + end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); + + for (; pfn < end_pfn; pfn++) { + bit = pfn % BITMAP_CHUNK_BITS; + if ((*in >> bit) & 1) { + page = page_idle_get_page(pfn); + if (page) { + page_idle_clear_pte_refs(page); + set_page_idle(page); + put_page(page); + } + } + if (bit == BITMAP_CHUNK_BITS - 1) + in++; + cond_resched(); + } + return (char *)in - buf; +} + +static struct bin_attribute page_idle_bitmap_attr = + __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR, + page_idle_bitmap_read, page_idle_bitmap_write, 0); + +static struct bin_attribute *page_idle_bin_attrs[] = { + &page_idle_bitmap_attr, + NULL, +}; + +static struct attribute_group page_idle_attr_group = { + .bin_attrs = page_idle_bin_attrs, + .name = "page_idle", +}; + +#ifndef CONFIG_64BIT +static bool need_page_idle(void) +{ + return true; +} +struct page_ext_operations page_idle_ops = { + .need = need_page_idle, +}; +#endif + +static int __init page_idle_init(void) +{ + int err; + + err = sysfs_create_group(mm_kobj, &page_idle_attr_group); + if (err) { + pr_err("page_idle: register sysfs failed\n"); + return err; + } + return 0; +} +subsys_initcall(page_idle_init); |