diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 151 |
1 files changed, 78 insertions, 73 deletions
diff --git a/mm/memory.c b/mm/memory.c index c387430f06c3..30991f83d0bf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -50,6 +50,7 @@ #include <linux/export.h> #include <linux/delayacct.h> #include <linux/init.h> +#include <linux/pfn_t.h> #include <linux/writeback.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> @@ -566,7 +567,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); - int wait_split_huge_page; if (!new) return -ENOMEM; @@ -586,18 +586,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ptl = pmd_lock(mm, pmd); - wait_split_huge_page = 0; if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ atomic_long_inc(&mm->nr_ptes); pmd_populate(mm, pmd, new); new = NULL; - } else if (unlikely(pmd_trans_splitting(*pmd))) - wait_split_huge_page = 1; + } spin_unlock(ptl); if (new) pte_free(mm, new); - if (wait_split_huge_page) - wait_split_huge_page(vma->anon_vma, pmd); return 0; } @@ -613,8 +609,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; - } else - VM_BUG_ON(pmd_trans_splitting(*pmd)); + } spin_unlock(&init_mm.page_table_lock); if (new) pte_free_kernel(&init_mm, new); @@ -832,10 +827,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, } else if (is_migration_entry(entry)) { page = migration_entry_to_page(entry); - if (PageAnon(page)) - rss[MM_ANONPAGES]++; - else - rss[MM_FILEPAGES]++; + rss[mm_counter(page)]++; if (is_write_migration_entry(entry) && is_cow_mapping(vm_flags)) { @@ -873,11 +865,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, page = vm_normal_page(vma, addr, pte); if (page) { get_page(page); - page_dup_rmap(page); - if (PageAnon(page)) - rss[MM_ANONPAGES]++; - else - rss[MM_FILEPAGES]++; + page_dup_rmap(page, false); + rss[mm_counter(page)]++; } out_set_pte: @@ -961,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*src_pmd)) { + if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON(next-addr != HPAGE_PMD_SIZE); err = copy_huge_pmd(dst_mm, src_mm, @@ -1113,9 +1102,8 @@ again: tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; - if (PageAnon(page)) - rss[MM_ANONPAGES]--; - else { + + if (!PageAnon(page)) { if (pte_dirty(ptent)) { force_flush = 1; set_page_dirty(page); @@ -1123,9 +1111,9 @@ again: if (pte_young(ptent) && likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); - rss[MM_FILEPAGES]--; } - page_remove_rmap(page); + rss[mm_counter(page)]--; + page_remove_rmap(page, false); if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); if (unlikely(!__tlb_remove_page(tlb, page))) { @@ -1146,11 +1134,7 @@ again: struct page *page; page = migration_entry_to_page(entry); - - if (PageAnon(page)) - rss[MM_ANONPAGES]--; - else - rss[MM_FILEPAGES]--; + rss[mm_counter(page)]--; } if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); @@ -1193,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*pmd)) { + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { #ifdef CONFIG_DEBUG_VM if (!rwsem_is_locked(&tlb->mm->mmap_sem)) { @@ -1204,7 +1188,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, BUG(); } #endif - split_huge_page_pmd(vma, addr, pmd); + split_huge_pmd(vma, pmd, addr); } else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ @@ -1460,7 +1444,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, /* Ok, finally just insert the thing.. */ get_page(page); - inc_mm_counter_fast(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, mm_counter_file(page)); page_add_file_rmap(page); set_pte_at(mm, addr, pte, mk_pte(page, prot)); @@ -1517,7 +1501,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, EXPORT_SYMBOL(vm_insert_page); static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, pgprot_t prot) + pfn_t pfn, pgprot_t prot) { struct mm_struct *mm = vma->vm_mm; int retval; @@ -1533,7 +1517,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, goto out_unlock; /* Ok, finally just insert the thing.. */ - entry = pte_mkspecial(pfn_pte(pfn, prot)); + if (pfn_t_devmap(pfn)) + entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); + else + entry = pte_mkspecial(pfn_t_pte(pfn, prot)); set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ @@ -1580,17 +1567,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; - if (track_pfn_insert(vma, &pgprot, pfn)) + if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV))) return -EINVAL; - ret = insert_pfn(vma, addr, pfn, pgprot); + ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot); return ret; } EXPORT_SYMBOL(vm_insert_pfn); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn) + pfn_t pfn) { BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); @@ -1604,10 +1591,10 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP * without pte special, it would there be refcounted as a normal page. */ - if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) { + if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) { struct page *page; - page = pfn_to_page(pfn); + page = pfn_t_to_page(pfn); return insert_page(vma, addr, page, vma->vm_page_prot); } return insert_pfn(vma, addr, pfn, vma->vm_page_prot); @@ -1949,6 +1936,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo copy_user_highpage(dst, src, va, vma); } +static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) +{ + struct file *vm_file = vma->vm_file; + + if (vm_file) + return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; + + /* + * Special mappings (e.g. VDSO) do not have any file so fake + * a default GFP_KERNEL for them. + */ + return GFP_KERNEL; +} + /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. @@ -1964,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, vmf.virtual_address = (void __user *)(address & PAGE_MASK); vmf.pgoff = page->index; vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.page = page; vmf.cow_page = NULL; @@ -2083,7 +2085,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, cow_user_page(new_page, old_page, address, vma); } - if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) goto oom_free_new; __SetPageUptodate(new_page); @@ -2097,7 +2099,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { - dec_mm_counter_fast(mm, MM_FILEPAGES); + dec_mm_counter_fast(mm, + mm_counter_file(old_page)); inc_mm_counter_fast(mm, MM_ANONPAGES); } } else { @@ -2113,8 +2116,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, * thread doing COW. */ ptep_clear_flush_notify(vma, address, page_table); - page_add_new_anon_rmap(new_page, vma, address); - mem_cgroup_commit_charge(new_page, memcg, false); + page_add_new_anon_rmap(new_page, vma, address, false); + mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary @@ -2146,14 +2149,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, * mapcount is visible. So transitively, TLBs to * old page will be flushed before it can be reused. */ - page_remove_rmap(old_page); + page_remove_rmap(old_page, false); } /* Free the old page.. */ new_page = old_page; page_copied = 1; } else { - mem_cgroup_cancel_charge(new_page, memcg); + mem_cgroup_cancel_charge(new_page, memcg, false); } if (new_page) @@ -2168,7 +2171,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, */ if (page_copied && (vma->vm_flags & VM_LOCKED)) { lock_page(old_page); /* LRU manipulation */ - munlock_vma_page(old_page); + if (PageMlocked(old_page)) + munlock_vma_page(old_page); unlock_page(old_page); } page_cache_release(old_page); @@ -2528,7 +2532,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_page; } - if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) { + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) { ret = VM_FAULT_OOM; goto out_page; } @@ -2562,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, pte = maybe_mkwrite(pte_mkdirty(pte), vma); flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; - exclusive = 1; + exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(orig_pte)) @@ -2570,15 +2574,16 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, address, page_table, pte); if (page == swapcache) { do_page_add_anon_rmap(page, vma, address, exclusive); - mem_cgroup_commit_charge(page, memcg, true); + mem_cgroup_commit_charge(page, memcg, true, false); } else { /* ksm created a completely new copy */ - page_add_new_anon_rmap(page, vma, address); - mem_cgroup_commit_charge(page, memcg, false); + page_add_new_anon_rmap(page, vma, address, false); + mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); } swap_free(entry); - if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + if (mem_cgroup_swap_full(page) || + (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); if (page != swapcache) { @@ -2608,7 +2613,7 @@ unlock: out: return ret; out_nomap: - mem_cgroup_cancel_charge(page, memcg); + mem_cgroup_cancel_charge(page, memcg, false); pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); @@ -2702,7 +2707,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!page) goto oom; - if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) goto oom_free_page; /* @@ -2723,15 +2728,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(page_table, ptl); - mem_cgroup_cancel_charge(page, memcg); + mem_cgroup_cancel_charge(page, memcg, false); page_cache_release(page); return handle_userfault(vma, address, flags, VM_UFFD_MISSING); } inc_mm_counter_fast(mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, address); - mem_cgroup_commit_charge(page, memcg, false); + page_add_new_anon_rmap(page, vma, address, false); + mem_cgroup_commit_charge(page, memcg, false, false); lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(mm, address, page_table, entry); @@ -2742,7 +2747,7 @@ unlock: pte_unmap_unlock(page_table, ptl); return 0; release: - mem_cgroup_cancel_charge(page, memcg); + mem_cgroup_cancel_charge(page, memcg, false); page_cache_release(page); goto unlock; oom_free_page: @@ -2767,6 +2772,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.flags = flags; vmf.page = NULL; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.cow_page = cow_page; ret = vma->vm_ops->fault(vma, &vmf); @@ -2818,9 +2824,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, address); + page_add_new_anon_rmap(page, vma, address, false); } else { - inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES); + inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page); } set_pte_at(vma->vm_mm, address, pte, entry); @@ -2933,6 +2939,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.max_pgoff = max_pgoff; vmf.flags = flags; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vma->vm_ops->map_pages(vma, &vmf); } @@ -2993,7 +3000,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) { + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) { page_cache_release(new_page); return VM_FAULT_OOM; } @@ -3022,7 +3029,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto uncharge_out; } do_set_pte(vma, address, new_page, pte, true, true); - mem_cgroup_commit_charge(new_page, memcg, false); + mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); if (fault_page) { @@ -3037,7 +3044,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } return ret; uncharge_out: - mem_cgroup_cancel_charge(new_page, memcg); + mem_cgroup_cancel_charge(new_page, memcg, false); page_cache_release(new_page); return ret; } @@ -3089,7 +3096,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, * pinned by vma->vm_file's reference. We rely on unlock_page()'s * release semantics to prevent the compiler from undoing this copying. */ - mapping = fault_page->mapping; + mapping = page_rmapping(fault_page); unlock_page(fault_page); if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) { /* @@ -3191,6 +3198,12 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } + /* TODO: handle PTE-mapped THP */ + if (PageCompound(page)) { + pte_unmap_unlock(ptep, ptl); + return 0; + } + /* * Avoid grouping on RO pages in general. RO pages shouldn't hurt as * much anyway since they can be in shared cache state. This misses @@ -3363,17 +3376,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, int ret; barrier(); - if (pmd_trans_huge(orig_pmd)) { + if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { unsigned int dirty = flags & FAULT_FLAG_WRITE; - /* - * If the pmd is splitting, return and retry the - * the fault. Alternative: wait until the split - * is done, and goto retry. - */ - if (pmd_trans_splitting(orig_pmd)) - return 0; - if (pmd_protnone(orig_pmd)) return do_huge_pmd_numa_page(mm, vma, address, orig_pmd, pmd); @@ -3400,7 +3405,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unlikely(__pte_alloc(mm, vma, pmd, address))) return VM_FAULT_OOM; /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) + if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd |