diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 162 |
1 files changed, 77 insertions, 85 deletions
diff --git a/mm/memory.c b/mm/memory.c index 8b44f765b645..e229970e4223 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; unsigned long highest_memmap_pfn __read_mostly; +EXPORT_SYMBOL(zero_pfn); + /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ @@ -751,7 +753,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn = pte_pfn(pte); if (HAVE_PTE_SPECIAL) { - if (likely(!pte_special(pte) || pte_numa(pte))) + if (likely(!pte_special(pte))) goto check_pfn; if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; @@ -777,15 +779,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, } } + if (is_zero_pfn(pfn)) + return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); return NULL; } - if (is_zero_pfn(pfn)) - return NULL; - /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. @@ -884,7 +885,7 @@ out_set_pte: return 0; } -int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { @@ -1126,7 +1127,7 @@ again: addr) != page->index) { pte_t ptfile = pgoff_to_pte(page->index); if (pte_soft_dirty(ptent)) - pte_file_mksoft_dirty(ptfile); + ptfile = pte_file_mksoft_dirty(ptfile); set_pte_at(mm, addr, pte, ptfile); } if (PageAnon(page)) @@ -1292,7 +1293,6 @@ static void unmap_page_range(struct mmu_gather *tlb, details = NULL; BUG_ON(addr >= end); - mem_cgroup_uncharge_start(); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -1302,7 +1302,6 @@ static void unmap_page_range(struct mmu_gather *tlb, next = zap_pud_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); - mem_cgroup_uncharge_end(); } @@ -2049,6 +2048,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; unsigned long mmun_start = 0; /* For mmu_notifiers */ unsigned long mmun_end = 0; /* For mmu_notifiers */ + struct mem_cgroup *memcg; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) { @@ -2204,7 +2204,7 @@ gotten: } __SetPageUptodate(new_page); - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) goto oom_free_new; mmun_start = address & PAGE_MASK; @@ -2234,6 +2234,8 @@ gotten: */ ptep_clear_flush(vma, address, page_table); page_add_new_anon_rmap(new_page, vma, address); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the @@ -2271,7 +2273,7 @@ gotten: new_page = old_page; ret |= VM_FAULT_WRITE; } else - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); if (new_page) page_cache_release(new_page); @@ -2399,7 +2401,10 @@ EXPORT_SYMBOL(unmap_mapping_range); /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * We return with the mmap_sem locked or unlocked in the same cases + * as does filemap_fault(). */ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -2407,10 +2412,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, { spinlock_t *ptl; struct page *page, *swapcache; + struct mem_cgroup *memcg; swp_entry_t entry; pte_t pte; int locked; - struct mem_cgroup *ptr; int exclusive = 0; int ret = 0; @@ -2486,7 +2491,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_page; } - if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) { ret = VM_FAULT_OOM; goto out_page; } @@ -2511,10 +2516,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * while the page is counted on swap but not yet in mapcount i.e. * before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. - * Because delete_from_swap_page() may be called by reuse_swap_page(), - * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry - * in page->private. In this case, a record in swap_cgroup is silently - * discarded at swap_free(). */ inc_mm_counter_fast(mm, MM_ANONPAGES); @@ -2530,12 +2531,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); set_pte_at(mm, address, page_table, pte); - if (page == swapcache) + if (page == swapcache) { do_page_add_anon_rmap(page, vma, address, exclusive); - else /* ksm created a completely new copy */ + mem_cgroup_commit_charge(page, memcg, true); + } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, address); - /* It's better to call commit-charge after rmap is established */ - mem_cgroup_commit_charge_swapin(page, ptr); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); + } swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) @@ -2568,7 +2571,7 @@ unlock: out: return ret; out_nomap: - mem_cgroup_cancel_charge_swapin(ptr); + mem_cgroup_cancel_charge(page, memcg); pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); @@ -2624,6 +2627,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags) { + struct mem_cgroup *memcg; struct page *page; spinlock_t *ptl; pte_t entry; @@ -2657,7 +2661,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, */ __SetPageUptodate(page); - if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2670,6 +2674,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(mm, address, page_table, entry); @@ -2679,7 +2685,7 @@ unlock: pte_unmap_unlock(page_table, ptl); return 0; release: - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); goto unlock; oom_free_page: @@ -2688,6 +2694,11 @@ oom: return VM_FAULT_OOM; } +/* + * The mmap_sem must have been held on entry, and may have been + * released depending on flags and vma->vm_ops->fault() return value. + * See filemap_fault() and __lock_page_retry(). + */ static int __do_fault(struct vm_area_struct *vma, unsigned long address, pgoff_t pgoff, unsigned int flags, struct page **page) { @@ -2744,7 +2755,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); else if (pte_file(*pte) && pte_file_soft_dirty(*pte)) - pte_mksoft_dirty(entry); + entry = pte_mksoft_dirty(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); @@ -2758,17 +2769,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } -static unsigned long fault_around_bytes = rounddown_pow_of_two(65536); - -static inline unsigned long fault_around_pages(void) -{ - return fault_around_bytes >> PAGE_SHIFT; -} - -static inline unsigned long fault_around_mask(void) -{ - return ~(fault_around_bytes - 1) & PAGE_MASK; -} +static unsigned long fault_around_bytes __read_mostly = + rounddown_pow_of_two(65536); #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) @@ -2834,12 +2836,15 @@ late_initcall(fault_around_debugfs); static void do_fault_around(struct vm_area_struct *vma, unsigned long address, pte_t *pte, pgoff_t pgoff, unsigned int flags) { - unsigned long start_addr; + unsigned long start_addr, nr_pages, mask; pgoff_t max_pgoff; struct vm_fault vmf; int off; - start_addr = max(address & fault_around_mask(), vma->vm_start); + nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT; + mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; + + start_addr = max(address & mask, vma->vm_start); off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); pte -= off; pgoff -= off; @@ -2851,7 +2856,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, - pgoff + fault_around_pages() - 1); + pgoff + nr_pages - 1); /* Check if it makes any sense to call ->map_pages */ while (!pte_none(*pte)) { @@ -2886,7 +2891,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * something). */ if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && - fault_around_pages() > 1) { + fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) @@ -2917,6 +2922,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { struct page *fault_page, *new_page; + struct mem_cgroup *memcg; spinlock_t *ptl; pte_t *pte; int ret; @@ -2928,7 +2934,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) { page_cache_release(new_page); return VM_FAULT_OOM; } @@ -2948,12 +2954,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto uncharge_out; } do_set_pte(vma, address, new_page, pte, true, true); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); unlock_page(fault_page); page_cache_release(fault_page); return ret; uncharge_out: - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); page_cache_release(new_page); return ret; } @@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +/* + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults). + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). + */ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) @@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -3172,7 +3188,10 @@ out: * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, @@ -3181,7 +3200,7 @@ static int handle_pte_fault(struct mm_struct *mm, pte_t entry; spinlock_t *ptl; - entry = *pte; + entry = ACCESS_ONCE(*pte); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { @@ -3232,6 +3251,9 @@ unlock: /* * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, flags); } +/* + * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). + */ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { @@ -3403,44 +3431,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ -#if !defined(__HAVE_ARCH_GATE_AREA) - -#if defined(AT_SYSINFO_EHDR) -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - gate_vma.vm_mm = NULL; - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; - - return 0; -} -__initcall(gate_vma_init); -#endif - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ -#ifdef AT_SYSINFO_EHDR - return &gate_vma; -#else - return NULL; -#endif -} - -int in_gate_area_no_mm(unsigned long addr) -{ -#ifdef AT_SYSINFO_EHDR - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; -#endif - return 0; -} - -#endif /* __HAVE_ARCH_GATE_AREA */ - static int __follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { @@ -3591,11 +3581,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) { +#ifndef CONFIG_HAVE_IOREMAP_PROT + break; +#else /* * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. */ -#ifdef CONFIG_HAVE_IOREMAP_PROT vma = find_vma(mm, addr); if (!vma || vma->vm_start > addr) break; @@ -3603,9 +3595,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ret = vma->vm_ops->access(vma, addr, buf, len, write); if (ret <= 0) -#endif break; bytes = ret; +#endif } else { bytes = len; offset = addr & (PAGE_SIZE-1); |