diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 110 |
1 files changed, 73 insertions, 37 deletions
diff --git a/mm/memory.c b/mm/memory.c index 30991f83d0bf..098f00d05461 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,6 +65,7 @@ #include <linux/userfaultfd_k.h> #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> #include <asm/tlb.h> @@ -562,8 +563,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } -int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - pmd_t *pmd, unsigned long address) +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); @@ -661,9 +661,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, return; } if (nr_unshown) { - printk(KERN_ALERT - "BUG: Bad page map: %lu messages suppressed\n", - nr_unshown); + pr_alert("BUG: Bad page map: %lu messages suppressed\n", + nr_unshown); nr_unshown = 0; } nr_shown = 0; @@ -674,15 +673,13 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, mapping = vma->vm_file ? 
vma->vm_file->f_mapping : NULL; index = linear_page_index(vma, addr); - printk(KERN_ALERT - "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", - current->comm, - (long long)pte_val(pte), (long long)pmd_val(*pmd)); + pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", + current->comm, + (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) dump_page(page, "bad pte"); - printk(KERN_ALERT - "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", - (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); + pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", + (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); /* * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y */ @@ -1105,6 +1102,12 @@ again: if (!PageAnon(page)) { if (pte_dirty(ptent)) { + /* + * oom_reaper cannot tear down dirty + * pages + */ + if (unlikely(details && details->ignore_dirty)) + continue; force_flush = 1; set_page_dirty(page); } @@ -1123,8 +1126,8 @@ again: } continue; } - /* If details->check_mapping, we leave swap entries. 
*/ - if (unlikely(details)) + /* only check swap_entries if explicitly asked for in details */ + if (unlikely(details && !details->check_swap_entries)) continue; entry = pte_to_swp_entry(ptent); @@ -1229,7 +1232,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, return addr; } -static void unmap_page_range(struct mmu_gather *tlb, +void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) @@ -1237,9 +1240,6 @@ static void unmap_page_range(struct mmu_gather *tlb, pgd_t *pgd; unsigned long next; - if (details && !details->check_mapping) - details = NULL; - BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); @@ -1551,8 +1551,29 @@ out: int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { + return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_insert_pfn); + +/** + * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vm_insert_pfn, except that it allows drivers + * to override pgprot on a per-page basis. + * + * This only makes sense for IO mappings, and it makes no sense for + * cow mappings. In general, using multiple vmas is preferable; + * vm_insert_pfn_prot should only be used if using multiple VMAs is + * impractical. + */ +int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ int ret; - pgprot_t pgprot = vma->vm_page_prot; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). 
However we would like @@ -1574,7 +1595,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, return ret; } -EXPORT_SYMBOL(vm_insert_pfn); +EXPORT_SYMBOL(vm_insert_pfn_prot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) @@ -1591,10 +1612,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP * without pte special, it would there be refcounted as a normal page. */ - if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) { + if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { struct page *page; - page = pfn_t_to_page(pfn); + /* + * At this point we are committed to insert_page() + * regardless of whether the caller specified flags that + * result in pfn_t_has_page() == false. + */ + page = pfn_to_page(pfn_t_to_pfn(pfn)); return insert_page(vma, addr, page, vma->vm_page_prot); } return insert_pfn(vma, addr, pfn, vma->vm_page_prot); @@ -1871,7 +1897,9 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long end = addr + size; int err; - BUG_ON(addr >= end); + if (WARN_ON(addr >= end)) + return -EINVAL; + pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -2232,11 +2260,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_get(old_page); - /* - * Only catch write-faults on shared writable pages, - * read-only shared pages can get COWed by - * get_user_pages(.write=1, .force=1). 
- */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { int tmp; @@ -2412,7 +2435,7 @@ static inline void unmap_mapping_range_tree(struct rb_root *root, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { - struct zap_details details; + struct zap_details details = { }; pgoff_t hba = holebegin >> PAGE_SHIFT; pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -3122,8 +3145,7 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) { - pgoff_t pgoff = (((address & PAGE_MASK) - - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + pgoff_t pgoff = linear_page_index(vma, address); pte_unmap(page_table); /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ @@ -3357,6 +3379,11 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, + flags & FAULT_FLAG_REMOTE)) + return VM_FAULT_SIGSEGV; + if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); @@ -3397,15 +3424,24 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Use __pte_alloc instead of pte_alloc_map, because we can't + * Use pte_alloc() instead of pte_alloc_map, because we can't * run pte_offset_map on the pmd, if an huge pmd could * materialize from under us from a different thread. */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) + if (unlikely(pte_alloc(mm, pmd, address))) return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) + /* + * If a huge pmd materialized under us just retry later. 
Use + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd + * didn't become pmd_trans_huge under us and then back to pmd_none, as + * a result of MADV_DONTNEED running immediately after a huge pmd fault + * in a different thread of this mm, in turn leading to a misleading + * pmd_trans_huge() retval. All we have to ensure is that it is a + * regular pmd that we can walk with pte_offset_map() and we can do that + * through an atomic read in C, which is what pmd_trans_unstable() + * provides. + */ + if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd @@ -3664,7 +3700,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages(tsk, mm, addr, 1, + ret = get_user_pages_remote(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT |