diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 79 |
1 files changed, 56 insertions, 23 deletions
diff --git a/mm/memory.c b/mm/memory.c index 0e517be91a89..fe2fba27ded2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -68,6 +68,7 @@ #include <linux/debugfs.h> #include <linux/userfaultfd_k.h> #include <linux/dax.h> +#include <linux/oom.h> #include <asm/io.h> #include <asm/mmu_context.h> @@ -215,12 +216,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,10 +272,14 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -398,6 +399,34 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. @@ -1197,6 +1226,7 @@ again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; @@ -2864,6 +2894,7 @@ static int do_anonymous_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; + int ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ @@ -2896,6 +2927,9 @@ static int do_anonymous_page(struct vm_fault *vmf) vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) goto unlock; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2930,6 +2964,10 @@ static int do_anonymous_page(struct vm_fault *vmf) if (!pte_none(*vmf->pte)) goto release; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto release; + /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2949,7 +2987,7 @@ setpte: update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + return ret; release: mem_cgroup_cancel_charge(page, memcg, false); put_page(page); @@ -3223,7 +3261,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, int finish_fault(struct vm_fault *vmf) { struct page *page; - int ret; + int ret = 0; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && @@ -3231,7 +3269,15 @@ int finish_fault(struct vm_fault *vmf) page = vmf->cow_page; else page = vmf->page; - ret = alloc_set_pte(vmf, vmf->memcg, page); + + /* + * check even for read faults because we might have lost our CoWed + * page + */ + if (!(vmf->vma->vm_flags & VM_SHARED)) + ret = check_stable_address_space(vmf->vma->vm_mm); + if (!ret) + ret = alloc_set_pte(vmf, vmf->memcg, page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; @@ -3871,19 +3917,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, mem_cgroup_oom_synchronize(false); } - /* - * This mm has been already reaped by the oom reaper and so the - * refault cannot be trusted in general. Anonymous refaults would - * lose data and give a zero page instead e.g. This is especially - * problem for use_mm() because regular tasks will just die and - * the corrupted data will not be visible anywhere while kthread - * will outlive the oom victim and potentially propagate the date - * further. - */ - if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) - ret = VM_FAULT_SIGBUS; - return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); |