diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 3 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/as_dirty_helpers.c | 300 | ||||
-rw-r--r-- | mm/memory.c | 145 |
4 files changed, 36 insertions, 413 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 5006d0e6a5c7..f0c76ba47695 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,7 +765,4 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool -config AS_DIRTY_HELPERS - bool - endmenu diff --git a/mm/Makefile b/mm/Makefile index f5d412bbc2f7..ac5e5ba78874 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,4 +104,3 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o -obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c deleted file mode 100644 index f600e31534fb..000000000000 --- a/mm/as_dirty_helpers.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/mm.h> -#include <linux/mm_types.h> -#include <linux/hugetlb.h> -#include <linux/bitops.h> -#include <linux/mmu_notifier.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -/** - * struct apply_as - Closure structure for apply_as_range - * @base: struct pfn_range_apply we derive from - * @start: Address of first modified pte - * @end: Address of last modified pte + 1 - * @total: Total number of modified ptes - * @vma: Pointer to the struct vm_area_struct we're currently operating on - */ -struct apply_as { - struct pfn_range_apply base; - unsigned long start; - unsigned long end; - unsigned long total; - struct vm_area_struct *vma; -}; - -/** - * apply_pt_wrprotect - Leaf pte callback to write-protect a pte - * @pte: Pointer to the pte - * @token: Page table token, see apply_to_pfn_range() - * @addr: The virtual page address - * @closure: Pointer to a struct pfn_range_apply embedded in a - * struct apply_as - * - * The function write-protects a pte and records the range in - * virtual address space of touched ptes for efficient range TLB flushes. - * - * Return: Always zero. - */ -static int apply_pt_wrprotect(pte_t *pte, pgtable_t token, - unsigned long addr, - struct pfn_range_apply *closure) -{ - struct apply_as *aas = container_of(closure, typeof(*aas), base); - pte_t ptent = *pte; - - if (pte_write(ptent)) { - pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); - - ptent = pte_wrprotect(old_pte); - ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); - aas->total++; - aas->start = min(aas->start, addr); - aas->end = max(aas->end, addr + PAGE_SIZE); - } - - return 0; -} - -/** - * struct apply_as_clean - Closure structure for apply_as_clean - * @base: struct apply_as we derive from - * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap - * @bitmap: Bitmap with one bit for each page offset in the address_space range - * covered. - * @start: Address_space page offset of first modified pte relative - * to @bitmap_pgoff - * @end: Address_space page offset of last modified pte relative - * to @bitmap_pgoff - */ -struct apply_as_clean { - struct apply_as base; - pgoff_t bitmap_pgoff; - unsigned long *bitmap; - pgoff_t start; - pgoff_t end; -}; - -/** - * apply_pt_clean - Leaf pte callback to clean a pte - * @pte: Pointer to the pte - * @token: Page table token, see apply_to_pfn_range() - * @addr: The virtual page address - * @closure: Pointer to a struct pfn_range_apply embedded in a - * struct apply_as_clean - * - * The function cleans a pte and records the range in - * virtual address space of touched ptes for efficient TLB flushes. - * It also records dirty ptes in a bitmap representing page offsets - * in the address_space, as well as the first and last of the bits - * touched. - * - * Return: Always zero. - */ -static int apply_pt_clean(pte_t *pte, pgtable_t token, - unsigned long addr, - struct pfn_range_apply *closure) -{ - struct apply_as *aas = container_of(closure, typeof(*aas), base); - struct apply_as_clean *clean = container_of(aas, typeof(*clean), base); - pte_t ptent = *pte; - - if (pte_dirty(ptent)) { - pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) + - aas->vma->vm_pgoff - clean->bitmap_pgoff; - pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); - - ptent = pte_mkclean(old_pte); - ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); - - aas->total++; - aas->start = min(aas->start, addr); - aas->end = max(aas->end, addr + PAGE_SIZE); - - __set_bit(pgoff, clean->bitmap); - clean->start = min(clean->start, pgoff); - clean->end = max(clean->end, pgoff + 1); - } - - return 0; -} - -/** - * apply_as_range - Apply a pte callback to all PTEs pointing into a range - * of an address_space. - * @mapping: Pointer to the struct address_space - * @aas: Closure structure - * @first_index: First page offset in the address_space - * @nr: Number of incremental page offsets to cover - * - * Return: Number of ptes touched. Note that this number might be larger - * than @nr if there are overlapping vmas - */ -static unsigned long apply_as_range(struct address_space *mapping, - struct apply_as *aas, - pgoff_t first_index, pgoff_t nr) -{ - struct vm_area_struct *vma; - pgoff_t vba, vea, cba, cea; - unsigned long start_addr, end_addr; - struct mmu_notifier_range range; - - i_mmap_lock_read(mapping); - vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index, - first_index + nr - 1) { - unsigned long vm_flags = READ_ONCE(vma->vm_flags); - - /* - * We can only do advisory flag tests below, since we can't - * require the vm's mmap_sem to be held to protect the flags. - * Therefore, callers that strictly depend on specific mmap - * flags to remain constant throughout the operation must - * either ensure those flags are immutable for all relevant - * vmas or can't use this function. Fixing this properly would - * require the vma::vm_flags to be protected by a separate - * lock taken after the i_mmap_lock - */ - - /* Skip non-applicable VMAs */ - if ((vm_flags & (VM_SHARED | VM_WRITE)) != - (VM_SHARED | VM_WRITE)) - continue; - - /* Warn on and skip VMAs whose flags indicate illegal usage */ - if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO)) - continue; - - /* Clip to the vma */ - vba = vma->vm_pgoff; - vea = vba + vma_pages(vma); - cba = first_index; - cba = max(cba, vba); - cea = first_index + nr; - cea = min(cea, vea); - - /* Translate to virtual address */ - start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start; - end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start; - if (start_addr >= end_addr) - continue; - - aas->base.mm = vma->vm_mm; - aas->vma = vma; - aas->start = end_addr; - aas->end = start_addr; - - mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, - vma, vma->vm_mm, start_addr, end_addr); - mmu_notifier_invalidate_range_start(&range); - - /* Needed when we only change protection? */ - flush_cache_range(vma, start_addr, end_addr); - - /* - * We're not using tlb_gather_mmu() since typically - * only a small subrange of PTEs are affected. - */ - inc_tlb_flush_pending(vma->vm_mm); - - /* Should not error since aas->base.alloc == 0 */ - WARN_ON(apply_to_pfn_range(&aas->base, start_addr, - end_addr - start_addr)); - if (aas->end > aas->start) - flush_tlb_range(vma, aas->start, aas->end); - - mmu_notifier_invalidate_range_end(&range); - dec_tlb_flush_pending(vma->vm_mm); - } - i_mmap_unlock_read(mapping); - - return aas->total; -} - -/** - * apply_as_wrprotect - Write-protect all ptes in an address_space range - * @mapping: The address_space we want to write protect - * @first_index: The first page offset in the range - * @nr: Number of incremental page offsets to cover - * - * WARNING: This function should only be used for address spaces whose - * vmas are marked VM_IO and that do not contain huge pages. - * To avoid interference with COW'd pages, vmas not marked VM_SHARED are - * simply skipped. - * - * Return: The number of ptes actually write-protected. Note that - * already write-protected ptes are not counted. - */ -unsigned long apply_as_wrprotect(struct address_space *mapping, - pgoff_t first_index, pgoff_t nr) -{ - struct apply_as aas = { - .base = { - .alloc = 0, - .ptefn = apply_pt_wrprotect, - }, - .total = 0, - }; - - return apply_as_range(mapping, &aas, first_index, nr); -} -EXPORT_SYMBOL_GPL(apply_as_wrprotect); - -/** - * apply_as_clean - Clean all ptes in an address_space range - * @mapping: The address_space we want to clean - * @first_index: The first page offset in the range - * @nr: Number of incremental page offsets to cover - * @bitmap_pgoff: The page offset of the first bit in @bitmap - * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to - * cover the whole range @first_index..@first_index + @nr. - * @start: Pointer to number of the first set bit in @bitmap. - * is modified as new bits are set by the function. - * @end: Pointer to the number of the last set bit in @bitmap. - * none set. The value is modified as new bits are set by the function. - * - * Note: When this function returns there is no guarantee that a CPU has - * not already dirtied new ptes. However it will not clean any ptes not - * reported in the bitmap. - * - * If a caller needs to make sure all dirty ptes are picked up and none - * additional are added, it first needs to write-protect the address-space - * range and make sure new writers are blocked in page_mkwrite() or - * pfn_mkwrite(). And then after a TLB flush following the write-protection - * pick up all dirty bits. - * - * WARNING: This function should only be used for address spaces whose - * vmas are marked VM_IO and that do not contain huge pages. - * To avoid interference with COW'd pages, vmas not marked VM_SHARED are - * simply skipped. - * - * Return: The number of dirty ptes actually cleaned. - */ -unsigned long apply_as_clean(struct address_space *mapping, - pgoff_t first_index, pgoff_t nr, - pgoff_t bitmap_pgoff, - unsigned long *bitmap, - pgoff_t *start, - pgoff_t *end) -{ - bool none_set = (*start >= *end); - struct apply_as_clean clean = { - .base = { - .base = { - .alloc = 0, - .ptefn = apply_pt_clean, - }, - .total = 0, - }, - .bitmap_pgoff = bitmap_pgoff, - .bitmap = bitmap, - .start = none_set ? nr : *start, - .end = none_set ? 0 : *end, - }; - unsigned long ret = apply_as_range(mapping, &clean.base, first_index, - nr); - - *start = clean.start; - *end = clean.end; - return ret; -} -EXPORT_SYMBOL_GPL(apply_as_clean); diff --git a/mm/memory.c b/mm/memory.c index 462aa47f8878..ddf20bd0c317 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,17 +2032,18 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, - unsigned long addr, unsigned long end) +static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (closure->mm == &init_mm) ? + pte = (mm == &init_mm) ? pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); + pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2053,109 +2054,86 @@ static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = closure->ptefn(pte++, token, addr, closure); + err = fn(pte++, token, addr, data); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (closure->mm != &init_mm) + if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, - unsigned long addr, unsigned long end) +static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) { pmd_t *pmd; unsigned long next; - int err = 0; + int err; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(closure->mm, pud, addr); + pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; - do { next = pmd_addr_end(addr, end); - if (!closure->alloc && pmd_none_or_clear_bad(pmd)) - continue; - err = apply_to_pte_range(closure, pmd, addr, next); + err = apply_to_pte_range(mm, pmd, addr, next, fn, data); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, - unsigned long addr, unsigned long end) +static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) { pud_t *pud; unsigned long next; - int err = 0; + int err; - pud = pud_alloc(closure->mm, p4d, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; - do { next = pud_addr_end(addr, end); - if (!closure->alloc && pud_none_or_clear_bad(pud)) - continue; - err = apply_to_pmd_range(closure, pud, addr, next); + err = apply_to_pmd_range(mm, pud, addr, next, fn, data); if (err) break; } while (pud++, addr = next, addr != end); return err; } -static int apply_to_p4d_range(struct pfn_range_apply *closure, pgd_t *pgd, - unsigned long addr, unsigned long end) +static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) { p4d_t *p4d; unsigned long next; - int err = 0; + int err; - p4d = p4d_alloc(closure->mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return -ENOMEM; - do { next = p4d_addr_end(addr, end); - if (!closure->alloc && p4d_none_or_clear_bad(p4d)) - continue; - err = apply_to_pud_range(closure, p4d, addr, next); + err = apply_to_pud_range(mm, p4d, addr, next, fn, data); if (err) break; } while (p4d++, addr = next, addr != end); return err; } -/** - * apply_to_pfn_range - Scan a region of virtual memory, calling a provided - * function on each leaf page table entry - * @closure: Details about how to scan and what function to apply - * @addr: Start virtual address - * @size: Size of the region - * - * If @closure->alloc is set to 1, the function will fill in the page table - * as necessary. Otherwise it will skip non-present parts. - * Note: The caller must ensure that the range does not contain huge pages. - * The caller must also assure that the proper mmu_notifier functions are - * called before and after the call to apply_to_pfn_range. - * - * WARNING: Do not use this function unless you know exactly what you are - * doing. It is lacking support for huge pages and transparent huge pages. - * - * Return: Zero on success. If the provided function returns a non-zero status, - * the page table walk will terminate and that status will be returned. - * If @closure->alloc is set to 1, then this function may also return memory - * allocation errors arising from allocating page table memory. +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. */ -int apply_to_pfn_range(struct pfn_range_apply *closure, - unsigned long addr, unsigned long size) +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) { pgd_t *pgd; unsigned long next; @@ -2165,65 +2143,16 @@ int apply_to_pfn_range(struct pfn_range_apply *closure, if (WARN_ON(addr >= end)) return -EINVAL; - pgd = pgd_offset(closure->mm, addr); + pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - if (!closure->alloc && pgd_none_or_clear_bad(pgd)) - continue; - err = apply_to_p4d_range(closure, pgd, addr, next); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); return err; } - -/** - * struct page_range_apply - Closure structure for apply_to_page_range() - * @pter: The base closure structure we derive from - * @fn: The leaf pte function to call - * @data: The leaf pte function closure - */ -struct page_range_apply { - struct pfn_range_apply pter; - pte_fn_t fn; - void *data; -}; - -/* - * Callback wrapper to enable use of apply_to_pfn_range for - * the apply_to_page_range interface - */ -static int apply_to_page_range_wrapper(pte_t *pte, pgtable_t token, - unsigned long addr, - struct pfn_range_apply *pter) -{ - struct page_range_apply *pra = - container_of(pter, typeof(*pra), pter); - - return pra->fn(pte, token, addr, pra->data); -} - -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. - * - * WARNING: Do not use this function unless you know exactly what you are - * doing. It is lacking support for huge pages and transparent huge pages. - */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) -{ - struct page_range_apply pra = { - .pter = {.mm = mm, - .alloc = 1, - .ptefn = apply_to_page_range_wrapper }, - .fn = fn, - .data = data - }; - - return apply_to_pfn_range(&pra.pter, addr, size); -} EXPORT_SYMBOL_GPL(apply_to_page_range); /* @@ -2309,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2586,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf); } @@ -2607,8 +2536,7 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE | - VM_FAULT_RETRY)))) { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } @@ -3673,8 +3601,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | - VM_FAULT_RETRY)))) { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } |