diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 22:32:41 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 22:32:41 +0300 |
commit | 98931dd95fd489fcbfa97da563505a6f071d7c77 (patch) | |
tree | 44683fc4a92efa614acdca2742a7ff19d26da1e3 /include/linux/mm.h | |
parent | df202b452fe6c6d6f1351bad485e2367ef1e644e (diff) | |
parent | f403f22f8ccb12860b2b62fec3173c6ccd45938b (diff) | |
download | linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.xz |
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
"Almost all of MM here. A few things are still getting finished off,
reviewed, etc.
- Yang Shi has improved the behaviour of khugepaged collapsing of
readonly file-backed transparent hugepages.
- Johannes Weiner has arranged for zswap memory use to be tracked and
managed on a per-cgroup basis.
- Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
runtime enablement of the recent huge page vmemmap optimization
feature.
- Baolin Wang contributes a series to fix some issues around hugetlb
pagetable invalidation.
- Zhenwei Pi has fixed some interactions between hwpoisoned pages and
virtualization.
- Tong Tiangen has enabled the use of the presently x86-only
page_table_check debugging feature on arm64 and riscv.
- David Vernet has done some fixup work on the memcg selftests.
- Peter Xu has taught userfaultfd to handle write protection faults
against shmem- and hugetlbfs-backed files.
- More DAMON development from SeongJae Park - adding online tuning of
the feature and support for monitoring of fixed virtual address
ranges. Also easier discovery of which monitoring operations are
available.
- Nadav Amit has done some optimization of TLB flushing during
mprotect().
- Neil Brown continues to labor away at improving our swap-over-NFS
support.
- David Hildenbrand has some fixes to anon page COWing versus
get_user_pages().
- Peng Liu fixed some errors in the core hugetlb code.
- Joao Martins has reduced the amount of memory consumed by
device-dax's compound devmaps.
- Some cleanups of the arch-specific pagemap code from Anshuman
Khandual.
- Muchun Song has found and fixed some errors in the TLB flushing of
transparent hugepages.
- Roman Gushchin has done more work on the memcg selftests.
... and, of course, many smaller fixes and cleanups. Notably, the
customary million cleanup serieses from Miaohe Lin"
* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
mm: kfence: use PAGE_ALIGNED helper
selftests: vm: add the "settings" file with timeout variable
selftests: vm: add "test_hmm.sh" to TEST_FILES
selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
selftests: vm: add migration to the .gitignore
selftests/vm/pkeys: fix typo in comment
ksm: fix typo in comment
selftests: vm: add process_mrelease tests
Revert "mm/vmscan: never demote for memcg reclaim"
mm/kfence: print disabling or re-enabling message
include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
mm: fix a potential infinite loop in start_isolate_page_range()
MAINTAINERS: add Muchun as co-maintainer for HugeTLB
zram: fix Kconfig dependency warning
mm/shmem: fix shmem folio swapoff hang
cgroup: fix an error handling path in alloc_pagecache_max_30M()
mm: damon: use HPAGE_PMD_SIZE
tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
nodemask.h: fix compilation error with GCC12
...
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r-- | include/linux/mm.h | 70 |
1 files changed, 58 insertions, 12 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index b0183450e484..f85e5a4c070b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1575,13 +1575,14 @@ static inline bool page_maybe_dma_pinned(struct page *page) /* * This should most likely only be called during fork() to see whether we - * should break the cow immediately for a page on the src mm. + * should break the cow immediately for an anon page on the src mm. + * + * The caller has to hold the PT lock and the vma->vm_mm->->write_protect_seq. */ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, struct page *page) { - if (!is_cow_mapping(vma->vm_flags)) - return false; + VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) return false; @@ -1845,9 +1846,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, pte_t **ptepp, - pmd_t **pmdpp, spinlock_t **ptlp); int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, @@ -1969,10 +1967,11 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) -extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, +extern unsigned long change_protection(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, unsigned long cp_flags); -extern int mprotect_fixup(struct vm_area_struct *vma, +extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); @@ -3005,6 +3004,45 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) return 0; } +/* + * Indicates for which pages that are write-protected in the page table, + * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the + * GUP pin will remain consistent with the pages mapped into the page tables + * of the MM. + * + * Temporary unmapping of PageAnonExclusive() pages or clearing of + * PageAnonExclusive() has to protect against concurrent GUP: + * * Ordinary GUP: Using the PT lock + * * GUP-fast and fork(): mm->write_protect_seq + * * GUP-fast and KSM or temporary unmapping (swap, migration): + * clear/invalidate+flush of the page table entry + * + * Must be called with the (sub)page that's actually referenced via the + * page table entry, which might not necessarily be the head page for a + * PTE-mapped THP. + */ +static inline bool gup_must_unshare(unsigned int flags, struct page *page) +{ + /* + * FOLL_WRITE is implicitly handled correctly as the page table entry + * has to be writable -- and if it references (part of) an anonymous + * folio, that part is required to be marked exclusive. + */ + if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) + return false; + /* + * Note: PageAnon(page) is stable until the page is actually getting + * freed. + */ + if (!PageAnon(page)) + return false; + /* + * Note that PageKsm() pages cannot be exclusive, and consequently, + * cannot get pinned. + */ + return !PageAnonExclusive(page); +} + typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); @@ -3149,7 +3187,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) } #endif -#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, @@ -3158,13 +3196,14 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end, void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, - unsigned long nr_pages, int nid, struct vmem_altmap *altmap); + unsigned long nr_pages, int nid, struct vmem_altmap *altmap, + struct dev_pagemap *pgmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, - struct vmem_altmap *altmap); + struct vmem_altmap *altmap, struct page *reuse); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, @@ -3252,7 +3291,6 @@ enum mf_action_page_type { MF_MSG_BUDDY, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, - MF_MSG_DIFFERENT_PAGE_SIZE, MF_MSG_UNKNOWN, }; @@ -3391,4 +3429,12 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) + #endif /* _LINUX_MM_H */ |