From 23b1b44e6c61295084284aa7d87db863a7802b92 Mon Sep 17 00:00:00 2001 From: Bang Li Date: Wed, 22 May 2024 14:12:02 +0800 Subject: mm: add update_mmu_tlb_range() Patch series "Add update_mmu_tlb_range() to simplify code", v4. This series of commits mainly adds the update_mmu_tlb_range() to batch update tlb in an address range and implement update_mmu_tlb() using update_mmu_tlb_range(). After commit 19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP"), We may need to batch update tlb of a certain address range by calling update_mmu_tlb() in a loop. Using the update_mmu_tlb_range(), we can simplify the code and possibly reduce the execution of some unnecessary code in some architectures. This patch (of 3): Add update_mmu_tlb_range(), we can batch update tlb of an address range. Link: https://lkml.kernel.org/r/20240522061204.117421-1-libang.li@antgroup.com Link: https://lkml.kernel.org/r/20240522061204.117421-2-libang.li@antgroup.com Signed-off-by: Bang Li Acked-by: David Hildenbrand Cc: Chris Zankel Cc: Huacai Chen Cc: Lance Yang Cc: Max Filippov Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/pgtable.h') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 18019f037bae..17d1caee39ab 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -729,6 +729,13 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, * fault. This function updates TLB only, do nothing with cache or others. * It is the difference with function update_mmu_cache. */ +#ifndef update_mmu_tlb_range +static inline void update_mmu_tlb_range(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, unsigned int nr) +{ +} +#endif + #ifndef __HAVE_ARCH_UPDATE_MMU_TLB static inline void update_mmu_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) -- cgit v1.2.3 From 8f65aa32239f1c3f11b7a25bd5921223bafc5fed Mon Sep 17 00:00:00 2001 From: Bang Li Date: Wed, 22 May 2024 14:12:03 +0800 Subject: mm: implement update_mmu_tlb() using update_mmu_tlb_range() Let's make update_mmu_tlb() simply a generic wrapper around update_mmu_tlb_range(). Only the latter can now be overridden by the architecture. We can now remove __HAVE_ARCH_UPDATE_MMU_TLB as well. Link: https://lkml.kernel.org/r/20240522061204.117421-3-libang.li@antgroup.com Signed-off-by: Bang Li Acked-by: David Hildenbrand Cc: Chris Zankel Cc: Huacai Chen Cc: Lance Yang Cc: Max Filippov Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- arch/loongarch/include/asm/pgtable.h | 2 -- arch/mips/include/asm/pgtable.h | 2 -- arch/riscv/include/asm/pgtable.h | 2 -- arch/xtensa/include/asm/pgtable.h | 3 --- arch/xtensa/mm/tlb.c | 6 ------ include/linux/pgtable.h | 4 +--- 6 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include/linux/pgtable.h') diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 5ccc2a3a6f7a..161dd6e10479 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -467,8 +467,6 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define update_mmu_cache(vma, addr, ptep) \ update_mmu_cache_range(NULL, vma, addr, ptep, 1) -#define __HAVE_ARCH_UPDATE_MMU_TLB -#define update_mmu_tlb update_mmu_cache #define update_mmu_tlb_range(vma, addr, ptep, nr) \ update_mmu_cache_range(NULL, vma, addr, ptep, nr) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 0891ad7d43b6..c29a551eb0ca 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -594,8 +594,6 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define update_mmu_cache(vma, address, ptep) \ update_mmu_cache_range(NULL, vma, address, ptep, 1) -#define __HAVE_ARCH_UPDATE_MMU_TLB -#define update_mmu_tlb update_mmu_cache #define update_mmu_tlb_range(vma, address, ptep, nr) \ update_mmu_cache_range(NULL, vma, address, ptep, nr) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0c50b51f68c4..ebfe8faafb79 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -489,8 +489,6 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define update_mmu_cache(vma, addr, ptep) \ update_mmu_cache_range(NULL, vma, addr, ptep, 1) -#define __HAVE_ARCH_UPDATE_MMU_TLB -#define update_mmu_tlb update_mmu_cache #define update_mmu_tlb_range(vma, addr, ptep, nr) \ update_mmu_cache_range(NULL, vma, addr, ptep, nr) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 436158bd9030..1647a7cc3fbf 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -410,9 +410,6 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, typedef pte_t *pte_addr_t; -void update_mmu_tlb(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep); -#define __HAVE_ARCH_UPDATE_MMU_TLB void update_mmu_tlb_range(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int nr); #define update_mmu_tlb_range update_mmu_tlb_range diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 05efba86b870..0a1a815dc796 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -163,12 +163,6 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } -void update_mmu_tlb(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) -{ - local_flush_tlb_page(vma, address); -} - void update_mmu_tlb_range(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int nr) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 17d1caee39ab..117b807e3f89 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,13 +736,11 @@ static inline void update_mmu_tlb_range(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_UPDATE_MMU_TLB static inline void update_mmu_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { + update_mmu_tlb_range(vma, address, ptep, 1); } -#define __HAVE_ARCH_UPDATE_MMU_TLB -#endif /* * Some architectures may be able to avoid expensive synchronization -- cgit v1.2.3 From 29f252cdc293f4a50b5d3dcbed53701d8444614d Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 29 May 2024 20:28:22 +1200 Subject: mm: introduce arch_do_swap_page_nr() which allows restore metadata for nr pages Should do_swap_page() have the capability to directly map a large folio, metadata restoration becomes necessary for a specified number of pages denoted as nr. It's important to highlight that metadata restoration is solely required by the SPARC platform, which, however, does not enable THP_SWAP. Consequently, in the present kernel configuration, there exists no practical scenario where users necessitate the restoration of nr metadata. Platforms implementing THP_SWAP might invoke this function with nr values exceeding 1, subsequent to do_swap_page() successfully mapping an entire large folio. Nonetheless, their arch_do_swap_page_nr() functions remain empty. Link: https://lkml.kernel.org/r/20240529082824.150954-5-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Ryan Roberts Reviewed-by: Khalid Aziz Cc: "David S. Miller" Cc: Andreas Larsson Cc: Baolin Wang Cc: Chris Li Cc: Christoph Hellwig Cc: Chuanhua Han Cc: David Hildenbrand Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Len Brown Cc: Matthew Wilcox (Oracle) Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 26 ++++++++++++++++++++------ mm/memory.c | 3 ++- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux/pgtable.h') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 117b807e3f89..2f32eaccf0b9 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1089,6 +1089,15 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) }) #ifndef __HAVE_ARCH_DO_SWAP_PAGE +static inline void arch_do_swap_page_nr(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + pte_t pte, pte_t oldpte, + int nr) +{ + +} +#else /* * Some architectures support metadata associated with a page. When a * page is being swapped out, this metadata must be saved so it can be @@ -1097,12 +1106,17 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) * page as metadata for the page. arch_do_swap_page() can restore this * metadata when a page is swapped back in. */ -static inline void arch_do_swap_page(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long addr, - pte_t pte, pte_t oldpte) -{ - +static inline void arch_do_swap_page_nr(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + pte_t pte, pte_t oldpte, + int nr) +{ + for (int i = 0; i < nr; i++) { + arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE, + pte_advance_pfn(pte, i), + pte_advance_pfn(oldpte, i)); + } } #endif diff --git a/mm/memory.c b/mm/memory.c index 1411edbb55d0..56cc3dd50c56 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4308,7 +4308,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) VM_BUG_ON(!folio_test_anon(folio) || (pte_write(pte) && !PageAnonExclusive(page))); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); - arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); + arch_do_swap_page_nr(vma->vm_mm, vma, vmf->address, + pte, vmf->orig_pte, 1); folio_unlock(folio); if (folio != swapcache && swapcache) { -- cgit v1.2.3 From 18d095b2556e5e1292003c8e9f5d845ed42ef89b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Jul 2024 15:51:19 +0200 Subject: mm: define __pte_leaf_size() to also take a PMD entry On powerpc 8xx, when a page is 8M size, the information is in the PMD entry. So allow architectures to provide __pte_leaf_size() instead of pte_leaf_size() and provide the PMD entry to that function. When __pte_leaf_size() is not defined, define it as a pte_leaf_size() so that architectures not interested in the PMD arguments are not impacted. Only define a default pte_leaf_size() when __pte_leaf_size() is not defined to make sure nobody adds new calls to pte_leaf_size() in the core. Link: https://lkml.kernel.org/r/c7c008f0a314bf8029ad7288fdc908db1ec7e449.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Reviewed-by: Oscar Salvador Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 3 +++ kernel/events/core.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/pgtable.h') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2f32eaccf0b9..2a6a3cccfc36 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1907,9 +1907,12 @@ typedef unsigned int pgtbl_mod_mask; #ifndef pmd_leaf_size #define pmd_leaf_size(x) PMD_SIZE #endif +#ifndef __pte_leaf_size #ifndef pte_leaf_size #define pte_leaf_size(x) PAGE_SIZE #endif +#define __pte_leaf_size(x,y) pte_leaf_size(y) +#endif /* * We always define pmd_pfn for all archs as it's used in lots of generic diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f908f077935..b2ca11bdc11e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7609,7 +7609,7 @@ again: pte = ptep_get_lockless(ptep); if (pte_present(pte)) - size = pte_leaf_size(pte); + size = __pte_leaf_size(pmd, pte); pte_unmap(ptep); #endif /* CONFIG_HAVE_GUP_FAST */ -- cgit v1.2.3