author     Ryan Roberts <ryan.roberts@arm.com>    2025-04-22 11:18:17 +0300
committer  Will Deacon <will@kernel.org>          2025-05-09 15:43:07 +0300
commit     06fc959fcff743298fdb5428e18bcbd7b54cb9ae (patch)
tree       82f7a7e4d6637af8540b82945df1903e70ee8730
parent     2fba13371fe80b4d0d533a502e460ce0e936d024 (diff)
download   linux-06fc959fcff743298fdb5428e18bcbd7b54cb9ae.tar.xz
arm64/mm: Support huge pte-mapped pages in vmap
Implement the required arch functions to enable use of contpte in the vmap when VM_ALLOW_HUGE_VMAP is specified. This speeds up vmap operations due to only having to issue a DSB and ISB per contpte block instead of per pte. But it also means that the TLB pressure reduces due to only needing a single TLB entry for the whole contpte block.

Since vmap uses set_huge_pte_at() to set the contpte, that API is now used for kernel mappings for the first time. Although in the vmap case we never expect it to be called to modify a valid mapping so clear_flush() should never be called, it's still wise to make it robust for the kernel case, so amend the tlb flush function if the mm is for kernel space.

Tested with vmalloc performance selftests:

  # kself/mm/test_vmalloc.sh \
        run_test_mask=1 test_repeat_count=5 nr_pages=256 \
        test_loop_count=100000 use_huge=1

Duration reduced from 1274243 usec to 1083553 usec on Apple M2 for 15% reduction in time taken.

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-10-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
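For a concrete sense of the caller side, the snippet below is a hypothetical demo module (not part of this patch) that allocates through vmalloc_huge(), which requests VM_ALLOW_HUGE_VMAP internally; on arm64 with this patch applied, such an allocation can be installed as contpte blocks. The 128KiB size is an assumption chosen to be contpte-eligible (CONT_PTE_SIZE is 64KiB with 4K pages) while staying below PMD size, so the new pte-level path is the one exercised:

  /*
   * Hypothetical demo module (illustration only, not from this patch):
   * allocate a vmalloc region with VM_ALLOW_HUGE_VMAP via vmalloc_huge()
   * so the arm64 contpte vmap path can be exercised.
   */
  #include <linux/module.h>
  #include <linux/sizes.h>
  #include <linux/string.h>
  #include <linux/vmalloc.h>

  static void *buf;

  static int __init contpte_vmap_demo_init(void)
  {
  	/* 128KiB: a multiple of CONT_PTE_SIZE (64KiB with 4K pages), below PMD size. */
  	buf = vmalloc_huge(SZ_128K, GFP_KERNEL);
  	if (!buf)
  		return -ENOMEM;
  	memset(buf, 0, SZ_128K);	/* touch the mapping */
  	return 0;
  }

  static void __exit contpte_vmap_demo_exit(void)
  {
  	vfree(buf);
  }

  module_init(contpte_vmap_demo_init);
  module_exit(contpte_vmap_demo_exit);
  MODULE_LICENSE("GPL");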
-rw-r--r--  arch/arm64/include/asm/vmalloc.h  45
-rw-r--r--  arch/arm64/mm/hugetlbpage.c        5
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..12f534e8f3ed 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -23,6 +23,51 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr,
+ unsigned long end, u64 pfn,
+ unsigned int max_page_shift)
+{
+ /*
+ * If the block is at least CONT_PTE_SIZE in size, and is naturally
+ * aligned in both virtual and physical space, then we can pte-map the
+ * block using the PTE_CONT bit for more efficient use of the TLB.
+ */
+ if (max_page_shift < CONT_PTE_SHIFT)
+ return PAGE_SIZE;
+
+ if (end - addr < CONT_PTE_SIZE)
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(addr, CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ return CONT_PTE_SIZE;
+}
+
+#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+ pte_t *ptep)
+{
+ /*
+ * The caller handles alignment so it's sufficient just to check
+ * PTE_CONT.
+ */
+ return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= CONT_PTE_SIZE)
+ return CONT_PTE_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
#endif
#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
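For orientation (context, not part of the diff): the generic pte-mapping loop in mm/vmalloc.c is the consumer of arch_vmap_pte_range_map_size(). When the hook returns a size larger than PAGE_SIZE, the loop installs the whole block with set_huge_pte_at() rather than one pte at a time, which is why that API is now reached for kernel mappings. The snippet below is a simplified paraphrase of that loop, not the verbatim upstream code, so treat the details as approximate:

  /* Simplified paraphrase of the pte-level mapping loop in mm/vmalloc.c (illustrative). */
  do {
  	size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
  	if (size != PAGE_SIZE) {
  		/* e.g. CONT_PTE_SIZE on arm64: install the whole contpte block at once. */
  		pte_t entry = pfn_pte(pfn, prot);

  		entry = arch_make_huge_pte(entry, ilog2(size), 0);
  		set_huge_pte_at(&init_mm, addr, pte, entry, size);
  		pfn += PFN_DOWN(size);
  		continue;
  	}
  	set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
  	pfn++;
  } while (pte += PFN_DOWN(size), addr += size, addr != end);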
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index d34703846ef4..0c8737f4f2ce 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -209,7 +209,10 @@ static void clear_flush(struct mm_struct *mm,
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
__ptep_get_and_clear_anysz(mm, ptep, pgsize);
- __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true);
+ if (mm == &init_mm)
+ flush_tlb_kernel_range(saddr, addr);
+ else
+ __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
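For context, with this hunk applied the flush helper reads roughly as follows. The surrounding declarations are reconstructed from the upstream file rather than shown in the hunk, so take this as an illustrative sketch: a kernel (init_mm) mapping has no VMA, hence flush_tlb_kernel_range() is used instead of the VMA-based hugetlb range flush.

  static void clear_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
  			unsigned long pgsize, unsigned long ncontig)
  {
  	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
  	unsigned long i, saddr = addr;

  	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
  		__ptep_get_and_clear_anysz(mm, ptep, pgsize);

  	/* Kernel mappings (vmap) have no VMA; flush the VA range directly. */
  	if (mm == &init_mm)
  		flush_tlb_kernel_range(saddr, addr);
  	else
  		__flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true);
  }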