From 55284f70134f01fdc9cc4c4905551cc1f37abd34 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 16 Apr 2025 18:29:02 +0200
Subject: mm: Add vmalloc_huge_node()

To enable node specific hash-tables using huge pages if possible.

[bigeasy: use __vmalloc_node_range_noprof(), add nommu bits, inline
 vmalloc_huge]

Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20250416162921.513656-3-bigeasy@linutronix.de
---
 include/linux/vmalloc.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux/vmalloc.h')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..de95794777ad 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -168,8 +168,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m
 		int node, const void *caller) __alloc_size(1);
 #define __vmalloc_node(...)	alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
 
-void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
-#define vmalloc_huge(...)	alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
+void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
+#define vmalloc_huge_node(...)	alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
+
+static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+{
+	return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
+}
 
 extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
 #define __vmalloc_array(...)	alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
--
cgit v1.2.3
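A minimal usage sketch for the new interface, assuming a hypothetical
alloc_node_hash() helper (the function and its bucket layout are
illustrative only; in the original series the node specific hash-tables
referred to above serve the per-process futex hash rework).
vmalloc_huge_node() transparently falls back to order-0 pages when a
huge mapping is not possible, so the caller only needs the usual NULL
check, and the result is freed with vfree() as ever:

#include <linux/list.h>
#include <linux/overflow.h>
#include <linux/vmalloc.h>

/* Hypothetical helper: allocate a node-local hash table, preferring
 * huge pages for the backing mapping when size/arch permit. */
static struct hlist_head *alloc_node_hash(unsigned int nbuckets, int node)
{
	struct hlist_head *hash;
	unsigned int i;

	hash = vmalloc_huge_node(array_size(nbuckets, sizeof(*hash)),
				 GFP_KERNEL, node);
	if (!hash)
		return NULL;

	for (i = 0; i < nbuckets; i++)
		INIT_HLIST_HEAD(&hash[i]);

	return hash;
}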
From 2fba13371fe80b4d0d533a502e460ce0e936d024 Mon Sep 17 00:00:00 2001
From: Ryan Roberts
Date: Tue, 22 Apr 2025 09:18:16 +0100
Subject: mm/vmalloc: Gracefully unmap huge ptes

Commit f7ee1f13d606 ("mm/vmalloc: enable mapping of huge pages at pte
level in vmap") added its support by reusing the set_huge_pte_at() API,
which is otherwise only used for user mappings. But when unmapping those
huge ptes, it continued to call ptep_get_and_clear(), which is a
layering violation. To date, the only arch to implement this support is
powerpc and it all happens to work ok for it.

But arm64's implementation of ptep_get_and_clear() can not be safely
used to clear a previous set_huge_pte_at(). So let's introduce a new
arch opt-in function, arch_vmap_pte_range_unmap_size(), which can
provide the size of a (present) pte. Then we can call
huge_ptep_get_and_clear() to tear it down properly.

Note that if vunmap_range() is called with a range that starts in the
middle of a huge pte-mapped page, we must unmap the entire huge page so
the behaviour is consistent with pmd and pud block mappings. In this
case emit a warning just like we do for pmd/pud mappings.

Reviewed-by: Anshuman Khandual
Reviewed-by: Uladzislau Rezki (Sony)
Reviewed-by: Catalin Marinas
Signed-off-by: Ryan Roberts
Tested-by: Luiz Capitulino
Link: https://lore.kernel.org/r/20250422081822.1836315-9-ryan.roberts@arm.com
Signed-off-by: Will Deacon
---
 include/linux/vmalloc.h |  8 ++++++++
 mm/vmalloc.c            | 18 ++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux/vmalloc.h')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..16dd4cba64f2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,6 +113,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns
 }
 #endif
 
+#ifndef arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+							   pte_t *ptep)
+{
+	return PAGE_SIZE;
+}
+#endif
+
 #ifndef arch_vmap_pte_supported_shift
 static inline int arch_vmap_pte_supported_shift(unsigned long size)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d60d3a29d149..fe2e2cc8da94 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -350,12 +350,26 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     pgtbl_mod_mask *mask)
 {
 	pte_t *pte;
+	pte_t ptent;
+	unsigned long size = PAGE_SIZE;
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+#ifdef CONFIG_HUGETLB_PAGE
+		size = arch_vmap_pte_range_unmap_size(addr, pte);
+		if (size != PAGE_SIZE) {
+			if (WARN_ON(!IS_ALIGNED(addr, size))) {
+				addr = ALIGN_DOWN(addr, size);
+				pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT));
+			}
+			ptent = huge_ptep_get_and_clear(&init_mm, addr, pte, size);
+			if (WARN_ON(end - addr < size))
+				size = end - addr;
+		} else
+#endif
+			ptent = ptep_get_and_clear(&init_mm, addr, pte);
 		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += (size >> PAGE_SHIFT), addr += size, addr != end);
 	*mask |= PGTBL_PTE_MODIFIED;
 }
 
--
cgit v1.2.3
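The generic fallback above always reports PAGE_SIZE, so behaviour is
unchanged for architectures that do not opt in. A sketch of what an
opt-in can look like, loosely modeled on arm64's contiguous-PTE
(contpte) support from the same series; the helpers pte_valid_cont(),
__ptep_get() and CONT_PTE_SIZE are arm64 names and the details here are
illustrative rather than the verbatim arm64 code:

/* Sketch of an arch opt-in (arm64-flavoured, illustrative only):
 * a present pte that is part of a contiguous run maps CONT_PTE_SIZE
 * bytes, so vunmap_pte_range() must tear the whole run down with
 * huge_ptep_get_and_clear() instead of ptep_get_and_clear(). */
#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
							   pte_t *ptep)
{
	if (pte_valid_cont(__ptep_get(ptep)))
		return CONT_PTE_SIZE;

	return PAGE_SIZE;
}

Reporting a size rather than a boolean keeps the generic code ignorant
of the architecture's huge-pte geometry, mirroring the existing
arch_vmap_pte_range_map_size() hook on the mapping side.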