From a8749a35c39903120ec421ef2525acc8e0daa55c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Mar 2022 04:47:22 -0500 Subject: mm: vmalloc: introduce array allocation functions Linux has dozens of occurrences of vmalloc(array_size()) and vzalloc(array_size()). Allow to simplify the code by providing vmalloc_array and vcalloc, as well as the underscored variants that let the caller specify the GFP flags. Acked-by: Michal Hocko Signed-off-by: Paolo Bonzini --- include/linux/vmalloc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 880227b9f044..d1bbd4fd50c5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -159,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); void *vmalloc_no_huge(unsigned long size) __alloc_size(1); +extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); +extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From ff11a7ce1f0f8c1e7870de26860024b4ddbf5755 Mon Sep 17 00:00:00 2001 From: Bang Li Date: Tue, 22 Mar 2022 14:43:02 -0700 Subject: mm/vmalloc: fix comments about vmap_area struct The vmap_area_root should be in the "busy" tree and the free_vmap_area_root should be in the "free" tree. 
Link: https://lkml.kernel.org/r/20220305011510.33596-1-libang.linuxer@gmail.com Fixes: 688fcbfc06e4 ("mm/vmalloc: modify struct vmap_area to reduce its size") Signed-off-by: Bang Li Reviewed-by: Uladzislau Rezki (Sony) Cc: Pengfei Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 880227b9f044..05065915edd7 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,8 +80,8 @@ struct vmap_area { /* * The following two variables can be packed, because * a vmap_area object can be either: - * 1) in "free" tree (root is vmap_area_root) - * 2) or "busy" tree (root is free_vmap_area_root) + * 1) in "free" tree (root is free_vmap_area_root) + * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ -- cgit v1.2.3 From 0b7ccc70ee1d5b499d1626c9d28f729507b1c036 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:10:55 -0700 Subject: kasan, vmalloc: drop outdated VM_KASAN comment The comment about VM_KASAN in include/linux/vmalloc.h is outdated. VM_KASAN is currently only used to mark vm_areas allocated for kernel modules when CONFIG_KASAN_VMALLOC is disabled. Drop the comment. 
Link: https://lkml.kernel.org/r/780395afea83a147b3b5acc36cf2e38f7f8479f9.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 5a0c3b556848..2ca95c7db463 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -35,17 +35,6 @@ struct notifier_block; /* in notifier.h */ #define VM_DEFER_KMEMLEAK 0 #endif -/* - * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. - * - * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after - * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poison shadow on free if it was never allocated. - * - * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to - * determine which allocations need the module shadow freed. - */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* -- cgit v1.2.3 From 01d92c7f358ce892279ca830cf6ccf2862a17d1c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 24 Mar 2022 18:11:16 -0700 Subject: kasan, vmalloc, arm64: mark vmalloc mappings as pgprot_tagged HW_TAGS KASAN relies on ARM Memory Tagging Extension (MTE). With MTE, a memory region must be mapped as MT_NORMAL_TAGGED to allow setting memory tags via MTE-specific instructions. Add proper protection bits to vmalloc() allocations. These allocations are always backed by page_alloc pages, so the tags will actually be getting set on the corresponding physical memory. 
Link: https://lkml.kernel.org/r/983fc33542db2f6b1e77b34ca23448d4640bbb9e.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Co-developed-by: Vincenzo Frascino Signed-off-by: Vincenzo Frascino Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Mark Rutland Cc: Peter Collingbourne Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/vmalloc.h | 6 ++++++ include/linux/vmalloc.h | 7 +++++++ mm/vmalloc.c | 9 +++++++++ 3 files changed, 22 insertions(+) (limited to 'include/linux/vmalloc.h') diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index b9185503feae..38fafffe699f 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -25,4 +25,10 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) #endif +#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return pgprot_tagged(prot); +} + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2ca95c7db463..3b1df7da402d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size) } #endif +#ifndef arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return prot; +} +#endif + /* * Highlevel APIs for driver use */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1ab1f1b2f5b7..8530d86c3e58 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3128,6 +3128,15 @@ again: goto fail; } + /* + * Modify protection bits to allow tagging. + * This must be done before mapping by __vmalloc_area_node(). + */ + if (kasan_hw_tags_enabled() && + pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) + prot = arch_vmap_pgprot_tagged(prot); + + /* Allocate physical pages and map them into vmalloc space. 
*/ addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node); if (!addr) goto fail; -- cgit v1.2.3 From 559089e0a93d44280ec3ab478830af319c56dbe3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 15 Apr 2022 09:44:10 -0700 Subject: vmalloc: replace VM_NO_HUGE_VMAP with VM_ALLOW_HUGE_VMAP Huge page backed vmalloc memory could benefit performance in many cases. However, some users of vmalloc may not be ready to handle huge pages for various reasons: hardware constraints, potential page splits, etc. VM_NO_HUGE_VMAP was introduced to allow vmalloc users to opt out of huge pages. However, it is not easy to track down all the users that require the opt-out, as the allocations are passed through different stacks and may cause issues in different layers. To address this issue, replace VM_NO_HUGE_VMAP with an opt-in flag, VM_ALLOW_HUGE_VMAP, so that users that benefit from huge pages could ask for them specifically. Also, remove vmalloc_no_huge() and add opt-in helper vmalloc_huge(). Fixes: fac54e2bfb5b ("x86/Kconfig: Select HAVE_ARCH_HUGE_VMALLOC with HAVE_ARCH_HUGE_VMAP") Link: https://lore.kernel.org/netdev/14444103-d51b-0fb3-ee63-c3f182f0b546@molgen.mpg.de/ Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu Reviewed-by: Rik van Riel Signed-off-by: Linus Torvalds --- arch/Kconfig | 6 ++---- arch/powerpc/kernel/module.c | 2 +- arch/s390/kvm/pv.c | 7 +------ include/linux/vmalloc.h | 4 ++-- mm/vmalloc.c | 17 ++++++++++------- 5 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/arch/Kconfig b/arch/Kconfig index 29b0167c088b..31c4fdc4a4ba 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -854,10 +854,8 @@ config HAVE_ARCH_HUGE_VMAP # # Archs that select this would be capable of PMD-sized vmaps (i.e., -# arch_vmap_pmd_supported() returns true), and they must make no assumptions -# that vmalloc memory is mapped with PAGE_SIZE ptes. 
The VM_NO_HUGE_VMAP flag -# can be used to prohibit arch-specific allocations from using hugepages to -# help with this (e.g., modules may require it). +# arch_vmap_pmd_supported() returns true). The VM_ALLOW_HUGE_VMAP flag +# must be used to enable allocations to use hugepages. # config HAVE_ARCH_HUGE_VMALLOC depends on HAVE_ARCH_HUGE_VMAP diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 40a583e9d3c7..97a76a8619fb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -101,7 +101,7 @@ __module_alloc(unsigned long size, unsigned long start, unsigned long end, bool * too. */ return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7f7c0d6af2ce..cc7c9599f43e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -137,12 +137,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) /* Allocate variable storage */ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); vlen += uv_info.guest_virt_base_stor_len; - /* - * The Create Secure Configuration Ultravisor Call does not support - * using large pages for the virtual memory area. - * This is a hardware limitation. 
- */ - kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); + kvm->arch.pv.stor_var = vzalloc(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3b1df7da402d..b159c2789961 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -26,7 +26,7 @@ struct notifier_block; /* in notifier.h */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ -#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ +#define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -153,7 +153,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller) __alloc_size(1); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); -void *vmalloc_no_huge(unsigned long size) __alloc_size(1); +void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0b17498a34f1..07da85ae825b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3095,7 +3095,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; } - if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) { + if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) { unsigned long size_per_node; /* @@ -3262,21 +3262,24 @@ void *vmalloc(unsigned long size) EXPORT_SYMBOL(vmalloc); /** - * vmalloc_no_huge - allocate virtually contiguous memory using small pages - * 
@size: allocation size + * vmalloc_huge - allocate virtually contiguous memory, allow huge pages + * @size: allocation size + * @gfp_mask: flags for the page level allocator * - * Allocate enough non-huge pages to cover @size from the page level + * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. + * If @size is greater than or equal to PMD_SIZE, allow using + * huge pages for the memory * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_no_huge(unsigned long size) +void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) { return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP, + gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_no_huge); +EXPORT_SYMBOL_GPL(vmalloc_huge); /** * vzalloc - allocate virtually contiguous memory with zero fill -- cgit v1.2.3 From 993d0b287e2ef7bee2e8b13b0ce4d2b5066f278e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 12 Jun 2022 22:32:25 +0100 Subject: usercopy: Handle vm_map_ram() areas vmalloc does not allocate a vm_struct for vm_map_ram() areas. That causes us to deny usercopies from those areas. This affects XFS which uses vm_map_ram() for its directories. Fix this by calling find_vmap_area() instead of find_vm_area(). 
Fixes: 0aef499f3172 ("mm/usercopy: Detect vmalloc overruns") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Uladzislau Rezki (Sony) Tested-by: Zorro Lang Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220612213227.3881769-2-willy@infradead.org --- include/linux/vmalloc.h | 1 + mm/usercopy.c | 10 ++++------ mm/vmalloc.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b159c2789961..096d48aa3437 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -215,6 +215,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); +struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { diff --git a/mm/usercopy.c b/mm/usercopy.c index baeacc735b83..cd4b41d9bf76 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -173,16 +173,14 @@ static inline void check_heap_object(const void *ptr, unsigned long n, } if (is_vmalloc_addr(ptr)) { - struct vm_struct *area = find_vm_area(ptr); + struct vmap_area *area = find_vmap_area((unsigned long)ptr); unsigned long offset; - if (!area) { + if (!area) usercopy_abort("vmalloc", "no area", to_user, 0, n); - return; - } - offset = ptr - area->addr; - if (offset + n > get_vm_area_size(area)) + offset = (unsigned long)ptr - area->va_start; + if ((unsigned long)ptr + n > area->va_end) usercopy_abort("vmalloc", NULL, to_user, offset, n); return; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 07db42455dd4..effd1ff6a4b4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1798,7 +1798,7 @@ static void free_unmap_vmap_area(struct vmap_area *va) free_vmap_area_noflush(va); } -static struct vmap_area *find_vmap_area(unsigned long addr) +struct vmap_area *find_vmap_area(unsigned long addr) { 
struct vmap_area *va; -- cgit v1.2.3