From a473573964e51dcb6efc182f773cd3924be4a184 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:33 -0700 Subject: lib: code tagging module support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for code tagging from dynamically loaded modules. Link: https://lkml.kernel.org/r/20240321163705.3067592-12-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- kernel/module/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..ffa6b3e9cb43 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -1242,6 +1243,7 @@ static void free_module(struct module *mod) { trace_module_free(mod); + codetag_unload_module(mod); mod_sysfs_teardown(mod); /* @@ -2995,6 +2997,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Get rid of temporary copy. */ free_copy(info, flags); + codetag_load_module(mod); + /* Done! 
*/ trace_module_load(mod); -- cgit v1.2.3 From 47a92dfbe01f41bcbf359250ccb3caa589763abf Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:34 -0700 Subject: lib: prevent module unloading if memory is not freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip freeing module's data section if there are non-zero allocation tags because otherwise, once these allocations are freed, the access to their code tag would cause UAF. Link: https://lkml.kernel.org/r/20240321163705.3067592-13-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- include/linux/codetag.h | 6 +++--- kernel/module/main.c | 27 +++++++++++++++++++-------- lib/codetag.c | 11 ++++++++--- 3 files changed, 30 insertions(+), 14 deletions(-) (limited to 'kernel/module/main.c') diff --git a/include/linux/codetag.h b/include/linux/codetag.h index c44f5b83f24d..bfd0ba5c4185 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -35,7 +35,7 @@ struct codetag_type_desc { size_t tag_size; void (*module_load)(struct codetag_type *cttype, struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, + bool (*module_unload)(struct codetag_type *cttype, struct codetag_module *cmod); }; @@ -71,10 +71,10 @@ codetag_register_type(const struct codetag_type_desc *desc); #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) void codetag_load_module(struct module *mod); -void codetag_unload_module(struct module *mod); +bool codetag_unload_module(struct module *mod); #else static inline void codetag_load_module(struct module *mod) {} -static inline void 
codetag_unload_module(struct module *mod) {} +static inline bool codetag_unload_module(struct module *mod) { return true; } #endif #endif /* _LINUX_CODETAG_H */ diff --git a/kernel/module/main.c b/kernel/module/main.c index ffa6b3e9cb43..2d25eebc549d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1211,15 +1211,19 @@ static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) return module_alloc(size); } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(void *ptr, enum mod_mem_type type, + bool unload_codetags) { + if (!unload_codetags && mod_mem_type_is_core_data(type)) + return; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else module_memfree(ptr); } -static void free_mod_mem(struct module *mod) +static void free_mod_mem(struct module *mod, bool unload_codetags) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1230,20 +1234,27 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod_mem->base, type, + unload_codetags); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA, unload_codetags); } /* Free a module, remove from lists, etc. 
*/ static void free_module(struct module *mod) { + bool unload_codetags; + trace_module_free(mod); - codetag_unload_module(mod); + unload_codetags = codetag_unload_module(mod); + if (!unload_codetags) + pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", + mod->name); + mod_sysfs_teardown(mod); /* @@ -1285,7 +1296,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod); + free_mod_mem(mod, unload_codetags); } void *__symbol_get(const char *symbol) @@ -2298,7 +2309,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod->mem[t].base, t, true); return ret; } @@ -2428,7 +2439,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod); + free_mod_mem(mod, true); } int __weak module_finalize(const Elf_Ehdr *hdr, diff --git a/lib/codetag.c b/lib/codetag.c index 54d2828eba25..408062f722ce 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -5,6 +5,7 @@ #include #include #include +#include struct codetag_type { struct list_head link; @@ -206,12 +207,13 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -void codetag_unload_module(struct module *mod) +bool codetag_unload_module(struct module *mod) { struct codetag_type *cttype; + bool unload_ok = true; if (!mod) - return; + return true; mutex_lock(&codetag_lock); list_for_each_entry(cttype, &codetag_types, link) { @@ -228,7 +230,8 @@ void codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - cttype->desc.module_unload(cttype, cmod); + if (!cttype->desc.module_unload(cttype, cmod)) + unload_ok = false; cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); @@ -237,6 +240,8 @@ void codetag_unload_module(struct module *mod) 
up_write(&cttype->mod_lock); } mutex_unlock(&codetag_lock); + + return unload_ok; } #else /* CONFIG_MODULES */ -- cgit v1.2.3 From bc6b94d3ea062454ca889884db99e145efffcb93 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:17 +0300 Subject: module: make module_memory_{alloc,free} more self-contained MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the logic related to the memory allocation and freeing into module_memory_alloc() and module_memory_free(). Signed-off-by: Mike Rapoport (IBM) Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Masami Hiramatsu (Google) Acked-by: Song Liu Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 64 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..5b82b069e0d3 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type) mod_mem_type_is_core_data(type); } -static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) +static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { + unsigned int size = PAGE_ALIGN(mod->mem[type].size); + void *ptr; + + mod->mem[type].size = size; + if (mod_mem_use_vmalloc(type)) - return vzalloc(size); - return module_alloc(size); + ptr = vmalloc(size); + else + ptr = module_alloc(size); + + if (!ptr) + return -ENOMEM; + + /* + * The pointer to these blocks of memory are stored on the module + * structure and we keep that around so long as the module is + * around. We only free that memory when we unload the module. + * Just mark them as not being a leak then. 
The .init* ELF + * sections *do* get freed after boot so we *could* treat them + * slightly differently with kmemleak_ignore() and only grey + * them out as they work as typical memory allocations which + * *do* eventually get freed, but let's just keep things simple + * and avoid *any* false positives. + */ + kmemleak_not_leak(ptr); + + memset(ptr, 0, size); + mod->mem[type].base = ptr; + + return 0; } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(struct module *mod, enum mod_mem_type type) { + void *ptr = mod->mem[type].base; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else @@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. */ @@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { int i; - void *ptr; enum mod_mem_type t = 0; int ret = -ENOMEM; @@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct load_info *info) mod->mem[type].base = NULL; continue; } - mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size); - ptr = module_memory_alloc(mod->mem[type].size, type); - /* - * The pointer to these blocks of memory are stored on the module - * structure and we keep that around so long as the module is - * around. We only free that memory when we unload the module. - * Just mark them as not being a leak then. 
The .init* ELF - * sections *do* get freed after boot so we *could* treat them - * slightly differently with kmemleak_ignore() and only grey - * them out as they work as typical memory allocations which - * *do* eventually get freed, but let's just keep things simple - * and avoid *any* false positives. - */ - kmemleak_not_leak(ptr); - if (!ptr) { + + ret = module_memory_alloc(mod, type); + if (ret) { t = type; goto out_enomem; } - memset(ptr, 0, mod->mem[type].size); - mod->mem[type].base = ptr; } /* Transfer each section which specifies SHF_ALLOC */ @@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod, t); return ret; } -- cgit v1.2.3 From 12af2b83d0b17ec8b379b721dd4a8fbcd5d791f3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:18 +0300 Subject: mm: introduce execmem_alloc() and execmem_free() module_alloc() is used everywhere as a means to allocate memory for code. Besides being semantically wrong, this unnecessarily ties all subsystems that need to allocate code, such as ftrace, kprobes and BPF to modules and puts the burden of code allocation on the modules code. Several architectures override module_alloc() because of various constraints where the executable memory can be located and this causes additional obstacles for improvements of code allocation. Start splitting code allocation from modules by introducing execmem_alloc() and execmem_free() APIs. Initially, execmem_alloc() is a wrapper for module_alloc() and execmem_free() is a replacement of module_memfree() to allow updating all call sites to use the new APIs. 
Since architectures define different restrictions on placement, permissions, alignment and other parameters for memory that can be used by different subsystems that allocate executable memory, execmem_alloc() takes a type argument, that will be used to identify the calling subsystem and to allow architectures define parameters for ranges suitable for that subsystem. No functional changes. Signed-off-by: Mike Rapoport (IBM) Acked-by: Masami Hiramatsu (Google) Acked-by: Song Liu Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- arch/powerpc/kernel/kprobes.c | 6 ++--- arch/s390/kernel/ftrace.c | 4 +-- arch/s390/kernel/kprobes.c | 4 +-- arch/s390/kernel/module.c | 5 ++-- arch/sparc/net/bpf_jit_comp_32.c | 8 +++--- arch/x86/kernel/ftrace.c | 6 ++--- arch/x86/kernel/kprobes/core.c | 4 +-- include/linux/execmem.h | 57 ++++++++++++++++++++++++++++++++++++++++ include/linux/moduleloader.h | 3 --- kernel/bpf/core.c | 6 ++--- kernel/kprobes.c | 8 +++--- kernel/module/Kconfig | 1 + kernel/module/main.c | 25 ++++++------------ mm/Kconfig | 3 +++ mm/Makefile | 1 + mm/execmem.c | 32 ++++++++++++++++++++++ 16 files changed, 128 insertions(+), 45 deletions(-) create mode 100644 include/linux/execmem.h create mode 100644 mm/execmem.c (limited to 'kernel/module/main.c') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bbca90a5e2ec..9fcd01bb2ce6 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -130,7 +130,7 @@ void *alloc_insn_page(void) { void *page; - page = module_alloc(PAGE_SIZE); + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!page) return NULL; @@ -142,7 +142,7 @@ void *alloc_insn_page(void) } return page; error: - module_memfree(page); + execmem_free(page); return NULL; } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index c46381ea04ec..798249ef5646 100644 --- 
a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -7,13 +7,13 @@ * Author(s): Martin Schwidefsky */ -#include #include #include #include #include #include #include +#include #include #include #include @@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void) { const char *start, *end; - ftrace_plt = module_alloc(PAGE_SIZE); + ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE); if (!ftrace_plt) panic("cannot allocate ftrace plt\n"); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index f0cf20d4b3c5..3c1b1be744de 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -9,7 +9,6 @@ #define pr_fmt(fmt) "kprobes: " fmt -#include #include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +38,7 @@ void *alloc_insn_page(void) { void *page; - page = module_alloc(PAGE_SIZE); + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!page) return NULL; set_memory_rox((unsigned long)page, 1); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 42215f9404af..ac97a905e8cd 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,7 @@ void *module_alloc(unsigned long size) #ifdef CONFIG_FUNCTION_TRACER void module_arch_cleanup(struct module *mod) { - module_memfree(mod->arch.trampolines_start); + execmem_free(mod->arch.trampolines_start); } #endif @@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); numpages = DIV_ROUND_UP(size, PAGE_SIZE); - start = module_alloc(numpages * PAGE_SIZE); + start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE); if (!start) return -ENOMEM; set_memory_rox((unsigned long)start, numpages); diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index da2df1e84ed4..bda2dbd3f4c5 100644 --- 
a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include #include #include #include +#include #include #include @@ -713,7 +713,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); kfree(addrs); - module_memfree(image); + execmem_free(image); return; } memcpy(image + proglen, temp, ilen); @@ -736,7 +736,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } if (proglen == oldproglen) { - image = module_alloc(proglen); + image = execmem_alloc(EXECMEM_BPF, proglen); if (!image) goto out; } @@ -758,7 +758,7 @@ out: void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) - module_memfree(fp->bpf_func); + execmem_free(fp->bpf_func); bpf_prog_unlock_free(fp); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 70139d9d2e01..c8ddb7abda7c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -261,15 +262,14 @@ void arch_ftrace_update_code(int command) #ifdef CONFIG_X86_64 #ifdef CONFIG_MODULES -#include /* Module allocation simplifies allocating memory for code */ static inline void *alloc_tramp(unsigned long size) { - return module_alloc(size); + return execmem_alloc(EXECMEM_FTRACE, size); } static inline void tramp_free(void *tramp) { - module_memfree(tramp); + execmem_free(tramp); } #else /* Trampolines can only be created if modules are supported */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d0e49bd7c6f3..72e6a45e7ec2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -40,12 +40,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -495,7 +495,7 @@ void *alloc_insn_page(void) { void *page; - page = module_alloc(PAGE_SIZE); + page = 
execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!page) return NULL; diff --git a/include/linux/execmem.h b/include/linux/execmem.h new file mode 100644 index 000000000000..8eebc8ef66e7 --- /dev/null +++ b/include/linux/execmem.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_EXECMEM_ALLOC_H +#define _LINUX_EXECMEM_ALLOC_H + +#include +#include + +/** + * enum execmem_type - types of executable memory ranges + * + * There are several subsystems that allocate executable memory. + * Architectures define different restrictions on placement, + * permissions, alignment and other parameters for memory that can be used + * by these subsystems. + * Types in this enum identify subsystems that allocate executable memory + * and let architectures define parameters for ranges suitable for + * allocations by each subsystem. + * + * @EXECMEM_DEFAULT: default parameters that would be used for types that + * are not explicitly defined. + * @EXECMEM_MODULE_TEXT: parameters for module text sections + * @EXECMEM_KPROBES: parameters for kprobes + * @EXECMEM_FTRACE: parameters for ftrace + * @EXECMEM_BPF: parameters for BPF + * @EXECMEM_TYPE_MAX: + */ +enum execmem_type { + EXECMEM_DEFAULT, + EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT, + EXECMEM_KPROBES, + EXECMEM_FTRACE, + EXECMEM_BPF, + EXECMEM_TYPE_MAX, +}; + +/** + * execmem_alloc - allocate executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * The memory will have protections defined by architecture for executable + * region of the @type. 
+ * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc(enum execmem_type type, size_t size); + +/** + * execmem_free - free executable memory + * @ptr: pointer to the memory that should be freed + */ +void execmem_free(void *ptr); + +#endif /* _LINUX_EXECMEM_ALLOC_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 89b1e0ed9811..a3b8caee9405 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -29,9 +29,6 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); sections. Returns NULL on failure. */ void *module_alloc(unsigned long size); -/* Free memory returned from module_alloc. */ -void module_memfree(void *module_region); - /* Determines if the section name is an init section (that is only used during * module loading). */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1ea5ce5bb599..892e50afda59 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,7 @@ #include #include #include +#include #include #include @@ -1050,12 +1050,12 @@ void bpf_jit_uncharge_modmem(u32 size) void *__weak bpf_jit_alloc_exec(unsigned long size) { - return module_alloc(size); + return execmem_alloc(EXECMEM_BPF, size); } void __weak bpf_jit_free_exec(void *addr) { - module_memfree(addr); + execmem_free(addr); } struct bpf_binary_header * diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 65adc815fc6e..ddd7cdc16edf 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include @@ -113,17 +113,17 @@ enum kprobe_slot_state { void __weak *alloc_insn_page(void) { /* - * Use module_alloc() so this page is within +/- 2GB of where the + * Use execmem_alloc() so this page is within +/- 2GB of where the * kernel image and loaded 
module images reside. This is required * for most of the architectures. * (e.g. x86-64 needs this to handle the %rip-relative fixups.) */ - return module_alloc(PAGE_SIZE); + return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); } static void free_insn_page(void *page) { - module_memfree(page); + execmem_free(page); } struct kprobe_insn_cache kprobe_insn_slots = { diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index cb8377a18927..4047b6d48255 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -2,6 +2,7 @@ menuconfig MODULES bool "Enable loadable module support" modules + select EXECMEM help Kernel modules are small pieces of compiled code which can be inserted in the running kernel, rather than being diff --git a/kernel/module/main.c b/kernel/module/main.c index 5b82b069e0d3..d56b7df0cbb6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod, return ksym; } -void __weak module_memfree(void *module_region) -{ - /* - * This memory may be RO, and freeing RO memory in an interrupt is not - * supported by vmalloc. 
- */ - WARN_ON(in_interrupt()); - vfree(module_region); -} - void __weak module_arch_cleanup(struct module *mod) { } @@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) ptr = vmalloc(size); else - ptr = module_alloc(size); + ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); if (!ptr) return -ENOMEM; @@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) vfree(ptr); else - module_memfree(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w) llist_for_each_safe(pos, n, list) { initfree = container_of(pos, struct mod_initfree, node); - module_memfree(initfree->init_text); - module_memfree(initfree->init_data); - module_memfree(initfree->init_rodata); + execmem_free(initfree->init_text); + execmem_free(initfree->init_data); + execmem_free(initfree->init_rodata); kfree(initfree); } } @@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod) * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success - * path. module_memfree() cannot be called in an interrupt, so do the + * path. execmem_free() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. * - * Note that module_alloc() on most architectures creates W+X page + * Note that execmem_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. 
Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work by invoking diff --git a/mm/Kconfig b/mm/Kconfig index f30a18a0e37d..8bb60fdaba3c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1240,6 +1240,9 @@ config LOCK_MM_AND_FIND_VMA config IOMMU_MM_DATA bool +config EXECMEM + bool + source "mm/damon/Kconfig" endmenu diff --git a/mm/Makefile b/mm/Makefile index 4abb40b911ec..001336c91864 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -133,3 +133,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o +obj-$(CONFIG_EXECMEM) += execmem.o diff --git a/mm/execmem.c b/mm/execmem.c new file mode 100644 index 000000000000..480adc69b20d --- /dev/null +++ b/mm/execmem.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2002 Richard Henderson + * Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM. + * Copyright (C) 2023 Luis Chamberlain + * Copyright (C) 2024 Mike Rapoport IBM. + */ + +#include +#include +#include +#include + +static void *__execmem_alloc(size_t size) +{ + return module_alloc(size); +} + +void *execmem_alloc(enum execmem_type type, size_t size) +{ + return __execmem_alloc(size); +} + +void execmem_free(void *ptr) +{ + /* + * This memory may be RO, and freeing RO memory in an interrupt is not + * supported by vmalloc. + */ + WARN_ON(in_interrupt()); + vfree(ptr); +} -- cgit v1.2.3 From 223b5e57d0d50b0c07b933350dbcde92018d3080 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:20 +0300 Subject: mm/execmem, arch: convert remaining overrides of module_alloc to execmem Extend execmem parameters to accommodate more complex overrides of module_alloc() by architectures. 
This includes specification of a fallback range required by arm, arm64 and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for allocation of KASAN shadow required by s390 and x86 and support for late initialization of execmem required by arm64. The core implementation of execmem_alloc() takes care of suppressing warnings when the initial allocation fails but there is a fallback range defined. Signed-off-by: Mike Rapoport (IBM) Acked-by: Will Deacon Acked-by: Song Liu Tested-by: Liviu Dudau Signed-off-by: Luis Chamberlain --- arch/Kconfig | 8 +++++ arch/arm/kernel/module.c | 41 ++++++++++++++---------- arch/arm64/Kconfig | 1 + arch/arm64/kernel/module.c | 55 ++++++++++++++++++-------------- arch/powerpc/kernel/module.c | 60 ++++++++++++++++++++++------------- arch/s390/kernel/module.c | 54 +++++++++++++------------------- arch/x86/kernel/module.c | 70 ++++++++++++++--------------------------- include/linux/execmem.h | 30 +++++++++++++++++- include/linux/moduleloader.h | 12 ------- kernel/module/main.c | 26 ++++------------ mm/execmem.c | 74 +++++++++++++++++++++++++++++++++++++------- 11 files changed, 246 insertions(+), 185 deletions(-) (limited to 'kernel/module/main.c') diff --git a/arch/Kconfig b/arch/Kconfig index 93404c802d29..ee1ccbde50ef 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -977,6 +977,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC For architectures like powerpc/32 which have constraints on module allocation and need to allocate module data outside of module area. +config ARCH_WANTS_EXECMEM_LATE + bool + help + For architectures that do not allocate executable memory early on + boot, but rather require its initialization late when there is + enough entropy for module space randomization, for instance + arm64. 
+ config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e74d84f58b77..a98fdf6ff26c 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -34,23 +35,31 @@ #endif #ifdef CONFIG_MMU -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) { - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) - return p; - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + unsigned long fallback_start = 0, fallback_end = 0; + + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) { + fallback_start = VMALLOC_START; + fallback_end = VMALLOC_END; + } + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_EXEC, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + }, + }; + + return &execmem_info; } #endif diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b11c98b3e84..74b34a78b7ac 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,6 +105,7 @@ config ARM64 select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_EXECMEM_LATE if EXECMEM select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 
e92da4da1b2a..b7a7a23f9f8f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -108,41 +109,47 @@ static int __init module_init_limits(void) return 0; } -subsys_initcall(module_init_limits); -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) { - void *p = NULL; + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start = 0, end = 0; + + module_init_limits(); /* * Where possible, prefer to allocate within direct branch range of the * kernel such that no PLTs are necessary. */ if (module_direct_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_direct_base, - module_direct_base + SZ_128M, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } + start = module_direct_base; + end = module_direct_base + SZ_128M; - if (!p && module_plt_base) { - p = __vmalloc_node_range(size, MODULE_ALIGN, - module_plt_base, - module_plt_base + SZ_2G, - GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); - } - - if (!p) { - pr_warn_ratelimited("%s: unable to allocate memory\n", - __func__); + if (module_plt_base) { + fallback_start = module_plt_base; + fallback_end = module_plt_base + SZ_2G; + } + } else if (module_plt_base) { + start = module_plt_base; + end = module_plt_base + SZ_2G; } - /* Memory is intended to be executable, reset the pointer tag. 
*/ - return kasan_reset_tag(p); + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = PAGE_KERNEL, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + }, + }; + + return &execmem_info; } enum aarch64_reloc_op { diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a1692..ac80559015a3 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -89,39 +90,56 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) { pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start, end; /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. 
+ * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and + * allow allocating data in the entire vmalloc space */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ #ifdef MODULES_VADDR unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) { + start = limit; + fallback_start = MODULES_VADDR; + fallback_end = MODULES_END; + } else { + start = MODULES_VADDR; + } - return ptr; + end = MODULES_END; #else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); + start = VMALLOC_START; + end = VMALLOC_END; #endif + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = prot, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + [EXECMEM_MODULE_DATA] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index ac97a905e8cd..7fee64fdc1bb 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -37,41 +37,31 @@ #define PLT_ENTRY_SIZE 22 -static unsigned long get_module_load_offset(void) +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) { - static DEFINE_MUTEX(module_kaslr_mutex); - static unsigned long module_load_offset; - - if (!kaslr_enabled()) - return 0; - /* - * Calculate the module_load_offset the first time 
this code - * is called. Once calculated it stays the same until reboot. - */ - mutex_lock(&module_kaslr_mutex); - if (!module_load_offset) + unsigned long module_load_offset = 0; + unsigned long start; + + if (kaslr_enabled()) module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; - mutex_unlock(&module_kaslr_mutex); - return module_load_offset; -} -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, PAGE_KERNEL, - VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { - vfree(p); - return NULL; - } - return p; + start = MODULES_VADDR + module_load_offset; + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + }, + }; + + return &execmem_info; } #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e18914c0e38a..45b1a7c03379 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -36,55 +37,30 @@ do { \ } while (0) #endif -#ifdef CONFIG_RANDOMIZE_BASE -static unsigned long module_load_offset; +static struct execmem_info execmem_info __ro_after_init; -/* Mutex protects the module_load_offset. */ -static DEFINE_MUTEX(module_kaslr_mutex); - -static unsigned long int get_module_load_offset(void) -{ - if (kaslr_enabled()) { - mutex_lock(&module_kaslr_mutex); - /* - * Calculate the module_load_offset the first time this - * code is called. Once calculated it stays the same until - * reboot. 
- */ - if (module_load_offset == 0) - module_load_offset = - get_random_u32_inclusive(1, 1024) * PAGE_SIZE; - mutex_unlock(&module_kaslr_mutex); - } - return module_load_offset; -} -#else -static unsigned long int get_module_load_offset(void) +struct execmem_info __init *execmem_arch_setup(void) { - return 0; -} -#endif - -void *module_alloc(unsigned long size) -{ - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - - p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, PAGE_KERNEL, - VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { - vfree(p); - return NULL; - } - - return p; + unsigned long start, offset = 0; + + if (kaslr_enabled()) + offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; + + start = MODULES_VADDR + offset; + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + }, + }; + + return &execmem_info; } #ifdef CONFIG_X86_32 diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 96fc59258467..32cef1144117 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -5,6 +5,14 @@ #include #include +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#include +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else +#define MODULE_ALIGN PAGE_SIZE +#endif + /** * enum execmem_type - types of executable memory ranges * @@ -22,6 +30,7 @@ * @EXECMEM_KPROBES: parameters for kprobes * @EXECMEM_FTRACE: parameters for ftrace * @EXECMEM_BPF: parameters for BPF + * @EXECMEM_MODULE_DATA: parameters for module data sections * @EXECMEM_TYPE_MAX: */ enum execmem_type { @@ -30,22 +39,38 @@ enum execmem_type { 
EXECMEM_KPROBES, EXECMEM_FTRACE, EXECMEM_BPF, + EXECMEM_MODULE_DATA, EXECMEM_TYPE_MAX, }; +/** + * enum execmem_range_flags - options for executable memory allocations + * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + */ +enum execmem_range_flags { + EXECMEM_KASAN_SHADOW = (1 << 0), +}; + /** * struct execmem_range - definition of an address space suitable for code and * related data allocations * @start: address space start * @end: address space end (inclusive) + * @fallback_start: start of the secondary address space range for fallback + * allocations on architectures that require it + * @fallback_end: end of the secondary address space range (inclusive) * @pgprot: permissions for memory in this address space * @alignment: alignment required for text allocations + * @flags: options for memory allocations for this range */ struct execmem_range { unsigned long start; unsigned long end; + unsigned long fallback_start; + unsigned long fallback_end; pgprot_t pgprot; unsigned int alignment; + enum execmem_range_flags flags; }; /** @@ -82,6 +107,9 @@ struct execmem_info *execmem_arch_setup(void); * Allocates memory that will contain executable code, either generated or * loaded from kernel modules. * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * * The memory will have protections defined by architecture for executable * region of the @type.
* @@ -95,7 +123,7 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); -#ifdef CONFIG_EXECMEM +#if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) void execmem_init(void); #else static inline void execmem_init(void) {} diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index a3b8caee9405..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -25,10 +25,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, /* Additional bytes needed by arch in front of individual sections */ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); -/* Allocator used for allocating struct module, core sections and init - sections. Returns NULL on failure. */ -void *module_alloc(unsigned long size); - /* Determines if the section name is an init section (that is only used during * module loading). */ @@ -126,12 +122,4 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ - !defined(CONFIG_KASAN_VMALLOC) -#include -#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) -#else -#define MODULE_ALIGN PAGE_SIZE -#endif - #endif diff --git a/kernel/module/main.c b/kernel/module/main.c index d56b7df0cbb6..91e185607d4b 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1188,24 +1188,20 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static bool mod_mem_use_vmalloc(enum mod_mem_type type) -{ - return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) && - mod_mem_type_is_core_data(type); -} - static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); + enum execmem_type execmem_type; void *ptr; mod->mem[type].size = size; - if (mod_mem_use_vmalloc(type)) - ptr = vmalloc(size); 
+ if (mod_mem_type_is_data(type)) + execmem_type = EXECMEM_MODULE_DATA; else - ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); + execmem_type = EXECMEM_MODULE_TEXT; + ptr = execmem_alloc(execmem_type, size); if (!ptr) return -ENOMEM; @@ -1232,10 +1228,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) { void *ptr = mod->mem[type].base; - if (mod_mem_use_vmalloc(type)) - vfree(ptr); - else - execmem_free(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -1630,13 +1623,6 @@ static void free_modinfo(struct module *mod) } } -void * __weak module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - bool __weak module_init_section(const char *name) { return strstarts(name, ".init"); diff --git a/mm/execmem.c b/mm/execmem.c index 80e61c1e7319..0c4b36bc6d10 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -12,27 +12,49 @@ #include static struct execmem_info *execmem_info __ro_after_init; +static struct execmem_info default_execmem_info __ro_after_init; static void *__execmem_alloc(struct execmem_range *range, size_t size) { + bool kasan = range->flags & EXECMEM_KASAN_SHADOW; + unsigned long vm_flags = VM_FLUSH_RESET_PERMS; + gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN; unsigned long start = range->start; unsigned long end = range->end; unsigned int align = range->alignment; pgprot_t pgprot = range->pgprot; + void *p; + + if (kasan) + vm_flags |= VM_DEFER_KMEMLEAK; + + p = __vmalloc_node_range(size, align, start, end, gfp_flags, + pgprot, vm_flags, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!p && range->fallback_start) { + start = range->fallback_start; + end = range->fallback_end; + p = __vmalloc_node_range(size, align, start, end, gfp_flags, + pgprot, vm_flags, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p) { + pr_warn_ratelimited("execmem: unable 
to allocate memory\n"); + return NULL; + } + + if (kasan && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { + vfree(p); + return NULL; + } - return __vmalloc_node_range(size, align, start, end, - GFP_KERNEL, pgprot, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); + return kasan_reset_tag(p); } void *execmem_alloc(enum execmem_type type, size_t size) { - struct execmem_range *range; - - if (!execmem_info) - return module_alloc(size); - - range = &execmem_info->ranges[type]; + struct execmem_range *range = &execmem_info->ranges[type]; return __execmem_alloc(range, size); } @@ -67,10 +89,16 @@ static void execmem_init_missing(struct execmem_info *info) struct execmem_range *r = &info->ranges[i]; if (!r->start) { - r->pgprot = default_range->pgprot; + if (i == EXECMEM_MODULE_DATA) + r->pgprot = PAGE_KERNEL; + else + r->pgprot = default_range->pgprot; r->alignment = default_range->alignment; r->start = default_range->start; r->end = default_range->end; + r->flags = default_range->flags; + r->fallback_start = default_range->fallback_start; + r->fallback_end = default_range->fallback_end; } } } @@ -80,14 +108,36 @@ struct execmem_info * __weak execmem_arch_setup(void) return NULL; } -void __init execmem_init(void) +static void __init __execmem_init(void) { struct execmem_info *info = execmem_arch_setup(); - if (!info || !execmem_validate(info)) + if (!info) { + info = execmem_info = &default_execmem_info; + info->ranges[EXECMEM_DEFAULT].start = VMALLOC_START; + info->ranges[EXECMEM_DEFAULT].end = VMALLOC_END; + info->ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL_EXEC; + info->ranges[EXECMEM_DEFAULT].alignment = 1; + } + + if (!execmem_validate(info)) return; execmem_init_missing(info); execmem_info = info; } + +#ifdef CONFIG_ARCH_WANTS_EXECMEM_LATE +static int __init execmem_late_init(void) +{ + __execmem_init(); + return 0; +} +core_initcall(execmem_late_init); +#else +void __init execmem_init(void) +{ + __execmem_init(); +} +#endif -- cgit 
v1.2.3