From 4e140f59d285c1ca1e5c81b4c13e27366865bd09 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 Jan 2022 23:15:27 +0000 Subject: mm/usercopy: Check kmap addresses properly If you are copying to an address in the kmap region, you may not copy across a page boundary, no matter what the size of the underlying allocation. You can't kmap() a slab page because slab pages always come from low memory. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220110231530.665970-2-willy@infradead.org --- arch/x86/include/asm/highmem.h | 1 + include/linux/highmem-internal.h | 10 ++++++++++ mm/usercopy.c | 16 ++++++++++------ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 032e020853aa..731ee7cc40a5 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -26,6 +26,7 @@ #include #include #include +#include /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index a77be5630209..337bd9f32921 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -149,6 +149,11 @@ static inline void totalhigh_pages_add(long count) atomic_long_add(count, &_totalhigh_pages); } +static inline bool is_kmap_addr(const void *x) +{ + unsigned long addr = (unsigned long)x; + return addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP); +} #else /* CONFIG_HIGHMEM */ static inline struct page *kmap_to_page(void *addr) @@ -234,6 +239,11 @@ static inline void __kunmap_atomic(void *addr) static inline unsigned int nr_free_highpages(void) { return 0; } static inline unsigned long totalhigh_pages(void) { return 0UL; } +static inline bool is_kmap_addr(const void *x) +{ + return false; +} + #endif /* CONFIG_HIGHMEM */ /* diff --git a/mm/usercopy.c b/mm/usercopy.c index 2c235d5c2364..ff13e7708faa 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -229,12 +229,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - /* - * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the - * highmem page or fallback to virt_to_page(). The following - * is effectively a highmem-aware virt_to_slab(). - */ - folio = page_folio(kmap_to_page((void *)ptr)); + if (is_kmap_addr(ptr)) { + unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1); + + if ((unsigned long)ptr + n - 1 > page_end) + usercopy_abort("kmap", NULL, to_user, + offset_in_page(ptr), n); + return; + } + + folio = virt_to_folio(ptr); if (folio_test_slab(folio)) { /* Check slab allocator for flags and size. */ -- cgit v1.2.3 From 0aef499f3172a60222ae7460d61b364c134d6e1a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 Jan 2022 23:15:28 +0000 Subject: mm/usercopy: Detect vmalloc overruns If you have a vmalloc() allocation, or an address from calling vmap(), you cannot overrun the vm_area which describes it, regardless of the size of the underlying allocation. This probably doesn't do much for security because vmalloc comes with guard pages these days, but it prevents usercopy aborts when copying to a vmap() of smaller pages. 
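To make the vmap() case concrete, a minimal sketch (illustration only, not part of the patch; `ubuf` is an assumed user pointer and all error handling is omitted):

	#include <linux/mm.h>
	#include <linux/uaccess.h>
	#include <linux/vmalloc.h>

	static void vmap_copy_sketch(void __user *ubuf)
	{
		struct page *pages[4];
		void *buf;
		int i;

		/* Four independently allocated order-0 pages... */
		for (i = 0; i < 4; i++)
			pages[i] = alloc_page(GFP_KERNEL);
		/* ...mapped into one contiguous virtual range. */
		buf = vmap(pages, 4, VM_MAP, PAGE_KERNEL);

		/* Crosses an underlying page boundary but stays inside the
		 * vm_area: legitimate, and no longer falsely rejected. */
		(void)copy_to_user(ubuf, buf + PAGE_SIZE - 64, 128);

		/* Runs off the end of the four-page vm_area: caught by the
		 * new find_vm_area()/get_vm_area_size() check. */
		(void)copy_to_user(ubuf, buf + 3 * PAGE_SIZE, 2 * PAGE_SIZE);

		vunmap(buf);
	}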
Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220110231530.665970-3-willy@infradead.org --- mm/usercopy.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/usercopy.c b/mm/usercopy.c index ff13e7708faa..e1e856dca124 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -238,6 +239,21 @@ static inline void check_heap_object(const void *ptr, unsigned long n, return; } + if (is_vmalloc_addr(ptr)) { + struct vm_struct *area = find_vm_area(ptr); + unsigned long offset; + + if (!area) { + usercopy_abort("vmalloc", "no area", to_user, 0, n); + return; + } + + offset = ptr - area->addr; + if (offset + n > get_vm_area_size(area)) + usercopy_abort("vmalloc", NULL, to_user, offset, n); + return; + } + folio = virt_to_folio(ptr); if (folio_test_slab(folio)) { -- cgit v1.2.3 From ab502103ae3ce4c0fc393e598455efede3e523c9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 Jan 2022 23:15:29 +0000 Subject: mm/usercopy: Detect large folio overruns Move the compound page overrun detection out of CONFIG_HARDENED_USERCOPY_PAGESPAN and convert it to use folios so it's enabled for more people. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Kees Cook Reviewed-by: David Hildenbrand Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220110231530.665970-4-willy@infradead.org --- mm/usercopy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index e1e856dca124..9458c2b24b02 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -164,7 +164,6 @@ static inline void check_page_span(const void *ptr, unsigned long n, { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; - struct page *endpage; bool is_reserved, is_cma; /* @@ -195,11 +194,6 @@ static inline void check_page_span(const void *ptr, unsigned long n, ((unsigned long)end & (unsigned long)PAGE_MASK))) return; - /* Allow if fully inside the same compound (__GFP_COMP) page. */ - endpage = virt_to_head_page(end); - if (likely(endpage == page)) - return; - /* * Reject if range is entirely either Reserved (i.e. special or * device memory), or CMA. Otherwise, reject since the object spans @@ -259,6 +253,10 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (folio_test_slab(folio)) { /* Check slab allocator for flags and size. */ __check_heap_object(ptr, n, folio_slab(folio), to_user); + } else if (folio_test_large(folio)) { + unsigned long offset = ptr - folio_address(folio); + if (offset + n > folio_size(folio)) + usercopy_abort("page alloc", NULL, to_user, offset, n); } else { /* Verify object does not incorrectly span multiple pages. */ check_page_span(ptr, n, folio_page(folio, 0), to_user); -- cgit v1.2.3 From 1109a5d907015005cdbe9eaa4fec40213e2f9010 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 Jan 2022 23:15:30 +0000 Subject: usercopy: Remove HARDENED_USERCOPY_PAGESPAN There isn't enough information to make this a useful check any more; the useful parts of it were moved in earlier patches, so remove this set of checks now. 
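Taken together, the three preceding patches leave check_heap_object() with roughly the following shape (condensed from the hunks above for readability, not a verbatim quote; usercopy_abort() reports the violation and does not return normally):

	static inline void check_heap_object(const void *ptr, unsigned long n,
					     bool to_user)
	{
		struct folio *folio;

		if (!virt_addr_valid(ptr))
			return;

		if (is_kmap_addr(ptr)) {
			/* A kmap'd object may not cross a page boundary. */
			unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);

			if ((unsigned long)ptr + n - 1 > page_end)
				usercopy_abort("kmap", NULL, to_user,
					       offset_in_page(ptr), n);
			return;
		}

		if (is_vmalloc_addr(ptr)) {
			/* A vmalloc/vmap object may not cross its vm_area. */
			struct vm_struct *area = find_vm_area(ptr);
			unsigned long offset;

			if (!area) {
				usercopy_abort("vmalloc", "no area", to_user, 0, n);
				return;
			}

			offset = ptr - area->addr;
			if (offset + n > get_vm_area_size(area))
				usercopy_abort("vmalloc", NULL, to_user, offset, n);
			return;
		}

		folio = virt_to_folio(ptr);
		if (folio_test_slab(folio)) {
			/* Slab object: the allocator knows the exact bounds. */
			__check_heap_object(ptr, n, folio_slab(folio), to_user);
		} else if (folio_test_large(folio)) {
			/* Page allocator object: may not cross the folio. */
			unsigned long offset = ptr - folio_address(folio);

			if (offset + n > folio_size(folio))
				usercopy_abort("page alloc", NULL, to_user, offset, n);
		}
	}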
Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Kees Cook Reviewed-by: David Hildenbrand Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220110231530.665970-5-willy@infradead.org --- mm/usercopy.c | 61 -------------------------------------------------------- security/Kconfig | 13 +----------- 2 files changed, 1 insertion(+), 73 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 9458c2b24b02..ac8a093e90c1 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -158,64 +158,6 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n, usercopy_abort("null address", NULL, to_user, ptr, n); } -/* Checks for allocs that are marked in some way as spanning multiple pages. */ -static inline void check_page_span(const void *ptr, unsigned long n, - struct page *page, bool to_user) -{ -#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN - const void *end = ptr + n - 1; - bool is_reserved, is_cma; - - /* - * Sometimes the kernel data regions are not marked Reserved (see - * check below). And sometimes [_sdata,_edata) does not cover - * rodata and/or bss, so check each range explicitly. - */ - - /* Allow reads of kernel rodata region (if not marked as Reserved). */ - if (ptr >= (const void *)__start_rodata && - end <= (const void *)__end_rodata) { - if (!to_user) - usercopy_abort("rodata", NULL, to_user, 0, n); - return; - } - - /* Allow kernel data region (if not marked as Reserved). */ - if (ptr >= (const void *)_sdata && end <= (const void *)_edata) - return; - - /* Allow kernel bss region (if not marked as Reserved). */ - if (ptr >= (const void *)__bss_start && - end <= (const void *)__bss_stop) - return; - - /* Is the object wholly within one base page? */ - if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == - ((unsigned long)end & (unsigned long)PAGE_MASK))) - return; - - /* - * Reject if range is entirely either Reserved (i.e. special or - * device memory), or CMA. Otherwise, reject since the object spans - * several independently allocated pages. - */ - is_reserved = PageReserved(page); - is_cma = is_migrate_cma_page(page); - if (!is_reserved && !is_cma) - usercopy_abort("spans multiple pages", NULL, to_user, 0, n); - - for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { - page = virt_to_head_page(ptr); - if (is_reserved && !PageReserved(page)) - usercopy_abort("spans Reserved and non-Reserved pages", - NULL, to_user, 0, n); - if (is_cma && !is_migrate_cma_page(page)) - usercopy_abort("spans CMA and non-CMA pages", NULL, - to_user, 0, n); - } -#endif -} - static inline void check_heap_object(const void *ptr, unsigned long n, bool to_user) { @@ -257,9 +199,6 @@ static inline void check_heap_object(const void *ptr, unsigned long n, unsigned long offset = ptr - folio_address(folio); if (offset + n > folio_size(folio)) usercopy_abort("page alloc", NULL, to_user, offset, n); - } else { - /* Verify object does not incorrectly span multiple pages. */ - check_page_span(ptr, n, folio_page(folio, 0), to_user); } } diff --git a/security/Kconfig b/security/Kconfig index 9b2c4925585a..f29e4c656983 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -160,20 +160,9 @@ config HARDENED_USERCOPY copy_from_user() functions) by rejecting memory ranges that are larger than the specified heap object, span multiple separately allocated pages, are not on the process stack, - or are part of the kernel text. This kills entire classes + or are part of the kernel text. This prevents entire classes of heap overflow exploits and similar kernel memory exposures. 
-config HARDENED_USERCOPY_PAGESPAN - bool "Refuse to copy allocations that span multiple pages" - depends on HARDENED_USERCOPY - depends on BROKEN - help - When a multi-page allocation is done without __GFP_COMP, - hardened usercopy will reject attempts to copy it. There are, - however, several cases of this in the kernel that have not all - been removed. This config is intended to be used only while - trying to find such users. - config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE -- cgit v1.2.3 From 75c1182e18f4a66bbd2c91511b6b629dac6a27dc Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 7 Apr 2022 10:59:30 -0700 Subject: security: don't treat structure as an array of struct hlist_head The initialization of "security_hook_heads" is done by casting it to another structure pointer type, and treating it as an array of "struct hlist_head" objects. This requires an exception be made in "randstruct", because otherwise it will emit an error, reducing the effectiveness of the hardening technique. Instead of using a cast, initialize the individual struct hlist_head elements in security_hook_heads explicitly. This removes the need for the cast and randstruct exception. Signed-off-by: Bill Wendling Cc: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220407175930.471870-1-morbo@google.com --- scripts/gcc-plugins/randomize_layout_plugin.c | 2 -- security/security.c | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 334741a31d0a..c2ec81b68505 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -52,8 +52,6 @@ static const struct whitelist_entry whitelist[] = { { "net/unix/af_unix.c", "unix_skb_parms", "char" }, /* big_key payload.data struct splashing */ { "security/keys/big_key.c", "path", "void *" }, - /* walk struct security_hook_heads as an array of struct hlist_head */ - { "security/security.c", "hlist_head", "security_hook_heads" }, { } }; diff --git a/security/security.c b/security/security.c index b7cf5cbfdc67..37a9eeb901e0 100644 --- a/security/security.c +++ b/security/security.c @@ -365,13 +365,12 @@ static void __init ordered_lsm_init(void) int __init early_security_init(void) { - int i; - struct hlist_head *list = (struct hlist_head *) &security_hook_heads; struct lsm_info *lsm; - for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head); - i++) - INIT_HLIST_HEAD(&list[i]); +#define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + INIT_HLIST_HEAD(&security_hook_heads.NAME); +#include "linux/lsm_hook_defs.h" +#undef LSM_HOOK for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) -- cgit v1.2.3 From e6f3b3c9c109ed57230996cf4a4c1b8ae7e36a81 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 5 Apr 2022 15:16:18 -0700 Subject: cfi: Use __builtin_function_start Clang 14 added support for the __builtin_function_start function, which allows us to implement the function_nocfi macro without architecture-specific inline assembly and in a way that also works with static initializers. Change CONFIG_CFI_CLANG to depend on Clang >= 14, define function_nocfi using __builtin_function_start, and remove the arm64 inline assembly implementation. 
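One consequence worth spelling out: __builtin_function_start() is a constant expression, unlike the old inline assembly, so function_nocfi() can now be used in static initializers. A hypothetical sketch (the names here are illustrative, not taken from the patch):

	/* A raw entry point handed to firmware or assembly, where the
	 * address of a CFI jump-table entry would be the wrong value. */
	static void early_reset_handler(void)
	{
		/* real handler body */
	}

	struct raw_entry {
		unsigned long addr;
	};

	/* Valid as a static initializer with __builtin_function_start();
	 * the inline-asm version could not be evaluated at compile time. */
	static struct raw_entry reset_entry = {
		.addr = (unsigned long)function_nocfi(early_reset_handler),
	};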
Link: https://github.com/llvm/llvm-project/commit/ec2e26eaf63558934f5b73a6e530edc453cf9508 Link: https://github.com/ClangBuiltLinux/linux/issues/1353 Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Will Deacon # arm64 Reviewed-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220405221618.633743-1-samitolvanen@google.com --- arch/Kconfig | 5 +---- arch/arm64/include/asm/compiler.h | 16 ---------------- include/linux/compiler-clang.h | 10 ++++++++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 29b0167c088b..c1627bce4a3a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -723,10 +723,7 @@ config ARCH_SUPPORTS_CFI_CLANG config CFI_CLANG bool "Use Clang's Control Flow Integrity (CFI)" depends on LTO_CLANG && ARCH_SUPPORTS_CFI_CLANG - # Clang >= 12: - # - https://bugs.llvm.org/show_bug.cgi?id=46258 - # - https://bugs.llvm.org/show_bug.cgi?id=47479 - depends on CLANG_VERSION >= 120000 + depends on CLANG_VERSION >= 140000 select KALLSYMS help This option enables Clang’s forward-edge Control Flow Integrity diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index dc3ea4080e2e..6fb2e6bcc392 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -23,20 +23,4 @@ #define __builtin_return_address(val) \ (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) -#ifdef CONFIG_CFI_CLANG -/* - * With CONFIG_CFI_CLANG, the compiler replaces function address - * references with the address of the function's CFI jump table - * entry. The function_nocfi macro always returns the address of the - * actual function instead. - */ -#define function_nocfi(x) ({ \ - void *addr; \ - asm("adrp %0, " __stringify(x) "\n\t" \ - "add %0, %0, :lo12:" __stringify(x) \ - : "=r" (addr)); \ - addr; \ -}) -#endif - #endif /* __ASM_COMPILER_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index babb1347148c..c84fec767445 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -69,6 +69,16 @@ #define __nocfi __attribute__((__no_sanitize__("cfi"))) #define __cficanonical __attribute__((__cfi_canonical_jump_table__)) +#if defined(CONFIG_CFI_CLANG) +/* + * With CONFIG_CFI_CLANG, the compiler replaces function address + * references with the address of the function's CFI jump table + * entry. The function_nocfi macro always returns the address of the + * actual function instead. + */ +#define function_nocfi(x) __builtin_function_start(x) +#endif + /* * Turn individual warnings and errors on and off locally, depending * on version. -- cgit v1.2.3 From 3b5eed3c71a2fb60aa4405ad92a2a6ad2677f220 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:54:58 -0700 Subject: netfs: Eliminate Clang randstruct warning Clang's structure layout randomization feature gets upset when it sees struct inode (which is randomized) cast to struct netfs_i_context. This is due to seeing the inode pointer as being treated as an array of inodes, rather than "something else, following struct inode". Since netfs can't use container_of() (since it doesn't know what the true containing struct is), it uses this direct offset instead. Adjust the code to better reflect what is happening: an arbitrary pointer is being adjusted and cast to something else: use a "void *" for the math. 
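The pattern generalizes to any context structure allocated immediately after another object. A distilled sketch with stand-in types:

	struct outer { long a; };	/* stand-in for struct inode */
	struct ctx { long b; };		/* stand-in for struct netfs_i_context */

	static struct ctx *outer_to_ctx(struct outer *o)
	{
		/* Not "o + 1": that indexes an array of struct outer, which a
		 * randomized-layout build flags as a cross-structure cast. */
		return (void *)o + sizeof(*o);
	}

	static struct outer *ctx_to_outer(struct ctx *c)
	{
		return (void *)c - sizeof(struct outer);
	}

(Arithmetic on void * is a GNU C extension the kernel already relies on, and is what the patch itself uses.)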
The resulting binary output is the same, but Clang no longer sees an unexpected cross-structure cast: In file included from ../fs/nfs/inode.c:50: In file included from ../fs/nfs/fscache.h:15: In file included from ../include/linux/fscache.h:18: ../include/linux/netfs.h:298:9: error: casting from randomized structure pointer type 'struct inode *' to 'struct netfs_i_context *' return (struct netfs_i_context *)(inode + 1); ^ 1 error generated. Cc: David Howells Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-2-keescook@chromium.org Reviewed-by: Jeff Layton Link: https://lore.kernel.org/lkml/7562f8eccd7cc0e447becfe9912179088784e3b9.camel@kernel.org --- include/linux/netfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c7bf1eaf51d5..0c33b715cbfd 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -295,7 +295,7 @@ extern void netfs_stats_show(struct seq_file *); */ static inline struct netfs_i_context *netfs_i_context(struct inode *inode) { - return (struct netfs_i_context *)(inode + 1); + return (void *)inode + sizeof(*inode); } /** @@ -307,7 +307,7 @@ static inline struct netfs_i_context *netfs_i_context(struct inode *inode) */ static inline struct inode *netfs_inode(struct netfs_i_context *ctx) { - return ((struct inode *)ctx) - 1; + return (void *)ctx - sizeof(struct inode); } /** -- cgit v1.2.3 From d3646589703731026ae7bcba5731fa7a7d0e5291 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:54:59 -0700 Subject: sancov: Split plugin build from plugin CFLAGS When the sancov_plugin is enabled, it gets added to gcc-plugin-y which is used to populate both GCC_PLUGIN (for building the plugin) and GCC_PLUGINS_CFLAGS (for enabling and options). Instead of adding sancov to both and then removing it from GCC_PLUGINS_CFLAGS, create a separate list, gcc-plugin-external-y, which is only added to GCC_PLUGIN. This will also be used by the coming randstruct build changes. Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-3-keescook@chromium.org --- scripts/Makefile.gcc-plugins | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index f67153b260c0..927c3dd57f84 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -8,8 +8,6 @@ ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY endif export DISABLE_LATENT_ENTROPY_PLUGIN -gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV) += sancov_plugin.so - gcc-plugin-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) += structleak_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE) \ += -fplugin-arg-structleak_plugin-verbose @@ -53,13 +51,17 @@ export DISABLE_ARM_SSP_PER_TASK_PLUGIN # All the plugin CFLAGS are collected here in case a build target needs to # filter them out of the KBUILD_CFLAGS. GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y)) -# The sancov_plugin.so is included via CFLAGS_KCOV, so it is removed here. -GCC_PLUGINS_CFLAGS := $(filter-out %/sancov_plugin.so, $(GCC_PLUGINS_CFLAGS)) export GCC_PLUGINS_CFLAGS # Add the flags to the build! KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) -# All enabled GCC plugins are collected here for building below. 
-GCC_PLUGIN := $(gcc-plugin-y) +# Some plugins are enabled outside of this Makefile, but they still need to +# be included in GCC_PLUGIN so they can get built. +gcc-plugin-external-$(CONFIG_GCC_PLUGIN_SANCOV) \ + += sancov_plugin.so + +# All enabled GCC plugins are collected here for building in +# scripts/gcc-scripts/Makefile. +GCC_PLUGIN := $(gcc-plugin-y) $(gcc-plugin-external-y) export GCC_PLUGIN -- cgit v1.2.3 From 595b893e2087de306d0781795fb8ec47873596a6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:55:00 -0700 Subject: randstruct: Reorganize Kconfigs and attribute macros In preparation for Clang supporting randstruct, reorganize the Kconfigs, move the attribute macros, and generalize the feature to be named CONFIG_RANDSTRUCT for on/off, CONFIG_RANDSTRUCT_FULL for the full randomization mode, and CONFIG_RANDSTRUCT_PERFORMANCE for the cache-line sized mode. Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-4-keescook@chromium.org --- Documentation/kbuild/reproducible-builds.rst | 7 ++-- arch/riscv/Kconfig | 2 +- arch/x86/mm/pti.c | 2 +- include/linux/compiler-gcc.h | 8 ---- include/linux/compiler_types.h | 14 +++---- include/linux/vermagic.h | 8 ++-- kernel/panic.c | 2 +- scripts/Makefile.gcc-plugins | 4 +- scripts/gcc-plugins/Kconfig | 38 ----------------- security/Kconfig.hardening | 62 ++++++++++++++++++++++++++++ 10 files changed, 81 insertions(+), 66 deletions(-) diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index 3b25655e441b..81ff30505d35 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -99,10 +99,9 @@ unreproducible parts can be treated as sources: Structure randomisation ----------------------- -If you enable ``CONFIG_GCC_PLUGIN_RANDSTRUCT``, you will need to -pre-generate the random seed in -``scripts/gcc-plugins/randomize_layout_seed.h`` so the same value -is used in rebuilds. +If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate +the random seed in ``scripts/gcc-plugins/randomize_layout_seed.h`` +so the same value is used in rebuilds. Debug info conflicts -------------------- diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 00fd9c548f26..3ac2a81a55eb 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -468,7 +468,7 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y - depends on !GCC_PLUGIN_RANDSTRUCT + depends on !RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS config PHYS_RAM_BASE_FIXED diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 5d5c7bb50ce9..ffe3b3a087fe 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -540,7 +540,7 @@ static inline bool pti_kernel_image_global_ok(void) * cases where RANDSTRUCT is in use to help keep the layout a * secret. */ - if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT)) + if (IS_ENABLED(CONFIG_RANDSTRUCT)) return false; return true; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 52299c957c98..a0c55eeaeaf1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,14 +66,6 @@ __builtin_unreachable(); \ } while (0) -#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) -#define __randomize_layout __attribute__((randomize_layout)) -#define __no_randomize_layout __attribute__((no_randomize_layout)) -/* This anon struct can add padding, so only enable it under randstruct. 
*/ -#define randomized_struct_fields_start struct { -#define randomized_struct_fields_end } __randomize_layout; -#endif - /* * GCC 'asm goto' miscompiles certain code sequences: * diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 1c2c33ae1b37..d08dfcb0ac68 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -242,15 +242,15 @@ struct ftrace_likely_data { # define __latent_entropy #endif -#ifndef __randomize_layout +#if defined(RANDSTRUCT) && !defined(__CHECKER__) +# define __randomize_layout __designated_init __attribute__((randomize_layout)) +# define __no_randomize_layout __attribute__((no_randomize_layout)) +/* This anon struct can add padding, so only enable it under randstruct. */ +# define randomized_struct_fields_start struct { +# define randomized_struct_fields_end } __randomize_layout; +#else # define __randomize_layout __designated_init -#endif - -#ifndef __no_randomize_layout # define __no_randomize_layout -#endif - -#ifndef randomized_struct_fields_start # define randomized_struct_fields_start # define randomized_struct_fields_end #endif diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 329d63babaeb..efb51a2da599 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -32,11 +32,11 @@ #else #define MODULE_VERMAGIC_MODVERSIONS "" #endif -#ifdef RANDSTRUCT_PLUGIN +#ifdef RANDSTRUCT #include -#define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED +#define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED #else -#define MODULE_RANDSTRUCT_PLUGIN +#define MODULE_RANDSTRUCT #endif #define VERMAGIC_STRING \ @@ -44,6 +44,6 @@ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ MODULE_ARCH_VERMAGIC \ - MODULE_RANDSTRUCT_PLUGIN + MODULE_RANDSTRUCT #endif /* _LINUX_VERMAGIC_H */ diff --git a/kernel/panic.c b/kernel/panic.c index eb4dfb932c85..8355b19676f8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -48,7 +48,7 @@ unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; static unsigned long tainted_mask = - IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0; + IS_ENABLED(CONFIG_RANDSTRUCT) ? 
(1 << TAINT_RANDSTRUCT) : 0; static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 927c3dd57f84..827c47ce5c73 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -24,8 +24,8 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) \ gcc-plugin-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) += randomize_layout_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) \ - += -DRANDSTRUCT_PLUGIN -gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE) \ + += -DRANDSTRUCT +gcc-plugin-cflags-$(CONFIG_RANDSTRUCT_PERFORMANCE) \ += -fplugin-arg-randomize_layout_plugin-performance-mode gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak_plugin.so diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index 51d81c3f03d6..e383cda05367 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -46,44 +46,6 @@ config GCC_PLUGIN_LATENT_ENTROPY * https://grsecurity.net/ * https://pax.grsecurity.net/ -config GCC_PLUGIN_RANDSTRUCT - bool "Randomize layout of sensitive kernel structures" - select MODVERSIONS if MODULES - help - If you say Y here, the layouts of structures that are entirely - function pointers (and have not been manually annotated with - __no_randomize_layout), or structures that have been explicitly - marked with __randomize_layout, will be randomized at compile-time. - This can introduce the requirement of an additional information - exposure vulnerability for exploits targeting these structure - types. - - Enabling this feature will introduce some performance impact, - slightly increase memory usage, and prevent the use of forensic - tools like Volatility against the system (unless the kernel - source tree isn't cleaned after kernel installation). - - The seed used for compilation is located at - scripts/gcc-plugins/randomize_layout_seed.h. It remains after - a make clean to allow for external modules to be compiled with - the existing seed and will be removed by a make mrproper or - make distclean. - - This plugin was ported from grsecurity/PaX. More information at: - * https://grsecurity.net/ - * https://pax.grsecurity.net/ - -config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE - bool "Use cacheline-aware structure randomization" - depends on GCC_PLUGIN_RANDSTRUCT - depends on !COMPILE_TEST # do not reduce test coverage - help - If you say Y here, the RANDSTRUCT randomization will make a - best effort at restricting randomization to cacheline-sized - groups of elements. It will further not randomize bitfields - in structures. This reduces the performance hit of RANDSTRUCT - at the cost of weakened randomization. - config GCC_PLUGIN_ARM_SSP_PER_TASK bool depends on GCC_PLUGINS && ARM diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index ded4d7c0d132..364e3f8c6eea 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -266,4 +266,66 @@ config ZERO_CALL_USED_REGS endmenu +choice + prompt "Randomize layout of sensitive kernel structures" + default RANDSTRUCT_FULL if COMPILE_TEST && GCC_PLUGINS + default RANDSTRUCT_NONE + help + If you enable this, the layouts of structures that are entirely + function pointers (and have not been manually annotated with + __no_randomize_layout), or structures that have been explicitly + marked with __randomize_layout, will be randomized at compile-time. 
+ This can introduce the requirement of an additional information + exposure vulnerability for exploits targeting these structure + types. + + Enabling this feature will introduce some performance impact, + slightly increase memory usage, and prevent the use of forensic + tools like Volatility against the system (unless the kernel + source tree isn't cleaned after kernel installation). + + The seed used for compilation is located at + scripts/randomize_layout_seed.h. It remains after a "make clean" + to allow for external modules to be compiled with the existing + seed and will be removed by a "make mrproper" or "make distclean". + + config RANDSTRUCT_NONE + bool "Disable structure layout randomization" + help + Build normally: no structure layout randomization. + + config RANDSTRUCT_FULL + bool "Fully randomize structure layout" + depends on GCC_PLUGINS + select MODVERSIONS if MODULES + help + Fully randomize the member layout of sensitive + structures as much as possible, which may have both a + memory size and performance impact. + + config RANDSTRUCT_PERFORMANCE + bool "Limit randomization of structure layout to cache-lines" + depends on GCC_PLUGINS + select MODVERSIONS if MODULES + help + Randomization of sensitive kernel structures will make a + best effort at restricting randomization to cacheline-sized + groups of members. It will further not randomize bitfields + in structures. This reduces the performance hit of RANDSTRUCT + at the cost of weakened randomization. +endchoice + +config RANDSTRUCT + def_bool !RANDSTRUCT_NONE + +config GCC_PLUGIN_RANDSTRUCT + def_bool GCC_PLUGINS && RANDSTRUCT + help + Use GCC plugin to randomize structure layout. + + This plugin was ported from grsecurity/PaX. More + information at: + * https://grsecurity.net/ + * https://pax.grsecurity.net/ + endmenu -- cgit v1.2.3 From 613f4b3ed7902d1dbbc6ade6401e452a63dfbc21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:55:01 -0700 Subject: randstruct: Split randstruct Makefile and CFLAGS To enable the new Clang randstruct implementation[1], move randstruct into its own Makefile and split the CFLAGS from GCC_PLUGINS_CFLAGS into RANDSTRUCT_CFLAGS. 
[1] https://reviews.llvm.org/D121556 Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-5-keescook@chromium.org --- Makefile | 1 + arch/arm/vdso/Makefile | 2 +- arch/arm64/kernel/vdso/Makefile | 3 ++- arch/sparc/vdso/Makefile | 3 ++- arch/x86/entry/vdso/Makefile | 3 ++- scripts/Makefile.gcc-plugins | 8 ++------ scripts/Makefile.randstruct | 14 ++++++++++++++ 7 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 scripts/Makefile.randstruct diff --git a/Makefile b/Makefile index 29e273d3f8cc..91c91fcf3c24 100644 --- a/Makefile +++ b/Makefile @@ -1011,6 +1011,7 @@ include-$(CONFIG_KASAN) += scripts/Makefile.kasan include-$(CONFIG_KCSAN) += scripts/Makefile.kcsan include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan include-$(CONFIG_KCOV) += scripts/Makefile.kcov +include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins include $(addprefix $(srctree)/, $(include-y)) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index ec52b776f926..8ca1c9f262a2 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -28,7 +28,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH) CFLAGS_REMOVE_vdso.o = -pg # Force -O2 to avoid libgcc dependencies -CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(GCC_PLUGINS_CFLAGS) +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) ifeq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o = -O2 else diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 172452f79e46..d9147fba1a0b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -32,7 +32,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # -Wmissing-prototypes and -Wmissing-declarations are removed from # the CFLAGS of vgettimeofday.c to make possible to build the # kernel with CONFIG_WERROR enabled. -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index c5e1545bc5cf..77d7b9032158 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -58,7 +58,7 @@ CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7 -$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
@@ -88,6 +88,7 @@ $(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 693f8b9031fb..c2a8b76ae0bc 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -91,7 +91,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),) endif endif -$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. @@ -148,6 +148,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(RANDSTRUCT_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 827c47ce5c73..692d64a70542 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -22,12 +22,6 @@ export DISABLE_STRUCTLEAK_PLUGIN gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) \ += -DSTRUCTLEAK_PLUGIN -gcc-plugin-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) += randomize_layout_plugin.so -gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) \ - += -DRANDSTRUCT -gcc-plugin-cflags-$(CONFIG_RANDSTRUCT_PERFORMANCE) \ - += -fplugin-arg-randomize_layout_plugin-performance-mode - gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak_plugin.so gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \ += -DSTACKLEAK_PLUGIN @@ -60,6 +54,8 @@ KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) # be included in GCC_PLUGIN so they can get built. gcc-plugin-external-$(CONFIG_GCC_PLUGIN_SANCOV) \ += sancov_plugin.so +gcc-plugin-external-$(CONFIG_GCC_PLUGIN_RANDSTRUCT) \ + += randomize_layout_plugin.so # All enabled GCC plugins are collected here for building in # scripts/gcc-scripts/Makefile. 
diff --git a/scripts/Makefile.randstruct b/scripts/Makefile.randstruct new file mode 100644 index 000000000000..4d741e6db554 --- /dev/null +++ b/scripts/Makefile.randstruct @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +randstruct-cflags-y += -DRANDSTRUCT + +ifdef CONFIG_GCC_PLUGIN_RANDSTRUCT +randstruct-cflags-y \ + += -fplugin=$(objtree)/scripts/gcc-plugins/randomize_layout_plugin.so +randstruct-cflags-$(CONFIG_RANDSTRUCT_PERFORMANCE) \ + += -fplugin-arg-randomize_layout_plugin-performance-mode +endif + +export RANDSTRUCT_CFLAGS := $(randstruct-cflags-y) + +KBUILD_CFLAGS += $(RANDSTRUCT_CFLAGS) -- cgit v1.2.3 From be2b34fa9be31c60a95989f984c9a5d40cd781b6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:55:02 -0700 Subject: randstruct: Move seed generation into scripts/basic/ To enable Clang randstruct support, move the structure layout randomization seed generation out of scripts/gcc-plugins/ into scripts/basic/ so it happens early enough that it can be used by either compiler implementation. The gcc-plugin still builds its own header file, but now does so from the common "randstruct.seed" file. Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-6-keescook@chromium.org --- Documentation/dontdiff | 1 + Documentation/kbuild/reproducible-builds.rst | 5 +++-- include/linux/vermagic.h | 2 +- scripts/basic/.gitignore | 1 + scripts/basic/Makefile | 11 +++++++++++ scripts/gcc-plugins/Makefile | 15 ++++++++++----- scripts/gcc-plugins/gen-random-seed.sh | 9 --------- scripts/gen-randstruct-seed.sh | 7 +++++++ security/Kconfig.hardening | 9 +++++---- 9 files changed, 39 insertions(+), 21 deletions(-) delete mode 100755 scripts/gcc-plugins/gen-random-seed.sh create mode 100755 scripts/gen-randstruct-seed.sh diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 910b30a2a7d9..352ff53a2306 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -211,6 +211,7 @@ r200_reg_safe.h r300_reg_safe.h r420_reg_safe.h r600_reg_safe.h +randstruct.seed randomize_layout_hash.h randomize_layout_seed.h recordmcount diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index 81ff30505d35..071f0151a7a4 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -100,8 +100,9 @@ Structure randomisation ----------------------- If you enable ``CONFIG_RANDSTRUCT``, you will need to pre-generate -the random seed in ``scripts/gcc-plugins/randomize_layout_seed.h`` -so the same value is used in rebuilds. +the random seed in ``scripts/basic/randstruct.seed`` so the same +value is used by each build. See ``scripts/gen-randstruct-seed.sh`` +for details. 
Debug info conflicts -------------------- diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index efb51a2da599..a54046bf37e5 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -33,7 +33,7 @@ #define MODULE_VERMAGIC_MODVERSIONS "" #endif #ifdef RANDSTRUCT -#include <generated/randomize_layout_hash.h> +#include <generated/randstruct_hash.h> #define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED #else #define MODULE_RANDSTRUCT #endif diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore index 961c91c8a884..07c195f605a1 100644 --- a/scripts/basic/.gitignore +++ b/scripts/basic/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /fixdep +/randstruct.seed diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index eeb6a38c5551..dd289a6725ac 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile @@ -3,3 +3,14 @@ # fixdep: used to generate dependency information during build process hostprogs-always-y += fixdep + +# randstruct: the seed is needed before building the gcc-plugin or +# before running a Clang kernel build. +gen-randstruct-seed := $(srctree)/scripts/gen-randstruct-seed.sh +quiet_cmd_create_randstruct_seed = GENSEED $@ +cmd_create_randstruct_seed = \ + $(CONFIG_SHELL) $(gen-randstruct-seed) \ + $@ $(objtree)/include/generated/randstruct_hash.h +$(obj)/randstruct.seed: $(gen-randstruct-seed) FORCE + $(call if_changed,create_randstruct_seed) +always-$(CONFIG_RANDSTRUCT) += randstruct.seed diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index 1952d3bb80c6..148f4639cf09 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -1,12 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 -$(obj)/randomize_layout_plugin.so: $(objtree)/$(obj)/randomize_layout_seed.h -quiet_cmd_create_randomize_layout_seed = GENSEED $@ +$(obj)/randomize_layout_plugin.so: $(obj)/randomize_layout_seed.h +quiet_cmd_create_randomize_layout_seed = SEEDHDR $@ cmd_create_randomize_layout_seed = \ - $(CONFIG_SHELL) $(srctree)/$(src)/gen-random-seed.sh $@ $(objtree)/include/generated/randomize_layout_hash.h -$(objtree)/$(obj)/randomize_layout_seed.h: FORCE + SEED=$$(cat $(filter-out FORCE,$^) </dev/null); \ + echo '/*' > $@; \ + echo ' * This file is automatically generated. Keep it private.' >> $@; \ + echo ' * Exposing this value will expose the layout of randomized structures.' >> $@; \ + echo ' */' >> $@; \ + echo "const char *randstruct_seed = \"$$SEED\";" >> $@ +$(obj)/randomize_layout_seed.h: $(objtree)/scripts/basic/randstruct.seed FORCE $(call if_changed,create_randomize_layout_seed) -targets += randomize_layout_seed.h randomize_layout_hash.h +targets += randomize_layout_seed.h # Build rules for plugins # diff --git a/scripts/gcc-plugins/gen-random-seed.sh b/scripts/gcc-plugins/gen-random-seed.sh deleted file mode 100755 index 68af5cc20a64..000000000000 --- a/scripts/gcc-plugins/gen-random-seed.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -if [ ! 
-f "$1" ]; then - SEED=`od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n'` - echo "const char *randstruct_seed = \"$SEED\";" > "$1" - HASH=`echo -n "$SEED" | sha256sum | cut -d" " -f1 | tr -d ' \n'` - echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2" -fi diff --git a/scripts/gen-randstruct-seed.sh b/scripts/gen-randstruct-seed.sh new file mode 100755 index 000000000000..61017b36c464 --- /dev/null +++ b/scripts/gen-randstruct-seed.sh @@ -0,0 +1,7 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +SEED=$(od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n') +echo "$SEED" > "$1" +HASH=$(echo -n "$SEED" | sha256sum | cut -d" " -f1) +echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2" diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 364e3f8c6eea..0277ba578779 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -284,10 +284,11 @@ choice tools like Volatility against the system (unless the kernel source tree isn't cleaned after kernel installation). - The seed used for compilation is located at - scripts/randomize_layout_seed.h. It remains after a "make clean" - to allow for external modules to be compiled with the existing - seed and will be removed by a "make mrproper" or "make distclean". + The seed used for compilation is in scripts/basic/randomize.seed. + It remains after a "make clean" to allow for external modules to + be compiled with the existing seed and will be removed by a + "make mrproper" or "make distclean". This file should not be made + public, or the structure layout can be determined. config RANDSTRUCT_NONE bool "Disable structure layout randomization" -- cgit v1.2.3 From 035f7f87b7295a342577aebd7b5b451f1e2a353c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 May 2022 13:55:03 -0700 Subject: randstruct: Enable Clang support Clang 15 will support randstruct via the -frandomize-layout-seed-file=... option. Update the Kconfig and Makefile to recognize this feature. 
Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503205503.3054173-7-keescook@chromium.org --- scripts/Makefile.randstruct | 3 +++ security/Kconfig.hardening | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.randstruct b/scripts/Makefile.randstruct index 4d741e6db554..24e283e89893 100644 --- a/scripts/Makefile.randstruct +++ b/scripts/Makefile.randstruct @@ -7,6 +7,9 @@ randstruct-cflags-y \ += -fplugin=$(objtree)/scripts/gcc-plugins/randomize_layout_plugin.so randstruct-cflags-$(CONFIG_RANDSTRUCT_PERFORMANCE) \ += -fplugin-arg-randomize_layout_plugin-performance-mode +else +randstruct-cflags-y \ + += -frandomize-layout-seed-file=$(objtree)/scripts/basic/randstruct.seed endif export RANDSTRUCT_CFLAGS := $(randstruct-cflags-y) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 0277ba578779..bd2aabb2c60f 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -266,9 +266,12 @@ config ZERO_CALL_USED_REGS endmenu +config CC_HAS_RANDSTRUCT + def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null) + choice prompt "Randomize layout of sensitive kernel structures" - default RANDSTRUCT_FULL if COMPILE_TEST && GCC_PLUGINS + default RANDSTRUCT_FULL if COMPILE_TEST && (GCC_PLUGINS || CC_HAS_RANDSTRUCT) default RANDSTRUCT_NONE help If you enable this, the layouts of structures that are entirely @@ -297,13 +300,20 @@ choice config RANDSTRUCT_FULL bool "Fully randomize structure layout" - depends on GCC_PLUGINS + depends on CC_HAS_RANDSTRUCT || GCC_PLUGINS select MODVERSIONS if MODULES help Fully randomize the member layout of sensitive structures as much as possible, which may have both a memory size and performance impact. + One difference between the Clang and GCC plugin + implementations is the handling of bitfields. The GCC + plugin treats them as fully separate variables, + introducing sometimes significant padding. Clang tries + to keep adjacent bitfields together, but with their bit + ordering randomized. + config RANDSTRUCT_PERFORMANCE bool "Limit randomization of structure layout to cache-lines" depends on GCC_PLUGINS -- cgit v1.2.3 From e85094c31ddb794ac41c299a5a7a68243148f829 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:16 +0100 Subject: arm64: stackleak: fix current_top_of_stack() Due to some historical confusion, arm64's current_top_of_stack() isn't what the stackleak code expects. This could in theory result in a number of problems, and practically results in an unnecessary performance hit. We can avoid this by aligning the arm64 implementation with the x86 implementation. The arm64 implementation of current_top_of_stack() was added specifically for stackleak in commit: 0b3e336601b82c6a ("arm64: Add support for STACKLEAK gcc plugin") This was intended to be equivalent to the x86 implementation, but the implementation, semantics, and performance characteristics differ wildly: * On x86, current_top_of_stack() returns the top of the current task's task stack, regardless of which stack is in active use. The implementation accesses a percpu variable which the x86 entry code maintains, and returns the location immediately above the pt_regs on the task stack (above which x86 has some padding). * On arm64 current_top_of_stack() returns the top of the stack in active use (i.e. the one which is currently being used). 
The implementation checks the SP against a number of potentially-accessible stacks, and will BUG() if no stack is found. The core stackleak_erase() code determines the upper bound of stack to erase with: | if (on_thread_stack()) | boundary = current_stack_pointer; | else | boundary = current_top_of_stack(); On arm64 stackleak_erase() is always called on a task stack, and on_thread_stack() should always be true. On x86, stackleak_erase() is mostly called on a trampoline stack, and is sometimes called on a task stack. Currently, this results in a lot of unnecessary code being generated for arm64 for the impossible !on_thread_stack() case. Some of this is inlined, bloating stackleak_erase(), while portions of this are left out-of-line and permitted to be instrumented (which would be a functional problem if that code were reachable). As a first step towards improving this, this patch aligns arm64's implementation of current_top_of_stack() with x86's, always returning the top of the current task's stack. With GCC 11.1.0 this results in the bulk of the unnecessary code being removed, including all of the out-of-line instrumentable code. While I don't believe there's a functional problem in practice I've marked this as a fix since the semantic was clearly wrong, the fix itself is simple, and other code might rely upon this in future. Fixes: 0b3e336601b82c6a ("arm64: Add support for STACKLEAK gcc plugin") Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Kees Cook Cc: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-2-mark.rutland@arm.com --- arch/arm64/include/asm/processor.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540c..6b1a12c23fe7 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -381,12 +381,10 @@ long get_tagged_addr_ctrl(struct task_struct *task); * of header definitions for the use of task_stack_page. */ -#define current_top_of_stack() \ -({ \ - struct stack_info _info; \ - BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \ - _info.high; \ -}) +/* + * The top of the current task's task stack + */ +#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE) #define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From a12685e2d1f7eed58ee408ba375606577b59610c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:17 +0100 Subject: stackleak: move skip_erasing() check earlier In stackleak_erase() we check skip_erasing() after accessing some fields from current. As generating the address of current uses asm which hazards with the static branch asm, this work is always performed, even when the static branch is patched to jump to the return at the end of the function. This patch avoids this redundant work by moving the skip_erasing() check earlier. To avoid complicating initialization within stackleak_erase(), the body of the function is split out into a __stackleak_erase() helper, with the check left in a wrapper function. The __stackleak_erase() helper is marked __always_inline to ensure that this is inlined into stackleak_erase() and not instrumented. 
Before this patch, on x86-64 w/ GCC 11.1.0 the start of the function is: : 65 48 8b 04 25 00 00 mov %gs:0x0,%rax 00 00 48 8b 48 20 mov 0x20(%rax),%rcx 48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax 66 90 xchg %ax,%ax <------------ static branch 48 89 c2 mov %rax,%rdx 48 29 ca sub %rcx,%rdx 48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx After this patch, on x86-64 w/ GCC 11.1.0 the start of the function is: : 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) <--- static branch 65 48 8b 04 25 00 00 mov %gs:0x0,%rax 00 00 48 8b 48 20 mov 0x20(%rax),%rcx 48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax 48 89 c2 mov %rax,%rdx 48 29 ca sub %rcx,%rdx 48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx Before this patch, on arm64 w/ GCC 11.1.0 the start of the function is: : d503245f bti c d5384100 mrs x0, sp_el0 f9401003 ldr x3, [x0, #32] f9451000 ldr x0, [x0, #2592] d503201f nop <------------------------------- static branch d503233f paciasp cb030002 sub x2, x0, x3 d287ffe1 mov x1, #0x3fff eb01005f cmp x2, x1 After this patch, on arm64 w/ GCC 11.1.0 the start of the function is: : d503245f bti c d503201f nop <------------------------------- static branch d503233f paciasp d5384100 mrs x0, sp_el0 f9401003 ldr x3, [x0, #32] d287ffe1 mov x1, #0x3fff f9451000 ldr x0, [x0, #2592] cb030002 sub x2, x0, x3 eb01005f cmp x2, x1 While this may not be a huge win on its own, moving the static branch will permit further optimization of the body of the function in subsequent patches. Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-3-mark.rutland@arm.com --- kernel/stackleak.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/stackleak.c b/kernel/stackleak.c index ddb5a7f48d69..753eab797a04 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init); #define skip_erasing() false #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */ -asmlinkage void noinstr stackleak_erase(void) +static __always_inline void __stackleak_erase(void) { /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ unsigned long kstack_ptr = current->lowest_stack; @@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void) unsigned int poison_count = 0; const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); - if (skip_erasing()) - return; - /* Check that 'lowest_stack' value is sane */ if (unlikely(kstack_ptr - boundary >= THREAD_SIZE)) kstack_ptr = boundary; @@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void) current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; } +asmlinkage void noinstr stackleak_erase(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(); +} + void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) { unsigned long sp = current_stack_pointer; -- cgit v1.2.3 From ac7838b4e1c552d54e67f78a29bc1bd7701c13e8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:18 +0100 Subject: stackleak: remove redundant check In __stackleak_erase() we check that the `erase_low` value derived from `current->lowest_stack` is above the lowest legitimate stack pointer value, but this is already enforced by stackleak_track_stack() when recording the lowest stack value. Remove the redundant check. There should be no functional change as a result of this patch. 
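The invariant relied on here is the guard in stackleak_track_stack(), shown as it stands at this point in the series (the next patch rewrites this bound via a helper): lowest_stack is only ever lowered to an address the erase loop may safely dereference.

	/* 'lowest_stack' should be aligned on the register width boundary */
	sp = ALIGN(sp, sizeof(unsigned long));
	if (sp < current->lowest_stack &&
	    sp >= (unsigned long)task_stack_page(current) +
	    sizeof(unsigned long)) {
		current->lowest_stack = sp;
	}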
Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-4-mark.rutland@arm.com --- kernel/stackleak.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/stackleak.c b/kernel/stackleak.c index 753eab797a04..f7a0f8cf73c3 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -78,10 +78,6 @@ static __always_inline void __stackleak_erase(void) unsigned int poison_count = 0; const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); - /* Check that 'lowest_stack' value is sane */ - if (unlikely(kstack_ptr - boundary >= THREAD_SIZE)) - kstack_ptr = boundary; - /* Search for the poison value in the kernel stack */ while (kstack_ptr > boundary && poison_count <= depth) { if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON) -- cgit v1.2.3 From 9ec79840d6afaf472294588a6bbe145bcdffa28b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:19 +0100 Subject: stackleak: rework stack low bound handling In stackleak_task_init(), stackleak_track_stack(), and __stackleak_erase(), we open-code skipping the STACK_END_MAGIC at the bottom of the stack. Each case is implemented slightly differently, and only the __stackleak_erase() case is commented. In stackleak_task_init() and stackleak_track_stack() we unconditionally add sizeof(unsigned long) to the lowest stack address. In stackleak_task_init() we use end_of_stack() for this, and in stackleak_track_stack() we use task_stack_page(). In __stackleak_erase() we handle this by detecting if `kstack_ptr` has hit the stack end boundary, and if so, conditionally moving it above the magic. This patch adds a new stackleak_task_low_bound() helper which is used in all three cases, which unconditionally adds sizeof(unsigned long) to the lowest address on the task stack, with commentary as to why. This uses end_of_stack() as stackleak_task_init() did prior to this patch, as this is consistent with the code in kernel/fork.c which initializes the STACK_END_MAGIC value. In __stackleak_erase() we no longer need to check whether we've spilled into the STACK_END_MAGIC value, as stackleak_track_stack() ensures that `current->lowest_stack` stops immediately above this, and similarly the poison scan will stop immediately above this. For stackleak_task_init() and stackleak_track_stack() this results in no change to code generation. For __stackleak_erase() the generated assembly is slightly simpler and shorter. Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-5-mark.rutland@arm.com --- include/linux/stackleak.h | 15 ++++++++++++++- kernel/stackleak.c | 14 ++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index ccaab2043fcd..67430faa5c51 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -15,9 +15,22 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include +/* + * The lowest address on tsk's stack which we can plausibly erase. + */ +static __always_inline unsigned long +stackleak_task_low_bound(const struct task_struct *tsk) +{ + /* + * The lowest unsigned long on the task stack contains STACK_END_MAGIC, + * which we must not corrupt. 
+	 */
+	return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long);
+}
+
 static inline void stackleak_task_init(struct task_struct *t)
 {
-	t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+	t->lowest_stack = stackleak_task_low_bound(t);
 # ifdef CONFIG_STACKLEAK_METRICS
 	t->prev_lowest_stack = t->lowest_stack;
 # endif
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index f7a0f8cf73c3..24b7cf01b297 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -72,9 +72,11 @@ late_initcall(stackleak_sysctls_init);
 
 static __always_inline void __stackleak_erase(void)
 {
+	const unsigned long task_stack_low = stackleak_task_low_bound(current);
+
 	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
 	unsigned long kstack_ptr = current->lowest_stack;
-	unsigned long boundary = (unsigned long)end_of_stack(current);
+	unsigned long boundary = task_stack_low;
 	unsigned int poison_count = 0;
 	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
 
@@ -88,13 +90,6 @@ static __always_inline void __stackleak_erase(void)
 		kstack_ptr -= sizeof(unsigned long);
 	}
 
-	/*
-	 * One 'long int' at the bottom of the thread stack is reserved and
-	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
-	 */
-	if (kstack_ptr == boundary)
-		kstack_ptr += sizeof(unsigned long);
-
 #ifdef CONFIG_STACKLEAK_METRICS
 	current->prev_lowest_stack = kstack_ptr;
 #endif
@@ -140,8 +135,7 @@ void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
 	/* 'lowest_stack' should be aligned on the register width boundary */
 	sp = ALIGN(sp, sizeof(unsigned long));
 	if (sp < current->lowest_stack &&
-	    sp >= (unsigned long)task_stack_page(current) +
-	    sizeof(unsigned long)) {
+	    sp >= stackleak_task_low_bound(current)) {
 		current->lowest_stack = sp;
 	}
 }
-- cgit v1.2.3

From 1723d39d2fe49b8a75939060c95e7e908b463793 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:20 +0100
Subject: stackleak: clarify variable names

The logic within __stackleak_erase() can be a little hard to follow, as `boundary` switches from being the low bound to the high bound midway through the function, and `kstack_ptr` is used to represent the start of the region to erase while `boundary` represents the end of the region to erase.

Make this a little clearer by using consistent, more descriptive variable names: the `boundary` variable is removed, the bounds of the region to erase are described by `erase_low` and `erase_high`, and the bounds of the task stack are described by `task_stack_low` and `task_stack_high`.

At the same time, remove the comment above the variables, since it is unclear whether it's intended as rationale, a complaint, or a TODO, and is more confusing than helpful.

There should be no functional change as a result of this patch.
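For context on the low bound that the preceding patches handle: the magic word is written once when the task stack is created. The initialization in kernel/fork.c is reproduced here for reference (the consumer side is CONFIG_SCHED_STACK_END_CHECK):

void set_task_stack_end_magic(struct task_struct *tsk)
{
	unsigned long *stackend;

	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */
}

This is why stackleak_task_low_bound() unconditionally adds sizeof(unsigned long): poisoning that word would defeat the stack-overflow canary.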
Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-6-mark.rutland@arm.com
---
 kernel/stackleak.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 24b7cf01b297..d5f684dc0a2d 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -73,40 +73,38 @@ late_initcall(stackleak_sysctls_init);
 static __always_inline void __stackleak_erase(void)
 {
 	const unsigned long task_stack_low = stackleak_task_low_bound(current);
-
-	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
-	unsigned long kstack_ptr = current->lowest_stack;
-	unsigned long boundary = task_stack_low;
+	unsigned long erase_low = current->lowest_stack;
+	unsigned long erase_high;
 	unsigned int poison_count = 0;
 	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
 
 	/* Search for the poison value in the kernel stack */
-	while (kstack_ptr > boundary && poison_count <= depth) {
-		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
+	while (erase_low > task_stack_low && poison_count <= depth) {
+		if (*(unsigned long *)erase_low == STACKLEAK_POISON)
 			poison_count++;
 		else
 			poison_count = 0;
 
-		kstack_ptr -= sizeof(unsigned long);
+		erase_low -= sizeof(unsigned long);
 	}
 
 #ifdef CONFIG_STACKLEAK_METRICS
-	current->prev_lowest_stack = kstack_ptr;
+	current->prev_lowest_stack = erase_low;
 #endif
 
 	/*
-	 * Now write the poison value to the kernel stack. Start from
-	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
-	 * the stack pointer doesn't change when we write poison.
+	 * Now write the poison value to the kernel stack between 'erase_low'
+	 * and 'erase_high'. We assume that the stack pointer doesn't change
+	 * when we write poison.
 	 */
 	if (on_thread_stack())
-		boundary = current_stack_pointer;
+		erase_high = current_stack_pointer;
 	else
-		boundary = current_top_of_stack();
+		erase_high = current_top_of_stack();
 
-	while (kstack_ptr < boundary) {
-		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
-		kstack_ptr += sizeof(unsigned long);
+	while (erase_low < erase_high) {
+		*(unsigned long *)erase_low = STACKLEAK_POISON;
+		erase_low += sizeof(unsigned long);
 	}
 
 	/* Reset the 'lowest_stack' value for the next syscall */
-- cgit v1.2.3

From 0cfa2ccd285d98ad62218add2eebdcfff69fb2c0 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:21 +0100
Subject: stackleak: rework stack high bound handling

Prior to returning to userspace, we reset current->lowest_stack to a reasonable high bound. Currently we do this by subtracting the arbitrary value `THREAD_SIZE/64` from the top of the stack, for reasons lost to history.

Looking at configurations today:

* On i386 where THREAD_SIZE is 8K, the bound will be 128 bytes. The pt_regs at the top of the stack is 68 bytes (with 0 to 16 bytes of padding above), and so this covers an additional portion of 44 to 60 bytes.

* On x86_64 where THREAD_SIZE is at least 16K (up to 32K with KASAN) the bound will be at least 256 bytes (up to 512 with KASAN). The pt_regs at the top of the stack is 168 bytes, and so this covers an additional 88 bytes of stack (up to 344 with KASAN).

* On arm64 where THREAD_SIZE is at least 16K (up to 64K with 64K pages and VMAP_STACK), the bound will be at least 256 bytes (up to 1024 with KASAN).
The pt_regs at the top of the stack is 336 bytes, so this can fall within the pt_regs, or can cover an additional 688 bytes of stack.

Clearly the `THREAD_SIZE/64` value doesn't make much sense -- in the worst case, this will cause more than 600 bytes of stack to be erased for every syscall, even if actual stack usage were substantially smaller.

This patch makes this slightly less nonsensical by consistently resetting current->lowest_stack to the base of the task pt_regs. For clarity and for consistency with the handling of the low bound, the generation of the high bound is split into a helper with commentary explaining why.

Since the pt_regs at the top of the stack will be clobbered upon the next exception entry, we don't need to poison these at exception exit. By using task_pt_regs() as the high stack boundary instead of current_top_of_stack() we avoid some redundant poisoning, and the compiler can share the address generation between the poisoning and resetting of `current->lowest_stack`, making the generated code more optimal.

It's not clear to me whether the existing `THREAD_SIZE/64` offset was a dodgy heuristic to skip the pt_regs, or whether it was attempting to minimize the number of times stackleak_check_stack() would have to update `current->lowest_stack` when stack usage was shallow at the cost of unconditionally poisoning a small portion of the stack for every exit to userspace. For now I've simply removed the offset, and if we need/want to minimize updates for shallow stack usage it should be easy to add a better heuristic atop, with appropriate commentary so we know what's going on.

Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-7-mark.rutland@arm.com
---
 include/linux/stackleak.h | 14 ++++++++++++++
 kernel/stackleak.c        | 19 ++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index 67430faa5c51..467661aeb413 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -28,6 +28,20 @@ stackleak_task_low_bound(const struct task_struct *tsk)
 	return (unsigned long)end_of_stack(tsk) + sizeof(unsigned long);
 }
 
+/*
+ * The address immediately after the highest address on tsk's stack which we
+ * can plausibly erase.
+ */
+static __always_inline unsigned long
+stackleak_task_high_bound(const struct task_struct *tsk)
+{
+	/*
+	 * The task's pt_regs lives at the top of the task stack and will be
+	 * overwritten by exception entry, so there's no need to erase them.
+	 */
+	return (unsigned long)task_pt_regs(tsk);
+}
+
 static inline void stackleak_task_init(struct task_struct *t)
 {
 	t->lowest_stack = stackleak_task_low_bound(t);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index d5f684dc0a2d..ba346d46218f 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -73,6 +73,7 @@ late_initcall(stackleak_sysctls_init);
 static __always_inline void __stackleak_erase(void)
 {
 	const unsigned long task_stack_low = stackleak_task_low_bound(current);
+	const unsigned long task_stack_high = stackleak_task_high_bound(current);
 	unsigned long erase_low = current->lowest_stack;
 	unsigned long erase_high;
 	unsigned int poison_count = 0;
@@ -93,14 +94,22 @@ static __always_inline void __stackleak_erase(void)
 #endif
 
 	/*
-	 * Now write the poison value to the kernel stack between 'erase_low'
-	 * and 'erase_high'.
We assume that the stack pointer doesn't change - * when we write poison. + * Write poison to the task's stack between 'erase_low' and + * 'erase_high'. + * + * If we're running on a different stack (e.g. an entry trampoline + * stack) we can erase everything below the pt_regs at the top of the + * task stack. + * + * If we're running on the task stack itself, we must not clobber any + * stack used by this function and its caller. We assume that this + * function has a fixed-size stack frame, and the current stack pointer + * doesn't change while we write poison. */ if (on_thread_stack()) erase_high = current_stack_pointer; else - erase_high = current_top_of_stack(); + erase_high = task_stack_high; while (erase_low < erase_high) { *(unsigned long *)erase_low = STACKLEAK_POISON; @@ -108,7 +117,7 @@ static __always_inline void __stackleak_erase(void) } /* Reset the 'lowest_stack' value for the next syscall */ - current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; + current->lowest_stack = task_stack_high; } asmlinkage void noinstr stackleak_erase(void) -- cgit v1.2.3 From 77cf2b6dee6680536a3109d894f1b1ccda3fc5bf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:22 +0100 Subject: stackleak: rework poison scanning Currently we over-estimate the region of stack which must be erased. To determine the region to be erased, we scan downwards for a contiguous block of poison values (or the low bound of the stack). There are a few minor problems with this today: * When we find a block of poison values, we include this block within the region to erase. As this is included within the region to erase, this causes us to redundantly overwrite 'STACKLEAK_SEARCH_DEPTH' (128) bytes with poison. * As the loop condition checks 'poison_count <= depth', it will run an additional iteration after finding the contiguous block of poison, decrementing 'erase_low' once more than necessary. As this is included within the region to erase, this causes us to redundantly overwrite an additional unsigned long with poison. * As we always decrement 'erase_low' after checking an element on the stack, we always include the element below this within the region to erase. As this is included within the region to erase, this causes us to redundantly overwrite an additional unsigned long with poison. Note that this is not a functional problem. As the loop condition checks 'erase_low > task_stack_low', we'll never clobber the STACK_END_MAGIC. As we always decrement 'erase_low' after this, we'll never fail to erase the element immediately above the STACK_END_MAGIC. In total, this can cause us to erase `128 + 2 * sizeof(unsigned long)` bytes more than necessary, which is unfortunate. This patch reworks the logic to find the address immediately above the poisoned region, by finding the lowest non-poisoned address. This is factored into a stackleak_find_top_of_poison() helper both for clarity and so that this can be shared with the LKDTM test in subsequent patches. 
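To make the reworked semantics concrete, here is a stand-alone userspace model of the scan (the names, POISON value, and DEPTH here are illustrative stand-ins for the kernel's STACKLEAK_POISON and word-granular search depth; array indices stand in for stack addresses):

#include <assert.h>
#include <stddef.h>

#define POISON	0xccddccddUL	/* stand-in for STACKLEAK_POISON */
#define DEPTH	3		/* stand-in for the search depth, in words */

/* Returns the index of the lowest non-poison slot above the poison run. */
static size_t find_top_of_poison(const unsigned long *stack,
				 size_t low, size_t high)
{
	size_t poison_count = 0;
	size_t poison_high = high;
	size_t i = high;

	while (i > low && poison_count < DEPTH) {
		i--;
		if (stack[i] == POISON) {
			poison_count++;
		} else {
			poison_count = 0;
			poison_high = i;
		}
	}
	return poison_high;
}

int main(void)
{
	/*  index:           0     1       2       3      4     5    */
	unsigned long s[] = { 1, POISON, POISON, POISON, 42, POISON };

	/*
	 * The run at indices 1..3 satisfies DEPTH; slot 4 is the first
	 * non-poison word above it, i.e. where erasing should begin.
	 */
	assert(find_top_of_poison(s, 0, 6) == 4);
	return 0;
}

Unlike the old loop, the returned position is never decremented past the poison run, which is exactly what eliminates the redundant overwrites enumerated above.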
Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-8-mark.rutland@arm.com
---
 include/linux/stackleak.h | 26 ++++++++++++++++++++++++++
 kernel/stackleak.c        | 18 ++++--------------
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index 467661aeb413..c36e7a3b45e7 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -42,6 +42,32 @@ stackleak_task_high_bound(const struct task_struct *tsk)
 	return (unsigned long)task_pt_regs(tsk);
 }
 
+/*
+ * Find the address immediately above the poisoned region of the stack, where
+ * that region falls between 'low' (inclusive) and 'high' (exclusive).
+ */
+static __always_inline unsigned long
+stackleak_find_top_of_poison(const unsigned long low, const unsigned long high)
+{
+	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+	unsigned int poison_count = 0;
+	unsigned long poison_high = high;
+	unsigned long sp = high;
+
+	while (sp > low && poison_count < depth) {
+		sp -= sizeof(unsigned long);
+
+		if (*(unsigned long *)sp == STACKLEAK_POISON) {
+			poison_count++;
+		} else {
+			poison_count = 0;
+			poison_high = sp;
+		}
+	}
+
+	return poison_high;
+}
+
 static inline void stackleak_task_init(struct task_struct *t)
 {
 	t->lowest_stack = stackleak_task_low_bound(t);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ba346d46218f..afd54b8e10b8 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -74,20 +74,10 @@ static __always_inline void __stackleak_erase(void)
 {
 	const unsigned long task_stack_low = stackleak_task_low_bound(current);
 	const unsigned long task_stack_high = stackleak_task_high_bound(current);
-	unsigned long erase_low = current->lowest_stack;
-	unsigned long erase_high;
-	unsigned int poison_count = 0;
-	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
-
-	/* Search for the poison value in the kernel stack */
-	while (erase_low > task_stack_low && poison_count <= depth) {
-		if (*(unsigned long *)erase_low == STACKLEAK_POISON)
-			poison_count++;
-		else
-			poison_count = 0;
-
-		erase_low -= sizeof(unsigned long);
-	}
+	unsigned long erase_low, erase_high;
+
+	erase_low = stackleak_find_top_of_poison(task_stack_low,
+						 current->lowest_stack);
 
 #ifdef CONFIG_STACKLEAK_METRICS
 	current->prev_lowest_stack = erase_low;
-- cgit v1.2.3

From 4130a61cebb1843517d68017a967f27fb602b885 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:23 +0100
Subject: lkdtm/stackleak: avoid spurious failure

The lkdtm_STACKLEAK_ERASING() test scans for a contiguous block of poison values between the low stack bound and the stack pointer, and fails if it does not find a sufficiently large block. This can happen legitimately if the scan reaches the low stack bound, which could occur if functions called prior to lkdtm_STACKLEAK_ERASING() used a large amount of stack. If this were to occur, it means that the erased portion of the stack is smaller than the size used by the scan, but it does not cause a functional problem.

In practice this is unlikely to happen, but as this is legitimate and would not result in a functional problem, the test should not fail in this case. Remove the spurious failure case.
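A worked instance of the legitimate case, with illustrative numbers rather than anything taken from the patch: on 64-bit, check_depth is STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long) = 128 / 8 = 16 words. If functions called before the test had pushed current->lowest_stack to within 64 bytes of the low bound, erasing poisons only 64 / 8 = 8 words; the scan then hits the low bound with found == 8 <= 16, and the old test reported FAIL even though erasing behaved exactly as designed.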
Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-9-mark.rutland@arm.com
---
 drivers/misc/lkdtm/stackleak.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index 00db21ff115e..707d530d509b 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -53,13 +53,6 @@ void lkdtm_STACKLEAK_ERASING(void)
 			found = 0;
 	}
 
-	if (found <= check_depth) {
-		pr_err("FAIL: the erased part is not found (checked %lu bytes)\n",
-		       i * sizeof(unsigned long));
-		test_failed = true;
-		goto end;
-	}
-
 	pr_info("the erased part begins after %lu not poisoned bytes\n",
 		(i - found) * sizeof(unsigned long));
-- cgit v1.2.3

From 72b61896f2b47fa4b98e86184bc0e6ddbd1a8db1 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:24 +0100
Subject: lkdtm/stackleak: rework boundary management

There are a few problems with the way the LKDTM STACKLEAK_ERASING test manipulates the stack pointer and boundary values:

* It uses the address of a local variable to determine the current stack pointer, rather than using current_stack_pointer directly. As the local variable could be placed anywhere within the stack frame, this can be an over-estimate of the true stack pointer value.

* It uses an estimate of the current stack pointer as the upper boundary when scanning for poison, even though prior functions could have used more stack (and may have updated current->lowest_stack accordingly).

* A pr_info() call is made in the middle of the test. As the printk() code is out-of-line and will make use of the stack, this could clobber poison and/or adjust current->lowest_stack. It would be better to log the metadata after the body of the test to avoid such problems.

These have been observed to result in spurious test failures on arm64. In addition to this there are a couple of things which are sub-optimal:

* To avoid the STACK_END_MAGIC value, it conditionally modifies 'left' if this contains more than a single element, when it could instead calculate the bound unconditionally using stackleak_task_low_bound().

* It open-codes the poison scanning. It would be better if this used the same helper code as the erasing function so that the two cannot diverge.

This patch reworks the test to avoid these issues, making use of the recently introduced helpers to ensure this is aligned with the regular stackleak code. As the new code tests stack boundaries before accessing the stack, there is no need to fail early when the tracked or untracked portions of the stack extend all the way to the low stack boundary.

As stackleak_find_top_of_poison() is now used to find the top of the poisoned region of the stack, the subsequent poison checking starts at this boundary and verifies that stackleak_find_top_of_poison() is working correctly.

The pr_info() which logged the untracked portion of stack is now moved to the end of the function, and logs the size of all the portions of the stack relevant to the test, including the portions at the top and bottom of the stack which are not erased or scanned, and the current / lowest recorded stack usage.
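The figures in the test logs below partition the task stack as follows (a sketch, with higher addresses at the top; names match the variables introduced in the new code):

/*
 *  task_stack_base + THREAD_SIZE
 *      |  "high offset": pt_regs, neither erased nor scanned
 *  task_stack_high
 *      |  "tracked": both current_sp and lowest_sp fall in here
 *  untracked_high  (min(current_sp, lowest_sp), aligned down)
 *      |  "untracked": possibly used without being tracked; scanned
 *  poison_high
 *      |  "poisoned": verified to contain only STACKLEAK_POISON
 *  task_stack_low
 *      |  "low offset": the reserved STACK_END_MAGIC word
 *  task_stack_base
 */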
Tested on x86_64: | # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry STACKLEAK_ERASING | lkdtm: stackleak stack usage: | high offset: 168 bytes | current: 336 bytes | lowest: 656 bytes | tracked: 656 bytes | untracked: 400 bytes | poisoned: 15152 bytes | low offset: 8 bytes | lkdtm: OK: the rest of the thread stack is properly erased Tested on arm64: | # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry STACKLEAK_ERASING | lkdtm: stackleak stack usage: | high offset: 336 bytes | current: 656 bytes | lowest: 1232 bytes | tracked: 1232 bytes | untracked: 672 bytes | poisoned: 14136 bytes | low offset: 8 bytes | lkdtm: OK: the rest of the thread stack is properly erased Tested on arm64 with deliberate breakage to the starting stack value and poison scanning: | # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry STACKLEAK_ERASING | lkdtm: FAIL: non-poison value 24 bytes below poison boundary: 0x0 | lkdtm: FAIL: non-poison value 32 bytes below poison boundary: 0xffff8000083dbc00 ... | lkdtm: FAIL: non-poison value 1912 bytes below poison boundary: 0x78b4b9999e8cb15 | lkdtm: FAIL: non-poison value 1920 bytes below poison boundary: 0xffff8000083db400 | lkdtm: stackleak stack usage: | high offset: 336 bytes | current: 688 bytes | lowest: 1232 bytes | tracked: 576 bytes | untracked: 288 bytes | poisoned: 15176 bytes | low offset: 8 bytes | lkdtm: FAIL: the thread stack is NOT properly erased! | lkdtm: Unexpected! This kernel (5.18.0-rc1-00013-g1f7b1f1e29e0-dirty aarch64) was built with CONFIG_GCC_PLUGIN_STACKLEAK=y Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-10-mark.rutland@arm.com --- drivers/misc/lkdtm/stackleak.c | 86 +++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c index 707d530d509b..0aafa46ced7d 100644 --- a/drivers/misc/lkdtm/stackleak.c +++ b/drivers/misc/lkdtm/stackleak.c @@ -13,59 +13,67 @@ void lkdtm_STACKLEAK_ERASING(void) { - unsigned long *sp, left, found, i; - const unsigned long check_depth = - STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); + const unsigned long task_stack_base = (unsigned long)task_stack_page(current); + const unsigned long task_stack_low = stackleak_task_low_bound(current); + const unsigned long task_stack_high = stackleak_task_high_bound(current); + const unsigned long current_sp = current_stack_pointer; + const unsigned long lowest_sp = current->lowest_stack; + unsigned long untracked_high; + unsigned long poison_high, poison_low; bool test_failed = false; /* - * For the details about the alignment of the poison values, see - * the comment in stackleak_track_stack(). + * Depending on what has run prior to this test, the lowest recorded + * stack pointer could be above or below the current stack pointer. + * Start from the lowest of the two. + * + * Poison values are naturally-aligned unsigned longs. As the current + * stack pointer might not be sufficiently aligned, we must align + * downwards to find the lowest known stack pointer value. This is the + * high boundary for a portion of the stack which may have been used + * without being tracked, and has to be scanned for poison. 
*/ - sp = PTR_ALIGN(&i, sizeof(unsigned long)); - - left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long); - sp--; + untracked_high = min(current_sp, lowest_sp); + untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long)); /* - * One 'long int' at the bottom of the thread stack is reserved - * and not poisoned. + * Find the top of the poison in the same way as the erasing code. */ - if (left > 1) { - left--; - } else { - pr_err("FAIL: not enough stack space for the test\n"); - test_failed = true; - goto end; - } - - pr_info("checking unused part of the thread stack (%lu bytes)...\n", - left * sizeof(unsigned long)); + poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high); /* - * Search for 'check_depth' poison values in a row (just like - * stackleak_erase() does). + * Check whether the poisoned portion of the stack (if any) consists + * entirely of poison. This verifies the entries that + * stackleak_find_top_of_poison() should have checked. */ - for (i = 0, found = 0; i < left && found <= check_depth; i++) { - if (*(sp - i) == STACKLEAK_POISON) - found++; - else - found = 0; - } + poison_low = poison_high; + while (poison_low > task_stack_low) { + poison_low -= sizeof(unsigned long); - pr_info("the erased part begins after %lu not poisoned bytes\n", - (i - found) * sizeof(unsigned long)); + if (*(unsigned long *)poison_low == STACKLEAK_POISON) + continue; - /* The rest of thread stack should be erased */ - for (; i < left; i++) { - if (*(sp - i) != STACKLEAK_POISON) { - pr_err("FAIL: bad value number %lu in the erased part: 0x%lx\n", - i, *(sp - i)); - test_failed = true; - } + pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n", + poison_high - poison_low, *(unsigned long *)poison_low); + test_failed = true; } -end: + pr_info("stackleak stack usage:\n" + " high offset: %lu bytes\n" + " current: %lu bytes\n" + " lowest: %lu bytes\n" + " tracked: %lu bytes\n" + " untracked: %lu bytes\n" + " poisoned: %lu bytes\n" + " low offset: %lu bytes\n", + task_stack_base + THREAD_SIZE - task_stack_high, + task_stack_high - current_sp, + task_stack_high - lowest_sp, + task_stack_high - untracked_high, + untracked_high - poison_high, + poison_high - task_stack_low, + task_stack_low - task_stack_base); + if (test_failed) { pr_err("FAIL: the thread stack is NOT properly erased!\n"); pr_expected_config(CONFIG_GCC_PLUGIN_STACKLEAK); -- cgit v1.2.3 From f03a50938decaa601f02294e4d057fb0048ca9ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:25 +0100 Subject: lkdtm/stackleak: prevent unexpected stack usage The lkdtm_STACKLEAK_ERASING() test is instrumentable and runs with IRQs unmasked, so it's possible for unrelated code to clobber the task stack and/or manipulate current->lowest_stack while the test is running, resulting in spurious failures. The regular stackleak erasing code is non-instrumentable and runs with IRQs masked, preventing similar issues. Make the body of the test non-instrumentable, and run it with IRQs masked, avoiding such spurious failures. 
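For background, noinstr is what guarantees the compiler cannot inject instrumentation calls (which would themselves use stack) into the test body. Its definition is roughly the following, condensed from include/linux/compiler_types.h -- the exact attribute set varies between kernel versions:

/* Section for code which can't be instrumented at all */
#define noinstr							\
	noinline notrace __attribute((__section__(".noinstr.text")))	\
	__no_kcsan __no_sanitize_address __no_sanitize_coverage

Together with the local_irq_save()/local_irq_restore() pair in the caller, this keeps tracers, sanitizers, and interrupt handlers away from the stack region under test.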
Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Catalin Marinas
Cc: Kees Cook
Cc: Will Deacon
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-11-mark.rutland@arm.com
---
 drivers/misc/lkdtm/stackleak.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index 0aafa46ced7d..46c60761a05e 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -11,7 +11,20 @@
 #include "lkdtm.h"
 #include <linux/stackleak.h>
 
-void lkdtm_STACKLEAK_ERASING(void)
+/*
+ * Check that stackleak tracks the lowest stack pointer and erases the stack
+ * below this as expected.
+ *
+ * To prevent the lowest stack pointer changing during the test, IRQs are
+ * masked and instrumentation of this function is disabled. We assume that the
+ * compiler will create a fixed-size stack frame for this function.
+ *
+ * Any non-inlined function may make further use of the stack, altering the
+ * lowest stack pointer and/or clobbering poison values. To avoid spurious
+ * failures we must avoid printing until the end of the test or have already
+ * encountered a failure condition.
+ */
+static void noinstr check_stackleak_irqoff(void)
 {
 	const unsigned long task_stack_base = (unsigned long)task_stack_page(current);
 	const unsigned long task_stack_low = stackleak_task_low_bound(current);
@@ -81,3 +94,12 @@ void lkdtm_STACKLEAK_ERASING(void)
 		pr_info("OK: the rest of the thread stack is properly erased\n");
 	}
 }
+
+void lkdtm_STACKLEAK_ERASING(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	check_stackleak_irqoff();
+	local_irq_restore(flags);
+}
-- cgit v1.2.3

From f171d695f3adfd8093272fa5776ef4a2c6b9cf08 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:26 +0100
Subject: lkdtm/stackleak: check stack boundaries

The stackleak code relies upon the current SP and lowest recorded SP falling within expected task stack boundaries. Check this at the start of the test.

Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Catalin Marinas
Cc: Kees Cook
Cc: Will Deacon
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-12-mark.rutland@arm.com
---
 drivers/misc/lkdtm/stackleak.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index 46c60761a05e..52800583fd05 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -35,6 +35,25 @@ static void noinstr check_stackleak_irqoff(void)
 	unsigned long poison_high, poison_low;
 	bool test_failed = false;
 
+	/*
+	 * Check that the current and lowest recorded stack pointer values fall
+	 * within the expected task stack boundaries. These tests should never
+	 * fail unless the boundaries are incorrect or we're clobbering the
+	 * STACK_END_MAGIC, and in either case something is seriously wrong.
+	 */
+	if (current_sp < task_stack_low || current_sp >= task_stack_high) {
+		pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n",
+		       current_sp, task_stack_low, task_stack_high - 1);
+		test_failed = true;
+		goto out;
+	}
+	if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) {
+		pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n",
+		       lowest_sp, task_stack_low, task_stack_high - 1);
+		test_failed = true;
+		goto out;
+	}
+
 	/*
 	 * Depending on what has run prior to this test, the lowest recorded
 	 * stack pointer could be above or below the current stack pointer.
@@ -87,6 +106,7 @@ static void noinstr check_stackleak_irqoff(void)
 		   poison_high - task_stack_low,
 		   task_stack_low - task_stack_base);
 
+out:
 	if (test_failed) {
 		pr_err("FAIL: the thread stack is NOT properly erased!\n");
 		pr_expected_config(CONFIG_GCC_PLUGIN_STACKLEAK);
-- cgit v1.2.3

From 8111e67dee9ff774712cff8e34fba465c8361960 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 27 Apr 2022 18:31:27 +0100
Subject: stackleak: add on/off stack variants

The stackleak_erase() code dynamically handles being on a task stack or another stack. In most cases, this is a fixed property of the caller, which the caller is aware of, as an architecture might always return using the task stack, or might always return using a trampoline stack.

This patch adds stackleak_erase_on_task_stack() and stackleak_erase_off_task_stack() functions which callers can use to avoid the on_thread_stack() check and associated redundant work when the calling stack is known. The existing stackleak_erase() is retained as a safe default.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220427173128.2603085-13-mark.rutland@arm.com
---
 kernel/stackleak.c | 35 ++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index afd54b8e10b8..c2c33d2202e9 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
 #define skip_erasing()	false
 #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
 
-static __always_inline void __stackleak_erase(void)
+static __always_inline void __stackleak_erase(bool on_task_stack)
 {
 	const unsigned long task_stack_low = stackleak_task_low_bound(current);
 	const unsigned long task_stack_high = stackleak_task_high_bound(current);
@@ -96,7 +96,7 @@ static __always_inline void __stackleak_erase(void)
 	 * function has a fixed-size stack frame, and the current stack pointer
 	 * doesn't change while we write poison.
 	 */
-	if (on_thread_stack())
+	if (on_task_stack)
 		erase_high = current_stack_pointer;
 	else
 		erase_high = task_stack_high;
@@ -110,12 +110,41 @@ static __always_inline void __stackleak_erase(void)
 	current->lowest_stack = task_stack_high;
 }
 
+/*
+ * Erase and poison the portion of the task stack used since the last erase.
+ * Can be called from the task stack or an entry stack when the task stack is
+ * no longer in use.
+ */
 asmlinkage void noinstr stackleak_erase(void)
 {
 	if (skip_erasing())
 		return;
 
-	__stackleak_erase();
+	__stackleak_erase(on_thread_stack());
+}
+
+/*
+ * Erase and poison the portion of the task stack used since the last erase.
+ * Can only be called from the task stack.
+ */ +asmlinkage void noinstr stackleak_erase_on_task_stack(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(true); +} + +/* + * Erase and poison the portion of the task stack used since the last erase. + * Can only be called from a stack other than the task stack. + */ +asmlinkage void noinstr stackleak_erase_off_task_stack(void) +{ + if (skip_erasing()) + return; + + __stackleak_erase(false); } void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) -- cgit v1.2.3 From 88959a39a1708fc755a89923d60e50de74847bc9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 27 Apr 2022 18:31:28 +0100 Subject: arm64: entry: use stackleak_erase_on_task_stack() On arm64 we always call stackleak_erase() on a task stack, and never call it on another stack. We can avoid some redundant work by using stackleak_erase_on_task_stack(), telling the stackleak code that it's being called on a task stack. Signed-off-by: Mark Rutland Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Kees Cook Cc: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220427173128.2603085-14-mark.rutland@arm.com --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ede028dee81b..5b82b9292400 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -596,7 +596,7 @@ SYM_CODE_START_LOCAL(ret_to_user) ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK - bl stackleak_erase + bl stackleak_erase_on_task_stack #endif kernel_exit 0 SYM_CODE_END(ret_to_user) -- cgit v1.2.3 From 8c6a490e404fe82b53bbafe0f0eeef10605617f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 6 May 2022 13:11:45 +0100 Subject: lkdtm/stackleak: fix CONFIG_GCC_PLUGIN_STACKLEAK=n Recent rework broke building LKDTM when CONFIG_GCC_PLUGIN_STACKLEAK=n. This patch fixes that breakage. Prior to recent stackleak rework, the LKDTM STACKLEAK_ERASING code could be built when the kernel was not built with stackleak support, and would run a test that would almost certainly fail (or pass by sheer cosmic coincidence), e.g. | # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry STACKLEAK_ERASING | lkdtm: checking unused part of the thread stack (15560 bytes)... | lkdtm: FAIL: the erased part is not found (checked 15560 bytes) | lkdtm: FAIL: the thread stack is NOT properly erased! | lkdtm: This is probably expected, since this kernel (5.18.0-rc2 aarch64) was built *without* CONFIG_GCC_PLUGIN_STACKLEAK=y The recent rework to the test made it more accurate by using helpers which are only defined when CONFIG_GCC_PLUGIN_STACKLEAK=y, and so when building LKDTM when CONFIG_GCC_PLUGIN_STACKLEAK=n, we get a build failure: | drivers/misc/lkdtm/stackleak.c: In function 'check_stackleak_irqoff': | drivers/misc/lkdtm/stackleak.c:30:46: error: implicit declaration of function 'stackleak_task_low_bound' [-Werror=implicit-function-declaration] | 30 | const unsigned long task_stack_low = stackleak_task_low_bound(current); | | ^~~~~~~~~~~~~~~~~~~~~~~~ | drivers/misc/lkdtm/stackleak.c:31:47: error: implicit declaration of function 'stackleak_task_high_bound'; did you mean 'stackleak_task_init'? 
[-Werror=implicit-function-declaration]
|    31 |         const unsigned long task_stack_high = stackleak_task_high_bound(current);
|       |                                               ^~~~~~~~~~~~~~~~~~~~~~~~~
|       |                                               stackleak_task_init
| drivers/misc/lkdtm/stackleak.c:33:48: error: 'struct task_struct' has no member named 'lowest_stack'
|    33 |         const unsigned long lowest_sp = current->lowest_stack;
|       |                                                ^~
| drivers/misc/lkdtm/stackleak.c:74:23: error: implicit declaration of function 'stackleak_find_top_of_poison' [-Werror=implicit-function-declaration]
|    74 |         poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high);
|       |                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~

This patch fixes the issue by not compiling the body of the test when CONFIG_GCC_PLUGIN_STACKLEAK=n, and replacing this with an unconditional XFAIL message. This means the pr_expected_config() in check_stackleak_irqoff() is redundant, and so it is removed.

Where an architecture does not support stackleak, the test will log:

| # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT
| lkdtm: Performing direct entry STACKLEAK_ERASING
| lkdtm: XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)

Where an architecture does support stackleak, but this has not been compiled in, the test will log:

| # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT
| lkdtm: Performing direct entry STACKLEAK_ERASING
| lkdtm: XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)

Where stackleak has been compiled in, the test behaves as usual:

| # echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT
| lkdtm: Performing direct entry STACKLEAK_ERASING
| lkdtm: stackleak stack usage:
|   high offset:    336 bytes
|   current:        688 bytes
|   lowest:        1232 bytes
|   tracked:       1232 bytes
|   untracked:      672 bytes
|   poisoned:     14136 bytes
|   low offset:       8 bytes
| lkdtm: OK: the rest of the thread stack is properly erased

Fixes: f4cfacd92972cc44 ("lkdtm/stackleak: rework boundary management")
Signed-off-by: Mark Rutland
Cc: Alexander Popov
Cc: Kees Cook
Signed-off-by: Kees Cook
Link: https://lore.kernel.org/r/20220506121145.1162908-1-mark.rutland@arm.com
---
 drivers/misc/lkdtm/stackleak.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
index 52800583fd05..82369c6f889e 100644
--- a/drivers/misc/lkdtm/stackleak.c
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -11,6 +11,7 @@
 #include "lkdtm.h"
 #include <linux/stackleak.h>
 
+#if defined(CONFIG_GCC_PLUGIN_STACKLEAK)
 /*
  * Check that stackleak tracks the lowest stack pointer and erases the stack
  * below this as expected.
@@ -109,7 +110,6 @@ static void noinstr check_stackleak_irqoff(void) out: if (test_failed) { pr_err("FAIL: the thread stack is NOT properly erased!\n"); - pr_expected_config(CONFIG_GCC_PLUGIN_STACKLEAK); } else { pr_info("OK: the rest of the thread stack is properly erased\n"); } @@ -123,3 +123,13 @@ void lkdtm_STACKLEAK_ERASING(void) check_stackleak_irqoff(); local_irq_restore(flags); } +#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ +void lkdtm_STACKLEAK_ERASING(void) +{ + if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) { + pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n"); + } else { + pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)\n"); + } +} +#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ -- cgit v1.2.3 From 1ff297584fad2eef390f212b860e0fbb7363e0e8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2022 10:29:36 -0700 Subject: randomize_kstack: Improve docs on requirements/rationale There were some recent questions about where and why to use the random_kstack routines when applying them to new architectures[1]. Update the header comments to reflect the design choices for the routines. [1] https://lore.kernel.org/lkml/1652173338.7bltwybi0c.astroid@bobo.none Cc: Nicholas Piggin Cc: Xiu Jianfeng Signed-off-by: Kees Cook --- include/linux/randomize_kstack.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 1468caf001c0..5d868505a94e 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -40,10 +40,14 @@ DECLARE_PER_CPU(u32, kstack_offset); */ #define KSTACK_OFFSET_MAX(x) ((x) & 0x3FF) -/* - * These macros must be used during syscall entry when interrupts and +/** + * add_random_kstack_offset - Increase stack utilization by previously + * chosen random offset + * + * This should be used in the syscall entry path when interrupts and * preempt are disabled, and after user registers have been stored to - * the stack. + * the stack. For testing the resulting entropy, please see: + * tools/testing/selftests/lkdtm/stack-entropy.sh */ #define add_random_kstack_offset() do { \ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ @@ -55,6 +59,23 @@ DECLARE_PER_CPU(u32, kstack_offset); } \ } while (0) +/** + * choose_random_kstack_offset - Choose the random offset for the next + * add_random_kstack_offset() + * + * This should only be used during syscall exit when interrupts and + * preempt are disabled. This position in the syscall flow is done to + * frustrate attacks from userspace attempting to learn the next offset: + * - Maximize the timing uncertainty visible from userspace: if the + * offset is chosen at syscall entry, userspace has much more control + * over the timing between choosing offsets. "How long will we be in + * kernel mode?" tends to be more difficult to predict than "how long + * will we be in user mode?" + * - Reduce the lifetime of the new offset sitting in memory during + * kernel mode execution. Exposure of "thread-local" memory content + * (e.g. current, percpu, etc) tends to be easier than arbitrary + * location memory exposure. 
+ */ #define choose_random_kstack_offset(rand) do { \ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ -- cgit v1.2.3 From 61f60bac8c05f8ecd2ae2a6360520b91a45be9a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2022 16:25:54 -0700 Subject: gcc-plugins: Change all version strings match kernel It's not meaningful for the GCC plugins to track their versions separately from the rest of the kernel. Switch all versions to the kernel version. Fix mismatched indenting while we're at it. Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/Makefile | 9 +++++---- scripts/gcc-plugins/latent_entropy_plugin.c | 2 +- scripts/gcc-plugins/randomize_layout_plugin.c | 2 +- scripts/gcc-plugins/sancov_plugin.c | 2 +- scripts/gcc-plugins/stackleak_plugin.c | 2 +- scripts/gcc-plugins/structleak_plugin.c | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index 148f4639cf09..6f0aecad5d67 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -28,10 +28,11 @@ GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin) plugin_cxxflags = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \ -include $(srctree)/include/linux/compiler-version.h \ - -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ - -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ - -ggdb -Wno-narrowing -Wno-unused-variable \ - -Wno-format-diag + -include $(objtree)/include/generated/utsrelease.h \ + -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ + -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ + -ggdb -Wno-narrowing -Wno-unused-variable \ + -Wno-format-diag plugin_ldflags = -shared diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 8425da41de0d..5d415b2572a8 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -82,7 +82,7 @@ __visible int plugin_is_GPL_compatible; static GTY(()) tree latent_entropy_decl; static struct plugin_info latent_entropy_plugin_info = { - .version = "201606141920vanilla", + .version = UTS_RELEASE, .help = "disable\tturn off latent entropy instrumentation\n", }; diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index c2ec81b68505..19214e573137 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -34,7 +34,7 @@ __visible int plugin_is_GPL_compatible; static int performance_mode; static struct plugin_info randomize_layout_plugin_info = { - .version = "201402201816vanilla", + .version = UTS_RELEASE, .help = "disable\t\t\tdo not activate plugin\n" "performance-mode\tenable cacheline-aware layout randomization\n" }; diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index 23bd023a283b..f3d629555b84 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -26,7 +26,7 @@ __visible int plugin_is_GPL_compatible; tree sancov_fndecl; static struct plugin_info sancov_plugin_info = { - .version = "20160402", + .version = UTS_RELEASE, .help = "sancov plugin\n", }; diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index 42f0252ee2a4..de817d54b8af 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -44,7 +44,7 @@ static bool verbose = 
false; static GTY(()) tree track_function_decl; static struct plugin_info stackleak_plugin_info = { - .version = "201707101337", + .version = UTS_RELEASE, .help = "track-min-size=nn\ttrack stack for functions with a stack frame size >= nn bytes\n" "arch=target_arch\tspecify target build arch\n" "disable\t\tdo not activate the plugin\n" diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c index 74e319288389..86b608a24ec0 100644 --- a/scripts/gcc-plugins/structleak_plugin.c +++ b/scripts/gcc-plugins/structleak_plugin.c @@ -37,7 +37,7 @@ __visible int plugin_is_GPL_compatible; static struct plugin_info structleak_plugin_info = { - .version = "20190125vanilla", + .version = UTS_RELEASE, .help = "disable\tdo not activate plugin\n" "byref\tinit structs passed by reference\n" "byref-all\tinit anything passed by reference\n" -- cgit v1.2.3 From c1298a3a1139c9a73a188fbb153b6eb83dbd4d7d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 8 May 2022 09:15:53 -0700 Subject: big_keys: Use struct for internal payload The randstruct GCC plugin gets upset when it sees struct path (which is randomized) being assigned from a "void *" (which it cannot type-check). There's no need for these casts, as the entire internal payload use is following a normal struct layout. Convert the enum-based void * offset dereferencing to the new big_key_payload struct. No meaningful machine code changes result after this change, and source readability is improved. Drop the randstruct exception now that there is no "confusing" cross-type assignment. Cc: David Howells Cc: Eric Biggers Cc: Christoph Hellwig Cc: Jarkko Sakkinen Cc: James Morris Cc: "Serge E. Hallyn" Cc: linux-hardening@vger.kernel.org Cc: keyrings@vger.kernel.org Cc: linux-security-module@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/randomize_layout_plugin.c | 2 - security/keys/big_key.c | 73 +++++++++++++-------------- 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 19214e573137..5836a7fc7532 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -50,8 +50,6 @@ static const struct whitelist_entry whitelist[] = { { "drivers/net/ethernet/sun/niu.c", "page", "address_space" }, /* unix_skb_parms via UNIXCB() buffer */ { "net/unix/af_unix.c", "unix_skb_parms", "char" }, - /* big_key payload.data struct splashing */ - { "security/keys/big_key.c", "path", "void *" }, { } }; diff --git a/security/keys/big_key.c b/security/keys/big_key.c index d17e5f09eeb8..c3367622c683 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -20,12 +20,13 @@ /* * Layout of key payload words. 
*/ -enum { - big_key_data, - big_key_path, - big_key_path_2nd_part, - big_key_len, +struct big_key_payload { + u8 *data; + struct path path; + size_t length; }; +#define to_big_key_payload(payload) \ + (struct big_key_payload *)((payload).data) /* * If the data is under this limit, there's no point creating a shm file to @@ -55,7 +56,7 @@ struct key_type key_type_big_key = { */ int big_key_preparse(struct key_preparsed_payload *prep) { - struct path *path = (struct path *)&prep->payload.data[big_key_path]; + struct big_key_payload *payload = to_big_key_payload(prep->payload); struct file *file; u8 *buf, *enckey; ssize_t written; @@ -63,13 +64,15 @@ int big_key_preparse(struct key_preparsed_payload *prep) size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE; int ret; + BUILD_BUG_ON(sizeof(*payload) != sizeof(prep->payload.data)); + if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; - prep->payload.data[big_key_len] = (void *)(unsigned long)datalen; + payload->length = datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { /* Create a shmem file to store the data in. This will permit the data @@ -117,9 +120,9 @@ int big_key_preparse(struct key_preparsed_payload *prep) /* Pin the mount and dentry to the key so that we can open it again * later */ - prep->payload.data[big_key_data] = enckey; - *path = file->f_path; - path_get(path); + payload->data = enckey; + payload->path = file->f_path; + path_get(&payload->path); fput(file); kvfree_sensitive(buf, enclen); } else { @@ -129,7 +132,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) if (!data) return -ENOMEM; - prep->payload.data[big_key_data] = data; + payload->data = data; memcpy(data, prep->data, prep->datalen); } return 0; @@ -148,12 +151,11 @@ error: */ void big_key_free_preparse(struct key_preparsed_payload *prep) { - if (prep->datalen > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&prep->payload.data[big_key_path]; + struct big_key_payload *payload = to_big_key_payload(prep->payload); - path_put(path); - } - kfree_sensitive(prep->payload.data[big_key_data]); + if (prep->datalen > BIG_KEY_FILE_THRESHOLD) + path_put(&payload->path); + kfree_sensitive(payload->data); } /* @@ -162,13 +164,12 @@ void big_key_free_preparse(struct key_preparsed_payload *prep) */ void big_key_revoke(struct key *key) { - struct path *path = (struct path *)&key->payload.data[big_key_path]; + struct big_key_payload *payload = to_big_key_payload(key->payload); /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_positive(key) && - (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) - vfs_truncate(path, 0); + if (key_is_positive(key) && payload->length > BIG_KEY_FILE_THRESHOLD) + vfs_truncate(&payload->path, 0); } /* @@ -176,17 +177,15 @@ void big_key_revoke(struct key *key) */ void big_key_destroy(struct key *key) { - size_t datalen = (size_t)key->payload.data[big_key_len]; - - if (datalen > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&key->payload.data[big_key_path]; + struct big_key_payload *payload = to_big_key_payload(key->payload); - path_put(path); - path->mnt = NULL; - path->dentry = NULL; + if (payload->length > BIG_KEY_FILE_THRESHOLD) { + path_put(&payload->path); + payload->path.mnt = NULL; + payload->path.dentry = NULL; } - kfree_sensitive(key->payload.data[big_key_data]); - key->payload.data[big_key_data] = NULL; + kfree_sensitive(payload->data); + payload->data = NULL; } /* @@ -211,14 +210,14 @@ int 
big_key_update(struct key *key, struct key_preparsed_payload *prep) */ void big_key_describe(const struct key *key, struct seq_file *m) { - size_t datalen = (size_t)key->payload.data[big_key_len]; + struct big_key_payload *payload = to_big_key_payload(key->payload); seq_puts(m, key->description); if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", - datalen, - datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); + payload->length, + payload->length > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); } /* @@ -227,16 +226,16 @@ void big_key_describe(const struct key *key, struct seq_file *m) */ long big_key_read(const struct key *key, char *buffer, size_t buflen) { - size_t datalen = (size_t)key->payload.data[big_key_len]; + struct big_key_payload *payload = to_big_key_payload(key->payload); + size_t datalen = payload->length; long ret; if (!buffer || buflen < datalen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { - struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; - u8 *buf, *enckey = (u8 *)key->payload.data[big_key_data]; + u8 *buf, *enckey = payload->data; size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE; loff_t pos = 0; @@ -244,7 +243,7 @@ long big_key_read(const struct key *key, char *buffer, size_t buflen) if (!buf) return -ENOMEM; - file = dentry_open(path, O_RDONLY, current_cred()); + file = dentry_open(&payload->path, O_RDONLY, current_cred()); if (IS_ERR(file)) { ret = PTR_ERR(file); goto error; @@ -274,7 +273,7 @@ error: kvfree_sensitive(buf, enclen); } else { ret = datalen; - memcpy(buffer, key->payload.data[big_key_data], datalen); + memcpy(buffer, payload->data, datalen); } return ret; -- cgit v1.2.3 From 2dcfe9e2d370f6643486e327c6ae17af8887756c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 May 2022 15:11:54 -0700 Subject: niu: Silence randstruct warnings Clang randstruct gets upset when it sees struct addresspace (which is randomized) being assigned to a struct page (which is not randomized): drivers/net/ethernet/sun/niu.c:3385:12: error: casting from randomized structure pointer type 'struct address_space *' to 'struct page *' *link = (struct page *) page->mapping; ^ It looks like niu.c is looking for an in-line place to chain its allocated pages together and is overloading the "mapping" member, as it is unused. This is very non-standard, and is expected to be cleaned up in the future[1], but there is no "correct" way to handle it today. No meaningful machine code changes result after this change, and source readability is improved. Drop the randstruct exception now that there is no "confusing" cross-type assignment. [1] https://lore.kernel.org/lkml/YnqgjVoMDu5v9PNG@casper.infradead.org/ Cc: "Matthew Wilcox (Oracle)" Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Du Cheng Cc: Christophe JAILLET Cc: Vlastimil Babka Cc: William Kucharski Cc: Arnd Bergmann Cc: Nathan Chancellor Cc: netdev@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-hardening@vger.kernel.org Acked-by: Jakub Kicinski Link: https://lore.kernel.org/lkml/20220511151647.7290adbe@kernel.org Signed-off-by: Kees Cook --- drivers/net/ethernet/sun/niu.c | 41 ++++++++++++++++++++------- scripts/gcc-plugins/randomize_layout_plugin.c | 2 -- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 42460c0885fc..df70df29deea 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -35,6 +35,25 @@ #include "niu.h" +/* This driver wants to store a link to a "next page" within the + * page struct itself by overloading the content of the "mapping" + * member. This is not expected by the page API, but does currently + * work. However, the randstruct plugin gets very bothered by this + * case because "mapping" (struct address_space) is randomized, so + * casts to/from it trigger warnings. Hide this by way of a union, + * to create a typed alias of "mapping", since that's how it is + * actually being used here. + */ +union niu_page { + struct page page; + struct { + unsigned long __flags; /* unused alias of "flags" */ + struct list_head __lru; /* unused alias of "lru" */ + struct page *next; /* alias of "mapping" */ + }; +}; +#define niu_next_page(p) container_of(p, union niu_page, page)->next + #define DRV_MODULE_NAME "niu" #define DRV_MODULE_VERSION "1.1" #define DRV_MODULE_RELDATE "Apr 22, 2010" @@ -3283,7 +3302,7 @@ static struct page *niu_find_rxpage(struct rx_ring_info *rp, u64 addr, addr &= PAGE_MASK; pp = &rp->rxhash[h]; - for (; (p = *pp) != NULL; pp = (struct page **) &p->mapping) { + for (; (p = *pp) != NULL; pp = &niu_next_page(p)) { if (p->index == addr) { *link = pp; goto found; @@ -3300,7 +3319,7 @@ static void niu_hash_page(struct rx_ring_info *rp, struct page *page, u64 base) unsigned int h = niu_hash_rxaddr(rp, base); page->index = base; - page->mapping = (struct address_space *) rp->rxhash[h]; + niu_next_page(page) = rp->rxhash[h]; rp->rxhash[h] = page; } @@ -3382,11 +3401,11 @@ static int niu_rx_pkt_ignore(struct niu *np, struct rx_ring_info *rp) rcr_size = rp->rbr_sizes[(val & RCR_ENTRY_PKTBUFSZ) >> RCR_ENTRY_PKTBUFSZ_SHIFT]; if ((page->index + PAGE_SIZE) - rcr_size == addr) { - *link = (struct page *) page->mapping; + *link = niu_next_page(page); np->ops->unmap_page(np->device, page->index, PAGE_SIZE, DMA_FROM_DEVICE); page->index = 0; - page->mapping = NULL; + niu_next_page(page) = NULL; __free_page(page); rp->rbr_refill_pending++; } @@ -3451,11 +3470,11 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, niu_rx_skb_append(skb, page, off, append_size, rcr_size); if ((page->index + rp->rbr_block_size) - rcr_size == addr) { - *link = (struct page *) page->mapping; + *link = niu_next_page(page); np->ops->unmap_page(np->device, page->index, PAGE_SIZE, DMA_FROM_DEVICE); page->index = 0; - page->mapping = NULL; + niu_next_page(page) = NULL; rp->rbr_refill_pending++; } else get_page(page); @@ -3518,13 +3537,13 @@ static void niu_rbr_free(struct niu *np, struct rx_ring_info *rp) page = rp->rxhash[i]; while (page) { - struct page *next = (struct page *) page->mapping; + struct page *next = niu_next_page(page); u64 base = page->index; np->ops->unmap_page(np->device, base, PAGE_SIZE, DMA_FROM_DEVICE); page->index = 0; - 
page->mapping = NULL; + niu_next_page(page) = NULL; __free_page(page); @@ -6440,8 +6459,7 @@ static void niu_reset_buffers(struct niu *np) page = rp->rxhash[j]; while (page) { - struct page *next = - (struct page *) page->mapping; + struct page *next = niu_next_page(page); u64 base = page->index; base = base >> RBR_DESCR_ADDR_SHIFT; rp->rbr[k++] = cpu_to_le32(base); @@ -10176,6 +10194,9 @@ static int __init niu_init(void) BUILD_BUG_ON(PAGE_SIZE < 4 * 1024); + BUILD_BUG_ON(offsetof(struct page, mapping) != + offsetof(union niu_page, next)); + niu_debug = netif_msg_init(debug, NIU_MSG_DEFAULT); #ifdef CONFIG_SPARC64 diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 5836a7fc7532..c9d345a91c41 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -46,8 +46,6 @@ struct whitelist_entry { }; static const struct whitelist_entry whitelist[] = { - /* NIU overloads mapping with page struct */ - { "drivers/net/ethernet/sun/niu.c", "page", "address_space" }, /* unix_skb_parms via UNIXCB() buffer */ { "net/unix/af_unix.c", "unix_skb_parms", "char" }, { } -- cgit v1.2.3 From b146cbf2e32f01f56244d670aef2f43d44fcf120 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2022 15:46:26 -0700 Subject: af_unix: Silence randstruct GCC plugin warning While preparing for Clang randstruct support (which duplicated many of the warnings the randstruct GCC plugin warned about), one strange one remained only for the randstruct GCC plugin. Eliminating this rids the plugin of the last exception. It seems the plugin is happy to dereference individual members of a cross-struct cast, but it is upset about casting to a whole object pointer. This only manifests in one place in the kernel, so just replace the variable with individual member accesses. There is no change in executable instruction output. Drop the last exception from the randstruct GCC plugin. Cc: "David S. 
Miller" Cc: Christoph Hellwig Cc: Paolo Abeni Cc: Alexei Starovoitov Cc: Cong Wang Cc: Al Viro Cc: netdev@vger.kernel.org Cc: linux-hardening@vger.kernel.org Acked-by: Kuniyuki Iwashima Link: https://lore.kernel.org/lkml/20220511022217.58586-1-kuniyu@amazon.co.jp Acked-by: Jakub Kicinski Link: https://lore.kernel.org/lkml/20220511151542.4cb3ff17@kernel.org Signed-off-by: Kees Cook --- net/unix/af_unix.c | 8 +++----- scripts/gcc-plugins/randomize_layout_plugin.c | 2 -- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..36367e7e3e0a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1808,11 +1808,9 @@ static int maybe_init_creds(struct scm_cookie *scm, static bool unix_skb_scm_eq(struct sk_buff *skb, struct scm_cookie *scm) { - const struct unix_skb_parms *u = &UNIXCB(skb); - - return u->pid == scm->pid && - uid_eq(u->uid, scm->creds.uid) && - gid_eq(u->gid, scm->creds.gid) && + return UNIXCB(skb).pid == scm->pid && + uid_eq(UNIXCB(skb).uid, scm->creds.uid) && + gid_eq(UNIXCB(skb).gid, scm->creds.gid) && unix_secdata_eq(scm, skb); } diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index c9d345a91c41..2ca768d88a68 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -46,8 +46,6 @@ struct whitelist_entry { }; static const struct whitelist_entry whitelist[] = { - /* unix_skb_parms via UNIXCB() buffer */ - { "net/unix/af_unix.c", "unix_skb_parms", "char" }, { } }; -- cgit v1.2.3 From 710e4ebfbacac53b05c86a01e6d636c69f6eca9f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 May 2022 16:22:45 -0700 Subject: gcc-plugins: randstruct: Remove cast exception handling With all randstruct exceptions removed, remove all the exception handling code. Any future warnings are likely to be shared between this plugin and Clang randstruct, and will need to be addressed in a more wholistic fashion. 
Cc: Christoph Hellwig Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/randomize_layout_plugin.c | 79 +-------------------------- 1 file changed, 3 insertions(+), 76 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 2ca768d88a68..ea2aea570404 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -39,16 +39,6 @@ static struct plugin_info randomize_layout_plugin_info = { "performance-mode\tenable cacheline-aware layout randomization\n" }; -struct whitelist_entry { - const char *pathname; - const char *lhs; - const char *rhs; -}; - -static const struct whitelist_entry whitelist[] = { - { } -}; - /* from old Linux dcache.h */ static inline unsigned long partial_name_hash(unsigned long c, unsigned long prevhash) @@ -734,60 +724,6 @@ static void handle_local_var_initializers(void) } } -static bool type_name_eq(gimple stmt, const_tree type_tree, const char *wanted_name) -{ - const char *type_name; - - if (type_tree == NULL_TREE) - return false; - - switch (TREE_CODE(type_tree)) { - case RECORD_TYPE: - type_name = TYPE_NAME_POINTER(type_tree); - break; - case INTEGER_TYPE: - if (TYPE_PRECISION(type_tree) == CHAR_TYPE_SIZE) - type_name = "char"; - else { - INFORM(gimple_location(stmt), "found non-char INTEGER_TYPE cast comparison: %qT\n", type_tree); - debug_tree(type_tree); - return false; - } - break; - case POINTER_TYPE: - if (TREE_CODE(TREE_TYPE(type_tree)) == VOID_TYPE) { - type_name = "void *"; - break; - } else { - INFORM(gimple_location(stmt), "found non-void POINTER_TYPE cast comparison %qT\n", type_tree); - debug_tree(type_tree); - return false; - } - default: - INFORM(gimple_location(stmt), "unhandled cast comparison: %qT\n", type_tree); - debug_tree(type_tree); - return false; - } - - return strcmp(type_name, wanted_name) == 0; -} - -static bool whitelisted_cast(gimple stmt, const_tree lhs_tree, const_tree rhs_tree) -{ - const struct whitelist_entry *entry; - expanded_location xloc = expand_location(gimple_location(stmt)); - - for (entry = whitelist; entry->pathname; entry++) { - if (!strstr(xloc.file, entry->pathname)) - continue; - - if (type_name_eq(stmt, lhs_tree, entry->lhs) && type_name_eq(stmt, rhs_tree, entry->rhs)) - return true; - } - - return false; -} - /* * iterate over all statements to find "bad" casts: * those where the address of the start of a structure is cast @@ -864,10 +800,7 @@ static unsigned int find_bad_casts_execute(void) #ifndef __DEBUG_PLUGIN if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_lhs_type))) #endif - { - if (!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type)) - MISMATCH(gimple_location(stmt), "rhs", ptr_lhs_type, ptr_rhs_type); - } + MISMATCH(gimple_location(stmt), "rhs", ptr_lhs_type, ptr_rhs_type); continue; } @@ -890,10 +823,7 @@ static unsigned int find_bad_casts_execute(void) #ifndef __DEBUG_PLUGIN if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(op0_type))) #endif - { - if (!whitelisted_cast(stmt, ptr_lhs_type, op0_type)) - MISMATCH(gimple_location(stmt), "op0", ptr_lhs_type, op0_type); - } + MISMATCH(gimple_location(stmt), "op0", ptr_lhs_type, op0_type); } else { const_tree ssa_name_var = SSA_NAME_VAR(rhs1); /* skip bogus type casts introduced by container_of */ @@ -903,10 +833,7 @@ static unsigned int find_bad_casts_execute(void) #ifndef __DEBUG_PLUGIN if (lookup_attribute("randomize_performed", TYPE_ATTRIBUTES(ptr_rhs_type))) #endif - { - if 
(!whitelisted_cast(stmt, ptr_lhs_type, ptr_rhs_type)) - MISMATCH(gimple_location(stmt), "ssa", ptr_lhs_type, ptr_rhs_type); - } + MISMATCH(gimple_location(stmt), "ssa", ptr_lhs_type, ptr_rhs_type); } } -- cgit v1.2.3 From a5f4d9df1f7beaaebbaa5943ceb789c34f10b8d5 Mon Sep 17 00:00:00 2001 From: Yuanzheng Song Date: Thu, 5 May 2022 07:10:37 +0000 Subject: mm: usercopy: move the virt_addr_valid() below the is_vmalloc_addr() The is_kmap_addr() and is_vmalloc_addr() checks in check_heap_object() can never trigger, because virt_addr_valid() rejects kmap and vmalloc addresses, so the function returns before reaching them. So let's move the virt_addr_valid() check below the is_vmalloc_addr() check. Signed-off-by: Yuanzheng Song Fixes: 4e140f59d285 ("mm/usercopy: Check kmap addresses properly") Fixes: 0aef499f3172 ("mm/usercopy: Detect vmalloc overruns") Cc: Matthew Wilcox (Oracle) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220505071037.4121100-1-songyuanzheng@huawei.com --- mm/usercopy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index ac8a093e90c1..baeacc735b83 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -163,9 +163,6 @@ static inline void check_heap_object(const void *ptr, unsigned long n, { struct folio *folio; - if (!virt_addr_valid(ptr)) - return; - if (is_kmap_addr(ptr)) { unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1); @@ -190,6 +187,9 @@ static inline void check_heap_object(const void *ptr, unsigned long n, return; } + if (!virt_addr_valid(ptr)) + return; + folio = virt_to_folio(ptr); if (folio_test_slab(folio)) { -- cgit v1.2.3 From ed5edd5a70b9525085403f193786395179ea303d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 May 2022 08:20:14 +0200 Subject: loadpin: stop using bdevname Use the %pg format specifier to save on stack consumption and code size. Signed-off-by: Christoph Hellwig Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220512062014.1826835-1-hch@lst.de --- security/loadpin/loadpin.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index b12f7d986b1e..ad4e6756c038 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -78,11 +78,8 @@ static void check_pinning_enforcement(struct super_block *mnt_sb) * device, allow sysctl to change modes for testing. */ if (mnt_sb->s_bdev) { - char bdev[BDEVNAME_SIZE]; - ro = bdev_read_only(mnt_sb->s_bdev); - bdevname(mnt_sb->s_bdev, bdev); - pr_info("%s (%u:%u): %s\n", bdev, + pr_info("%pg (%u:%u): %s\n", mnt_sb->s_bdev, MAJOR(mnt_sb->s_bdev->bd_dev), MINOR(mnt_sb->s_bdev->bd_dev), ro ? "read-only" : "writable"); -- cgit v1.2.3
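For anyone who wants to experiment with the union-alias technique from the niu patch above outside the kernel, here is a minimal standalone C sketch. The names record, record_alias, and the overloaded "owner" member are hypothetical, and container_of() is reduced to the bare definition this example needs:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct record {
	unsigned long flags;
	void *owner;			/* member being overloaded */
};

union record_alias {
	struct record rec;
	struct {
		unsigned long __flags;	/* unused alias of "flags" */
		struct record *next;	/* typed alias of "owner" */
	};
};

#define record_next(r) (container_of(r, union record_alias, rec)->next)

int main(void)
{
	struct record a = { 0 }, b = { 0 };

	/* Mirror of the BUILD_BUG_ON() layout check the niu patch adds. */
	assert(offsetof(struct record, owner) ==
	       offsetof(union record_alias, next));

	record_next(&a) = &b;	/* link a -> b without a cross-type cast */
	printf("a links to b: %s\n", record_next(&a) == &b ? "yes" : "no");
	return 0;
}

The union gives the overloaded member a properly typed name, so the compiler (and the randstruct plugin) sees an ordinary member access instead of a cast through an unrelated structure; the assert() stands in for the compile-time layout check the kernel version performs.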