| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 03:52:29 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 03:52:29 +0300 |
| commit | d568788baab24875604c231f723dbb72387fb081 (patch) | |
| tree | 0921e372d643541c59751e1af47b20fc1b702204 | |
| parent | cea4a90faf9e5d15aee1fd01883bc81ad7640260 (diff) | |
| parent | cf2f06f7152d5e38a87aa2e9b8b452714789f6ba (diff) | |
| download | linux-d568788baab24875604c231f723dbb72387fb081.tar.xz | |
Merge tag 'hardening-v7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull hardening updates from Kees Cook:
- randomize_kstack: Improve implementation across arches (Ryan Roberts)
- lkdtm/fortify: Drop unneeded FORTIFY_STR_OBJECT test
- refcount: Remove unused __signed_wrap function annotations
* tag 'hardening-v7.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
lkdtm/fortify: Drop unneeded FORTIFY_STR_OBJECT test
refcount: Remove unused __signed_wrap function annotations
randomize_kstack: Unify random source across arches
randomize_kstack: Maintain kstack_offset per task
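The shape of the change is the same across the arch diffs below: instead of an offset chosen at syscall exit by choose_random_kstack_offset() and consumed at the next entry, the offset is now drawn once at entry, inside the instrumentation-enabled region. A minimal sketch of the resulting pattern follows; arch_do_syscall() and invoke_syscall() are placeholder names rather than kernel symbols, and the real call sites are in the per-arch hunks:

```c
/*
 * Sketch of the unified entry-path pattern (modeled on the x86-64 hunk
 * below). arch_do_syscall() and invoke_syscall() are placeholders.
 */
noinstr void arch_do_syscall(struct pt_regs *regs, int nr)
{
	nr = syscall_enter_from_user_mode(regs, nr);

	instrumentation_begin();
	/*
	 * Draw the stack offset here, at entry. Previously this consumed
	 * a per-CPU value that choose_random_kstack_offset() had stored
	 * during the previous syscall's exit.
	 */
	add_random_kstack_offset();
	invoke_syscall(regs, nr);		/* arch-specific dispatch */
	instrumentation_end();

	syscall_exit_to_user_mode(regs);
}
```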
| -rw-r--r-- | arch/Kconfig | 5 |
| -rw-r--r-- | arch/arm64/kernel/syscall.c | 11 |
| -rw-r--r-- | arch/loongarch/kernel/syscall.c | 11 |
| -rw-r--r-- | arch/powerpc/kernel/syscall.c | 16 |
| -rw-r--r-- | arch/riscv/kernel/traps.c | 12 |
| -rw-r--r-- | arch/s390/include/asm/entry-common.h | 8 |
| -rw-r--r-- | arch/s390/kernel/syscall.c | 2 |
| -rw-r--r-- | arch/x86/entry/syscall_32.c | 4 |
| -rw-r--r-- | arch/x86/entry/syscall_64.c | 2 |
| -rw-r--r-- | arch/x86/include/asm/entry-common.h | 12 |
| -rw-r--r-- | drivers/misc/lkdtm/fortify.c | 36 |
| -rw-r--r-- | include/linux/compiler_types.h | 9 |
| -rw-r--r-- | include/linux/randomize_kstack.h | 54 |
| -rw-r--r-- | include/linux/refcount.h | 10 |
| -rw-r--r-- | init/main.c | 9 |
| -rw-r--r-- | kernel/fork.c | 1 |
| -rw-r--r-- | tools/testing/selftests/lkdtm/tests.txt | 1 |
17 files changed, 49 insertions, 154 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 84089e80584b..334b69505381 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1518,9 +1518,8 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	def_bool n
 	help
 	  An arch should select this symbol if it can support kernel stack
-	  offset randomization with calls to add_random_kstack_offset()
-	  during syscall entry and choose_random_kstack_offset() during
-	  syscall exit. Careful removal of -fstack-protector-strong and
+	  offset randomization with a call to add_random_kstack_offset()
+	  during syscall entry. Careful removal of -fstack-protector-strong and
 	  -fstack-protector should also be applied to the entry code and
 	  closely examined, as the artificial stack bump looks like an array
 	  to the compiler, so it will attempt to add canary checks regardless
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c062badd1a56..358ddfbf1401 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -52,17 +52,6 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	}
 
 	syscall_set_return_value(current, regs, 0, ret);
-
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints: the AAPCS mandates a
-	 * 16-byte aligned SP at function boundaries, which will remove the
-	 * 4 low bits from any entropy chosen here.
-	 *
-	 * The resulting 6 bits of entropy is seen in SP[9:4].
-	 */
-	choose_random_kstack_offset(get_random_u16());
 }
 
 static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index 1249d82c1cd0..85da7e050d97 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -79,16 +79,5 @@ void noinstr __no_stack_protector do_syscall(struct pt_regs *regs)
 			   regs->regs[7], regs->regs[8], regs->regs[9]);
 	}
 
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints: 16-bytes (i.e. 4-bits)
-	 * aligned, which will remove the 4 low bits from any entropy chosen
-	 * here.
-	 *
-	 * The resulting 6 bits of entropy is seen in SP[9:4].
-	 */
-	choose_random_kstack_offset(get_cycles());
-
 	syscall_exit_to_user_mode(regs);
 }
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index be159ad4b77b..b762677f8737 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -20,8 +20,6 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 
 	kuap_lock();
 
-	add_random_kstack_offset();
-
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
@@ -30,6 +28,8 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
 	user_exit_irqoff();
 
+	add_random_kstack_offset();
+
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!user_mode(regs));
 	BUG_ON(arch_irq_disabled_regs(regs));
@@ -173,17 +173,5 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 	}
 #endif
 
-	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * so the maximum stack offset is 1k bytes (10 bits).
-	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints: the powerpc architecture
-	 * may have two kinds of stack alignment (16-bytes and 8-bytes).
-	 *
-	 * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
-	 */
-	choose_random_kstack_offset(mftb());
-
 	return ret;
 }
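The comments removed above all restate the same arithmetic, which the new random source does not change. A worked sketch of that math, using the common KSTACK_OFFSET_MAX() definition visible in the include/linux/randomize_kstack.h hunk further down (kstack_entropy_demo() is an invented name, not a kernel symbol):

```c
/*
 * Worked example of the offset arithmetic described in the removed
 * comments; illustration only.
 */
static u32 kstack_entropy_demo(u32 rnd)
{
	/* KSTACK_OFFSET_MAX(): keep bits [9:2], i.e. at most 1020 bytes. */
	u32 offset = rnd & 0b1111111100;

	/*
	 * The compiler rounds the resulting alloca() up to the ABI stack
	 * alignment (16 bytes on arm64/x86-64), discarding the 4 low
	 * bits, so roughly 6 bits of randomness survive in SP[9:4].
	 */
	return offset;
}
```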
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 5fb57fad188a..461279a7bd86 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -344,18 +344,6 @@ void do_trap_ecall_u(struct pt_regs *regs)
 			syscall_handler(regs, syscall);
 		}
 
-		/*
-		 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-		 * so the maximum stack offset is 1k bytes (10 bits).
-		 *
-		 * The actual entropy will be further reduced by the compiler when
-		 * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
-		 * for RV32I or RV64I.
-		 *
-		 * The resulting 6 bits of entropy is seen in SP[9:4].
-		 */
-		choose_random_kstack_offset(get_random_u16());
-
 		syscall_exit_to_user_mode(regs);
 	} else {
 		irqentry_state_t state = irqentry_nmi_enter(regs);
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 979af986a8fe..35450a485323 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -51,14 +51,6 @@ static __always_inline void arch_exit_to_user_mode(void)
 
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
-						  unsigned long ti_work)
-{
-	choose_random_kstack_offset(get_tod_clock_fast());
-}
-
-#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
-
 static __always_inline bool arch_in_rcu_eqs(void)
 {
 	if (IS_ENABLED(CONFIG_KVM))
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index d103c853e120..75d5a3cab14e 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -97,8 +97,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
 {
 	unsigned long nr;
 
-	add_random_kstack_offset();
 	enter_from_user_mode(regs);
+	add_random_kstack_offset();
 	regs->psw = get_lowcore()->svc_old_psw;
 	regs->int_code = get_lowcore()->svc_int_code;
 	update_timer_sys();
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8e829575e12f..31b9492fe851 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -247,7 +247,6 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
 	int nr = syscall_32_enter(regs);
 
-	add_random_kstack_offset();
 	/*
 	 * Subtlety here: if ptrace pokes something larger than 2^31-1 into
 	 * orig_ax, the int return value truncates it. This matches
@@ -256,6 +255,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
+	add_random_kstack_offset();
 	do_syscall_32_irqs_on(regs, nr);
 
 	instrumentation_end();
@@ -268,7 +268,6 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 	int nr = syscall_32_enter(regs);
 	int res;
 
-	add_random_kstack_offset();
 	/*
 	 * This cannot use syscall_enter_from_user_mode() as it has to
 	 * fetch EBP before invoking any of the syscall entry work
@@ -277,6 +276,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 	enter_from_user_mode(regs);
 	instrumentation_begin();
+	add_random_kstack_offset();
 	local_irq_enable();
 
 	/* Fetch EBP from where the vDSO stashed it. */
 	if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b6e68ea98b83..71f032504e73 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -86,10 +86,10 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 
 /* Returns true to return using SYSRET, or false to use IRET */
 __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
-	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
+	add_random_kstack_offset();
 	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
 		/* Invalid system call, but still a system call. */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce3eb6d5fdf9..7535131c711b 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -82,18 +82,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 	current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
 #endif
 
-	/*
-	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
-	 * bits. The actual entropy will be further reduced by the compiler
-	 * when applying stack alignment constraints (see cc_stack_align4/8 in
-	 * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
-	 * low bits from any entropy chosen here.
-	 *
-	 * Therefore, final stack offset entropy will be 7 (x86_64) or
-	 * 8 (ia32) bits.
-	 */
-	choose_random_kstack_offset(rdtsc());
-
 	/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
 	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
 	    this_cpu_read(x86_ibpb_exit_to_user)) {
diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c
index 00ed2147113e..7615a02dfc47 100644
--- a/drivers/misc/lkdtm/fortify.c
+++ b/drivers/misc/lkdtm/fortify.c
@@ -10,30 +10,6 @@
 
 static volatile int fortify_scratch_space;
 
-static void lkdtm_FORTIFY_STR_OBJECT(void)
-{
-	struct target {
-		char a[10];
-		int foo;
-	} target[3] = {};
-	/*
-	 * Using volatile prevents the compiler from determining the value of
-	 * 'size' at compile time. Without that, we would get a compile error
-	 * rather than a runtime error.
-	 */
-	volatile int size = 20;
-
-	pr_info("trying to strcmp() past the end of a struct\n");
-
-	strncpy(target[0].a, target[1].a, size);
-
-	/* Store result to global to prevent the code from being eliminated */
-	fortify_scratch_space = target[0].a[3];
-
-	pr_err("FAIL: fortify did not block a strncpy() object write overflow!\n");
-	pr_expected_config(CONFIG_FORTIFY_SOURCE);
-}
-
 static void lkdtm_FORTIFY_STR_MEMBER(void)
 {
 	struct target {
@@ -47,22 +23,23 @@ static void lkdtm_FORTIFY_STR_MEMBER(void)
 	if (!src)
 		return;
 
+	/* 15 bytes: past end of a[] but not target. */
 	strscpy(src, "over ten bytes", size);
 	size = strlen(src) + 1;
 
-	pr_info("trying to strncpy() past the end of a struct member...\n");
+	pr_info("trying to strscpy() past the end of a struct member...\n");
 
 	/*
-	 * strncpy(target.a, src, 20); will hit a compile error because the
-	 * compiler knows at build time that target.a < 20 bytes. Use a
+	 * strscpy(target.a, src, 15); will hit a compile error because the
+	 * compiler knows at build time that target.a < 15 bytes. Use a
 	 * volatile to force a runtime error.
 	 */
-	strncpy(target.a, src, size);
+	strscpy(target.a, src, size);
 
 	/* Store result to global to prevent the code from being eliminated */
 	fortify_scratch_space = target.a[3];
 
-	pr_err("FAIL: fortify did not block a strncpy() struct member write overflow!\n");
+	pr_err("FAIL: fortify did not block a strscpy() struct member write overflow!\n");
 	pr_expected_config(CONFIG_FORTIFY_SOURCE);
 
 	kfree(src);
@@ -210,7 +187,6 @@ static void lkdtm_FORTIFY_STRSCPY(void)
 }
 
 static struct crashtype crashtypes[] = {
-	CRASHTYPE(FORTIFY_STR_OBJECT),
 	CRASHTYPE(FORTIFY_STR_MEMBER),
 	CRASHTYPE(FORTIFY_MEM_OBJECT),
 	CRASHTYPE(FORTIFY_MEM_MEMBER),
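With the FORTIFY_STR_OBJECT test gone, FORTIFY_STR_MEMBER above carries the string-overflow coverage, now via strscpy(). The runtime trap it expects works roughly as sketched below, assuming mainline CONFIG_FORTIFY_SOURCE behavior (this mirrors the test's logic; it is not the lkdtm code):

```c
#include <linux/string.h>

/* Illustration of the member overflow the reworked test provokes. */
static void fortify_member_overflow_demo(void)
{
	struct target {
		char a[10];
		int foo;
	} t = {};
	/* volatile hides the length, forcing a runtime (not build) check. */
	volatile size_t size = 15;

	/*
	 * The fortified strscpy() knows __builtin_object_size(t.a, 1) == 10.
	 * Because "size" is not a compile-time constant, the check cannot
	 * fail the build; instead it fires at runtime, before the copy
	 * writes past a[] into t.foo.
	 */
	strscpy(t.a, "over ten bytes", size);	/* 14 chars + NUL = 15 */
}
```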
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 890076d0974b..e8fd77593b68 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -432,18 +432,11 @@ struct ftrace_likely_data {
 #define at_least
 #endif
 
-/* Do not trap wrapping arithmetic within an annotated function. */
-#ifdef CONFIG_UBSAN_INTEGER_WRAP
-# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
-#else
-# define __signed_wrap
-#endif
-
 /* Section for code which can't be instrumented at all */
 #define __noinstr_section(section)					\
 	noinline notrace __attribute((__section__(section)))		\
 	__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
-	__no_sanitize_memory __signed_wrap
+	__no_sanitize_memory
 
 #define noinstr __noinstr_section(".noinstr.text")
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 1d982dbdd0d0..024fc20e7762 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -6,10 +6,10 @@
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
 #include <linux/percpu-defs.h>
+#include <linux/prandom.h>
 
 DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			 randomize_kstack_offset);
-DECLARE_PER_CPU(u32, kstack_offset);
 
 /*
  * Do not use this anywhere else in the kernel. This is used here because
@@ -46,53 +46,39 @@ DECLARE_PER_CPU(u32, kstack_offset);
 #define KSTACK_OFFSET_MAX(x)	((x) & 0b1111111100)
 #endif
 
+DECLARE_PER_CPU(struct rnd_state, kstack_rnd_state);
+
+static __always_inline u32 get_kstack_offset(void)
+{
+	struct rnd_state *state;
+	u32 rnd;
+
+	state = &get_cpu_var(kstack_rnd_state);
+	rnd = prandom_u32_state(state);
+	put_cpu_var(kstack_rnd_state);
+
+	return rnd;
+}
+
 /**
- * add_random_kstack_offset - Increase stack utilization by previously
- *			      chosen random offset
+ * add_random_kstack_offset - Increase stack utilization by a random offset.
  *
- * This should be used in the syscall entry path when interrupts and
- * preempt are disabled, and after user registers have been stored to
- * the stack. For testing the resulting entropy, please see:
- * tools/testing/selftests/lkdtm/stack-entropy.sh
+ * This should be used in the syscall entry path after user registers have been
+ * stored to the stack. Preemption may be enabled. For testing the resulting
+ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
 */
 #define add_random_kstack_offset() do {					\
 	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
 				&randomize_kstack_offset)) {		\
-		u32 offset = raw_cpu_read(kstack_offset);		\
+		u32 offset = get_kstack_offset();			\
 		u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
 		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
 } while (0)
 
-/**
- * choose_random_kstack_offset - Choose the random offset for the next
- *				 add_random_kstack_offset()
- *
- * This should only be used during syscall exit when interrupts and
- * preempt are disabled. This position in the syscall flow is done to
- * frustrate attacks from userspace attempting to learn the next offset:
- * - Maximize the timing uncertainty visible from userspace: if the
- *   offset is chosen at syscall entry, userspace has much more control
- *   over the timing between choosing offsets. "How long will we be in
- *   kernel mode?" tends to be more difficult to predict than "how long
- *   will we be in user mode?"
- * - Reduce the lifetime of the new offset sitting in memory during
- *   kernel mode execution. Exposure of "thread-local" memory content
- *   (e.g. current, percpu, etc) tends to be easier than arbitrary
- *   location memory exposure.
- */
-#define choose_random_kstack_offset(rand) do {				\
-	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
-				&randomize_kstack_offset)) {		\
-		u32 offset = raw_cpu_read(kstack_offset);		\
-		offset = ror32(offset, 5) ^ (rand);			\
-		raw_cpu_write(kstack_offset, offset);			\
-	}								\
-} while (0)
 #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 #define add_random_kstack_offset() do { } while (0)
-#define choose_random_kstack_offset(rand) do { } while (0)
 
 #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 
 #endif
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 3da377ffb0c2..ba7657ced281 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -170,7 +170,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
 {
 	int old = refcount_read(r);
@@ -212,7 +212,7 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
 	return __refcount_add_not_zero(i, r, NULL);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp,
 					     int limit)
 {
@@ -244,7 +244,7 @@ __refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit)
 	return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp)
 {
 	return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX);
@@ -277,7 +277,7 @@ static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t
 	return __refcount_add_not_zero_acquire(i, r, NULL);
 }
 
-static inline __signed_wrap
+static inline
 void __refcount_add(int i, refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_add_relaxed(i, &r->refs);
@@ -383,7 +383,7 @@ static inline void refcount_inc(refcount_t *r)
 	__refcount_inc(r, NULL);
 }
 
-static inline __must_check __signed_wrap
+static inline __must_check
 bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_sub_release(i, &r->refs);
diff --git a/init/main.c b/init/main.c
index 1cb395dd94e4..c9638a6946dc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -833,7 +833,14 @@ static inline void initcall_debug_enable(void)
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 			   randomize_kstack_offset);
-DEFINE_PER_CPU(u32, kstack_offset);
+DEFINE_PER_CPU(struct rnd_state, kstack_rnd_state);
+
+static int __init random_kstack_init(void)
+{
+	prandom_seed_full_state(&kstack_rnd_state);
+	return 0;
+}
+late_initcall(random_kstack_init);
 
 static int __init early_randomize_kstack_offset(char *buf)
 {
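The init/main.c hunk above is the other half of the get_kstack_offset() helper added in include/linux/randomize_kstack.h: each CPU's rnd_state is seeded from the CRNG once at late_initcall time, and each later draw is a cheap prandom step with preemption disabled by get_cpu_var(). A condensed, self-contained sketch of the pattern (the demo_* names are invented for illustration):

```c
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prandom.h>

DEFINE_PER_CPU(struct rnd_state, demo_rnd_state);

static int __init demo_rnd_init(void)
{
	/* Seed every CPU's state from the CRNG once it is available. */
	prandom_seed_full_state(&demo_rnd_state);
	return 0;
}
late_initcall(demo_rnd_init);

static u32 demo_rnd_draw(void)
{
	/* get_cpu_var() disables preemption around the per-CPU access. */
	struct rnd_state *state = &get_cpu_var(demo_rnd_state);
	u32 rnd = prandom_u32_state(state);

	put_cpu_var(demo_rnd_state);
	return rnd;
}
```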
diff --git a/kernel/fork.c b/kernel/fork.c
index 55a6906d3014..e14970fbc4ee 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
 #include <linux/thread_info.h>
 #include <linux/kstack_erase.h>
 #include <linux/kasan.h>
+#include <linux/randomize_kstack.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
 #include <linux/io_uring_types.h>
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index e62b85b591be..3245032db34d 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -82,7 +82,6 @@ STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
 CFI_BACKWARD call trace:|ok: control flow unchanged
 FORTIFY_STRSCPY detected buffer overflow
-FORTIFY_STR_OBJECT detected buffer overflow
 FORTIFY_STR_MEMBER detected buffer overflow
 FORTIFY_MEM_OBJECT detected buffer overflow
 FORTIFY_MEM_MEMBER detected field-spanning write
