diff options
author | Mark Rutland <mark.rutland@arm.com> | 2016-11-03 23:23:13 +0300 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2016-11-11 21:25:46 +0300 |
commit | c02433dd6de32f042cf3ffe476746b1115b8c096 (patch) | |
tree | 265e0b7925a4c665a64b5f8d4279d16dcb947ee2 /arch/arm64/kernel | |
parent | 1b7e2296a822dfd2349960addc42a139360ce769 (diff) | |
download | linux-c02433dd6de32f042cf3ffe476746b1115b8c096.tar.xz |
arm64: split thread_info from task stack
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.
Largely, this involves changing code to store the task_struct in sp_el0,
and acquire the thread_info from the task struct. Core code now
implements current_thread_info(), and as noted in <linux/sched.h> this
relies on offsetof(task_struct, thread_info) == 0, enforced by core
code.
This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.
Userspace clobbers sp_el0, and we can no longer restore this from the
stack. Instead, the current task is cached in a per-cpu variable that we
can safely access from early assembly as interrupts are disabled (and we
are thus not preemptible).
Both secondary entry and idle are updated to stash the sp and task
pointer separately.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/asm-offsets.c | 8 | ||||
-rw-r--r-- | arch/arm64/kernel/entry.S | 41 | ||||
-rw-r--r-- | arch/arm64/kernel/head.S | 11 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 16 | ||||
-rw-r--r-- | arch/arm64/kernel/smp.c | 2 |
5 files changed, 49 insertions, 29 deletions
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index d30b2321c0ee..c2dc9fa4f09b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -36,9 +36,10 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); BLANK(); - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); + DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); @@ -121,6 +122,7 @@ int main(void) DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2d4c83bc1f81..6349a8324b4f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -90,9 +90,8 @@ .if \el == 0 mrs x21, sp_el0 - mov tsk, sp - and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug + ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. mov x29, xzr // fp pointed to user-space @@ -100,10 +99,10 @@ add x21, sp, #S_FRAME_SIZE get_thread_info tsk /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ - ldr x20, [tsk, #TI_ADDR_LIMIT] + ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 - str x20, [tsk, #TI_ADDR_LIMIT] + str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 @@ -139,7 +138,7 @@ .if \el != 0 /* Restore the task's original addr_limit. */ ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TI_ADDR_LIMIT] + str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif @@ -192,13 +191,14 @@ alternative_else_nop_endif mov x19, sp // preserve the original sp /* - * Compare sp with the current thread_info, if the top - * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and - * should switch to the irq stack. + * Compare sp with the base of the task stack. + * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, + * and should switch to the irq stack. */ - and x25, x19, #~(THREAD_SIZE - 1) - cmp x25, tsk - b.ne 9998f + ldr x25, [tsk, TSK_STACK] + eor x25, x25, x19 + and x25, x25, #~(THREAD_SIZE - 1) + cbnz x25, 9998f adr_this_cpu x25, irq_stack, x26 mov x26, #IRQ_STACK_START_SP @@ -427,9 +427,9 @@ el1_irq: irq_handler #ifdef CONFIG_PREEMPT - ldr w24, [tsk, #TI_PREEMPT] // get preempt count + ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 - ldr x0, [tsk, #TI_FLAGS] // get flags + ldr x0, [tsk, #TSK_TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt 1: @@ -444,7 +444,7 @@ ENDPROC(el1_irq) el1_preempt: mov x24, lr 1: bl preempt_schedule_irq // irq en/disable is done inside - ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS + ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 #endif @@ -674,8 +674,7 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 - and x9, x9, #~(THREAD_SIZE - 1) - msr sp_el0, x9 + msr sp_el0, x1 ret ENDPROC(cpu_switch_to) @@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to) ret_fast_syscall: disable_irq // disable interrupts str x0, [sp, #S_X0] // returned x0 - ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK @@ -706,14 +705,14 @@ work_pending: #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on // enabled while in userspace #endif - ldr x1, [tsk, #TI_FLAGS] // re-check for single-step + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user /* * "slow" syscall return path. */ ret_to_user: disable_irq // disable interrupts - ldr x1, [tsk, #TI_FLAGS] + ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: @@ -746,7 +745,7 @@ el0_svc_naked: // compat entry point enable_dbg_and_irq ct_user_exit 1 - ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace cmp scno, sc_nr // check upper syscall limit diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 332e33193ccf..eaafb253bbfa 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -428,7 +428,8 @@ ENDPROC(__create_page_tables) __primary_switched: adrp x4, init_thread_union add sp, x4, #THREAD_SIZE - msr sp_el0, x4 // Save thread_info + adr_l x5, init_task + msr sp_el0, x5 // Save thread_info adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address @@ -699,10 +700,10 @@ __secondary_switched: isb adr_l x0, secondary_data - ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack - mov sp, x0 - and x0, x0, #~(THREAD_SIZE - 1) - msr sp_el0, x0 // save thread_info + ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + mov sp, x1 + ldr x2, [x0, #CPU_BOOT_TASK] + msr sp_el0, x2 mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ec7b9c00effe..a98b743631c5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -45,6 +45,7 @@ #include <linux/personality.h> #include <linux/notifier.h> #include <trace/events/power.h> +#include <linux/percpu.h> #include <asm/alternative.h> #include <asm/compat.h> @@ -322,6 +323,20 @@ void uao_thread_switch(struct task_struct *next) } /* + * We store our current task in sp_el0, which is clobbered by userspace. Keep a + * shadow copy so that we can restore this upon entry from userspace. + * + * This is *only* for exception entry from EL0, and is not valid until we + * __switch_to() a user task. + */ +DEFINE_PER_CPU(struct task_struct *, __entry_task); + +static void entry_task_switch(struct task_struct *next) +{ + __this_cpu_write(__entry_task, next); +} + +/* * Thread switching. */ struct task_struct *__switch_to(struct task_struct *prev, @@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + entry_task_switch(next); uao_thread_switch(next); /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6f42c68e457f..cb87234cfcf2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * We need to tell the secondary core where to find its stack and the * page tables. */ + secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } + secondary_data.task = NULL; secondary_data.stack = NULL; status = READ_ONCE(secondary_data.status); if (ret && status) { |