From 58284a901b426e6130672e9f14c30dfd5a9dbde0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Nov 2020 13:00:14 +0530 Subject: arm64/mm: Validate hotplug range before creating linear mapping During memory hotplug process, the linear mapping should not be created for a given memory range if that would fall outside the maximum allowed linear range. Else it might cause memory corruption in the kernel virtual space. Maximum linear mapping region is [PAGE_OFFSET..(PAGE_END -1)] accommodating both its ends but excluding PAGE_END. Max physical range that can be mapped inside this linear mapping range, must also be derived from its end points. This ensures that arch_add_memory() validates memory hot add range for its potential linear mapping requirements, before creating it with __create_pgd_mapping(). Fixes: 4ab215061554 ("arm64: Add memory hotplug support") Signed-off-by: Anshuman Khandual Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Ard Biesheuvel Cc: Steven Price Cc: Robin Murphy Cc: David Hildenbrand Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1605252614-761-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } +static bool inside_linear_region(u64 start, u64 size) +{ + /* + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] + * accommodating both its ends but excluding PAGE_END. Max physical + * range which can be mapped inside this linear mapping range, must + * also be derived from its end points. + */ + return start >= __pa(_PAGE_OFFSET(vabits_actual)) && + (start + size - 1) <= __pa(PAGE_END - 1); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { int ret, flags = 0; + if (!inside_linear_region(start, size)) { + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); + return -EINVAL; + } + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; -- cgit v1.2.3 From 23529049c68423820487304f244144e0d576e85a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:46 +0000 Subject: arm64: entry: fix non-NMI user<->kernel transitions When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE will WARN() at boot time that interrupts are enabled when we call context_tracking_user_enter(), despite the DAIF flags indicating that IRQs are masked. The problem is that we're not tracking IRQ flag changes accurately, and so lockdep believes interrupts are enabled when they are not (and vice-versa). We can shuffle things so to make this more accurate. For kernel->user transitions there are a number of constraints we need to consider: 1) When we call __context_tracking_user_enter() HW IRQs must be disabled and lockdep must be up-to-date with this. 2) Userspace should be treated as having IRQs enabled from the PoV of both lockdep and tracing. 3) As context_tracking_user_enter() stops RCU from watching, we cannot use RCU after calling it. 4) IRQ flag tracing and lockdep have state that must be manipulated before RCU is disabled. ... with similar constraints applying for user->kernel transitions, with the ordering reversed. The generic entry code has enter_from_user_mode() and exit_to_user_mode() helpers to handle this. We can't use those directly, so we add arm64 copies for now (without the instrumentation markers which aren't used on arm64). These replace the existing user_exit() and user_exit_irqoff() calls spread throughout handlers, and the exception unmasking is left as-is. Note that: * The accounting for debug exceptions from userspace now happens in el0_dbg() and ret_to_user(), so this is removed from debug_exception_enter() and debug_exception_exit(). As user_exit_irqoff() wakes RCU, the userspace-specific check is removed. * The accounting for syscalls now happens in el0_svc(), el0_svc_compat(), and ret_to_user(), so this is removed from el0_svc_common(). This does not adversely affect the workaround for erratum 1463225, as this does not depend on any of the state tracking. * In ret_to_user() we mask interrupts with local_daif_mask(), and so we need to inform lockdep and tracing. Here a trace_hardirqs_off() is sufficient and safe as we have not yet exited kernel context and RCU is usable. * As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only needs to check for the latter. * EL0 SError handling will be dealt with in a subsequent patch, as this needs to be treated as an NMI. Prior to this patch, booting an appropriately-configured kernel would result in spats as below: | DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled()) | WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0 | Modules linked in: | CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3 | Hardware name: linux,dummy-virt (DT) | pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--) | pc : check_flags.part.54+0x1dc/0x1f0 | lr : check_flags.part.54+0x1dc/0x1f0 | sp : ffff80001003bd80 | x29: ffff80001003bd80 x28: ffff66ce801e0000 | x27: 00000000ffffffff x26: 00000000000003c0 | x25: 0000000000000000 x24: ffffc31842527258 | x23: ffffc31842491368 x22: ffffc3184282d000 | x21: 0000000000000000 x20: 0000000000000001 | x19: ffffc318432ce000 x18: 0080000000000000 | x17: 0000000000000000 x16: ffffc31840f18a78 | x15: 0000000000000001 x14: ffffc3184285c810 | x13: 0000000000000001 x12: 0000000000000000 | x11: ffffc318415857a0 x10: ffffc318406614c0 | x9 : ffffc318415857a0 x8 : ffffc31841f1d000 | x7 : 647261685f706564 x6 : ffffc3183ff7c66c | x5 : ffff66ce801e0000 x4 : 0000000000000000 | x3 : ffffc3183fe00000 x2 : ffffc31841500000 | x1 : e956dc24146b3500 x0 : 0000000000000000 | Call trace: | check_flags.part.54+0x1dc/0x1f0 | lock_is_held_type+0x10c/0x188 | rcu_read_lock_sched_held+0x70/0x98 | __context_tracking_enter+0x310/0x350 | context_tracking_enter.part.3+0x5c/0xc8 | context_tracking_user_enter+0x6c/0x80 | finish_ret_to_user+0x2c/0x13cr Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/entry-common.c | 40 +++++++++++++++++++++++++------------- arch/arm64/kernel/entry.S | 35 +++++++++++++-------------------- arch/arm64/kernel/syscall.c | 1 - arch/arm64/mm/fault.c | 22 ++++++++++----------- 5 files changed, 51 insertions(+), 48 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69d53dd7be7..d579b2e6db7a 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 920da254be1d..49d1c1dd9baf 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) asmlinkage void noinstr enter_from_user_mode(void) { + lockdep_hardirqs_off(CALLER_ADDR0); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); } static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); @@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } @@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } @@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } @@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } @@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } @@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 53e30750fc28..d72c818b019c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -748,10 +738,14 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ldr x19, [tsk, #TSK_TI_FLAGS] and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf enable_step_tsk x19, x2 @@ -767,9 +761,6 @@ work_pending: mov x0, sp // 'regs' mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 13fe79f8e2db..f8f758e4a306 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); - user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801f..1f450b784d2c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + if (!user_mode(regs)) { + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { /* * We might have interrupted pretty much anything. In * fact, if we're a debug exception, we can even interrupt @@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - if (!user_mode(regs)) - rcu_nmi_exit(); + if (user_mode(regs)) + return; + + rcu_nmi_exit(); if (interrupts_enabled(regs)) trace_hardirqs_on(); -- cgit v1.2.3 From 2a9b3e6ac69a8bf177d8496a11e749e2dc72fa22 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 30 Nov 2020 11:59:50 +0000 Subject: arm64: entry: fix EL1 debug transitions In debug_exception_enter() and debug_exception_exit() we trace hardirqs on/off while RCU isn't guaranteed to be watching, and we don't save and restore the hardirq state, and so may return with this having changed. Handle this appropriately with new entry/exit helpers which do the bare minimum to ensure this is appropriately maintained, without marking debug exceptions as NMIs. These are placed in entry-common.c with the other entry/exit helpers. In future we'll want to reconsider whether some debug exceptions should be NMIs, but this will require a significant refactoring, and for now this should prevent issues with lockdep and RCU. Signed-off-by: Mark Rutland Cc: Catalin Marins Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20201130115950.22492-12-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 26 ++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 25 ------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6d70be0d3e8f..70e0a7591245 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -150,6 +150,30 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -162,7 +186,9 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1f450b784d2c..795d224f184f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,23 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - if (!user_mode(regs)) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -816,14 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (user_mode(regs)) - return; - - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); -- cgit v1.2.3