Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--   arch/arm64/kernel/acpi.c               |  31
-rw-r--r--   arch/arm64/kernel/alternative.c        |  60
-rw-r--r--   arch/arm64/kernel/asm-offsets.c        |  21
-rw-r--r--   arch/arm64/kernel/cpufeature.c         |  41
-rw-r--r--   arch/arm64/kernel/entry.S              |  60
-rw-r--r--   arch/arm64/kernel/head.S               |   3
-rw-r--r--   arch/arm64/kernel/hibernate.c          |   4
-rw-r--r--   arch/arm64/kernel/hyp-stub.S           |   2
-rw-r--r--   arch/arm64/kernel/irq.c                |   3
-rw-r--r--   arch/arm64/kernel/kaslr.c              |   9
-rw-r--r--   arch/arm64/kernel/kgdb.c               |  14
-rw-r--r--   arch/arm64/kernel/machine_kexec.c      |   3
-rw-r--r--   arch/arm64/kernel/machine_kexec_file.c |   4
-rw-r--r--   arch/arm64/kernel/perf_event.c         |   2
-rw-r--r--   arch/arm64/kernel/probes/kprobes.c     |  12
-rw-r--r--   arch/arm64/kernel/process.c            |  51
-rw-r--r--   arch/arm64/kernel/ptrace.c             | 162
-rw-r--r--   arch/arm64/kernel/setup.c              |  13
-rw-r--r--   arch/arm64/kernel/smp.c                |  33
-rw-r--r--   arch/arm64/kernel/traps.c              |   8
20 files changed, 464 insertions, 72 deletions
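
Much of the change below wires up GIC priority masking ("pseudo-NMI") support: pt_regs grows a pmr_save field, the entry code saves and restores ICC_PMR_EL1, and the irqflags-tracing and preemption paths learn to tell a masked-IRQ (NMI) entry apart from a normal one by comparing the saved PMR against GIC_PRIO_IRQOFF. A minimal C sketch of that convention follows; the priority values are illustrative (the real definitions live in arch/arm64/include/asm/ptrace.h) and the helper name is hypothetical:

#include <stdbool.h>

/* Illustrative values; see arch/arm64/include/asm/ptrace.h for the real ones. */
#define GIC_PRIO_IRQON	0xf0
#define GIC_PRIO_IRQOFF	(GIC_PRIO_IRQON & ~0x80)

/*
 * Lower PMR values mask more interrupts, so a saved PMR at or below
 * GIC_PRIO_IRQOFF means normal IRQs were masked when the exception was
 * taken, i.e. we are handling a pseudo-NMI.  The el1_irq hunks below
 * perform the same test with "cmp x20, #GIC_PRIO_IRQOFF; b.ls ..." to
 * skip the hardirqs tracing calls on the NMI path.
 */
static bool irqs_masked_by_pmr(unsigned long pmr_save)
{
	return pmr_save <= GIC_PRIO_IRQOFF;
}
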
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 44e3c351e1ea..803f0494dd3e 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -27,8 +27,10 @@ #include <linux/smp.h> #include <linux/serial_core.h> +#include <acpi/ghes.h> #include <asm/cputype.h> #include <asm/cpu_ops.h> +#include <asm/daifflags.h> #include <asm/pgtable.h> #include <asm/smp_plat.h> @@ -256,3 +258,32 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_NORMAL_NC); return __pgprot(PROT_DEVICE_nGnRnE); } + +/* + * Claim Synchronous External Aborts as a firmware first notification. + * + * Used by KVM and the arch do_sea handler. + * @regs may be NULL when called from process context. + */ +int apei_claim_sea(struct pt_regs *regs) +{ + int err = -ENOENT; + unsigned long current_flags; + + if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) + return err; + + current_flags = arch_local_save_flags(); + + /* + * SEA can interrupt SError, mask it and describe this as an NMI so + * that APEI defers the handling. + */ + local_daif_restore(DAIF_ERRCTX); + nmi_enter(); + err = ghes_notify_sea(); + nmi_exit(); + local_daif_restore(current_flags); + + return err; +} diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index b5d603992d40..a9b467763153 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -32,13 +32,23 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -int alternatives_applied; +static int all_alternatives_applied; + +static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS); struct alt_region { struct alt_instr *begin; struct alt_instr *end; }; +bool alternative_is_applied(u16 cpufeature) +{ + if (WARN_ON(cpufeature >= ARM64_NCAPS)) + return false; + + return test_bit(cpufeature, applied_alternatives); +} + /* * Check if the target PC is within an alternative block. 
*/ @@ -145,7 +155,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(void *alt_region, bool is_module) +static void __apply_alternatives(void *alt_region, bool is_module, + unsigned long *feature_mask) { struct alt_instr *alt; struct alt_region *region = alt_region; @@ -155,6 +166,9 @@ static void __apply_alternatives(void *alt_region, bool is_module) for (alt = region->begin; alt < region->end; alt++) { int nr_inst; + if (!test_bit(alt->cpufeature, feature_mask)) + continue; + /* Use ARM64_CB_PATCH as an unconditional patch */ if (alt->cpufeature < ARM64_CB_PATCH && !cpus_have_cap(alt->cpufeature)) @@ -192,6 +206,12 @@ static void __apply_alternatives(void *alt_region, bool is_module) dsb(ish); __flush_icache_all(); isb(); + + /* Ignore ARM64_CB bit from feature mask */ + bitmap_or(applied_alternatives, applied_alternatives, + feature_mask, ARM64_NCAPS); + bitmap_and(applied_alternatives, applied_alternatives, + cpu_hwcaps, ARM64_NCAPS); } } @@ -208,14 +228,19 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(alternatives_applied)) + while (!READ_ONCE(all_alternatives_applied)) cpu_relax(); isb(); } else { - BUG_ON(alternatives_applied); - __apply_alternatives(®ion, false); + DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE); + + bitmap_complement(remaining_capabilities, boot_capabilities, + ARM64_NPATCHABLE); + + BUG_ON(all_alternatives_applied); + __apply_alternatives(®ion, false, remaining_capabilities); /* Barriers provided by the cache flushing */ - WRITE_ONCE(alternatives_applied, 1); + WRITE_ONCE(all_alternatives_applied, 1); } return 0; @@ -227,6 +252,24 @@ void __init apply_alternatives_all(void) stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } +/* + * This is called very early in the boot process (directly after we run + * a feature detect on the boot CPU). No need to worry about other CPUs + * here. 
+ */ +void __init apply_boot_alternatives(void) +{ + struct alt_region region = { + .begin = (struct alt_instr *)__alt_instructions, + .end = (struct alt_instr *)__alt_instructions_end, + }; + + /* If called on non-boot cpu things could go wrong */ + WARN_ON(smp_processor_id() != 0); + + __apply_alternatives(®ion, false, &boot_capabilities[0]); +} + #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length) { @@ -234,7 +277,10 @@ void apply_alternatives_module(void *start, size_t length) .begin = start, .end = start + length, }; + DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE); + + bitmap_fill(all_capabilities, ARM64_NPATCHABLE); - __apply_alternatives(®ion, true); + __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 65b8afc84466..7f40dcbdd51d 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -53,13 +53,9 @@ int main(void) DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); DEFINE(S_X0, offsetof(struct pt_regs, regs[0])); - DEFINE(S_X1, offsetof(struct pt_regs, regs[1])); DEFINE(S_X2, offsetof(struct pt_regs, regs[2])); - DEFINE(S_X3, offsetof(struct pt_regs, regs[3])); DEFINE(S_X4, offsetof(struct pt_regs, regs[4])); - DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); - DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); @@ -73,14 +69,11 @@ int main(void) DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); -#ifdef CONFIG_COMPAT - DEFINE(S_COMPAT_SP, offsetof(struct pt_regs, compat_sp)); -#endif DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_PC, offsetof(struct pt_regs, pc)); - DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -93,7 +86,6 @@ int main(void) BLANK(); DEFINE(PAGE_SZ, PAGE_SIZE); BLANK(); - DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); @@ -110,25 +102,18 @@ int main(void) BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); 
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); BLANK(); DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec)); DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec)); BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); @@ -142,13 +127,9 @@ int main(void) DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); - DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); - DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif #ifdef CONFIG_CPU_PM - DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f6d84e2c92fe..e24e94d28767 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -54,6 +54,9 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; +/* Need also bit for ARM64_CB_PATCH */ +DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -1118,7 +1121,7 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to * do anything here. 
*/ - if (!alternatives_applied) + if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } #endif @@ -1203,11 +1206,27 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_PSEUDO_NMI +static bool enable_pseudo_nmi; + +static int __init early_enable_pseudo_nmi(char *p) +{ + return strtobool(p, &enable_pseudo_nmi); +} +early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); + +static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return enable_pseudo_nmi && has_useable_gicv3_cpuif(entry, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -1480,6 +1499,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, }, #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_PSEUDO_NMI + { + /* + * Depends on having GICv3 + */ + .desc = "IRQ priority masking", + .capability = ARM64_HAS_IRQ_PRIO_MASKING, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = can_use_gic_priorities, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_GIC_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, +#endif {}, }; @@ -1654,6 +1688,9 @@ static void update_cpu_capabilities(u16 scope_mask) if (caps->desc) pr_info("detected: %s\n", caps->desc); cpus_set_cap(caps->capability); + + if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) + set_bit(caps->capability, boot_capabilities); } } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 0ec0c46b2c0c..c50a7a75f2e0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -185,7 +185,7 @@ alternative_cb_end .else add x21, sp, #S_FRAME_SIZE - get_thread_info tsk + get_current_task tsk /* Save the task's original addr_limit and set USER_DS */ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] @@ -249,6 +249,12 @@ alternative_else_nop_endif msr sp_el0, tsk .endif + /* Save pmr */ +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mrs_s x20, SYS_ICC_PMR_EL1 + str x20, [sp, #S_PMR_SAVE] +alternative_else_nop_endif + /* * Registers that may be useful after this macro is invoked: * @@ -269,6 +275,14 @@ alternative_else_nop_endif /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif + /* Restore pmr */ +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + ldr x20, [sp, #S_PMR_SAVE] + msr_s SYS_ICC_PMR_EL1, x20 + /* Ensure priority change is seen by redistributor */ + dsb sy +alternative_else_nop_endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -603,32 +617,52 @@ el1_irq: kernel_entry 1 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + ldr x20, [sp, #S_PMR_SAVE] +alternative_else + mov x20, #GIC_PRIO_IRQON +alternative_endif + cmp x20, #GIC_PRIO_IRQOFF + /* Irqs were disabled, don't trace */ + b.ls 1f +#endif bl trace_hardirqs_off +1: #endif irq_handler #ifdef CONFIG_PREEMPT ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count - cbnz x24, 1f // preempt count != 0 - bl el1_preempt +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA_F were cleared at 
start of handling. If anything is set in DAIF, + * we come back from an NMI, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl preempt_schedule_irq // irq en/disable is done inside 1: #endif #ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + /* + * if IRQs were disabled when we received the interrupt, we have an NMI + * and we are not re-enabling interrupt upon eret. Skip tracing. + */ + cmp x20, #GIC_PRIO_IRQOFF + b.ls 1f +#endif bl trace_hardirqs_on +1: #endif + kernel_exit 1 ENDPROC(el1_irq) -#ifdef CONFIG_PREEMPT -el1_preempt: - mov x24, lr -1: bl preempt_schedule_irq // irq en/disable is done inside - ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS - tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? - ret x24 -#endif - /* * EL0 mode handlers. */ @@ -1070,7 +1104,7 @@ ENTRY(ret_from_fork) cbz x19, 1f // not a kernel thread mov x0, x20 blr x19 -1: get_thread_info tsk +1: get_current_task tsk b ret_to_user ENDPROC(ret_from_fork) NOKPROBE(ret_from_fork) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 15d79a8e5e5e..eecf7927dab0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -539,8 +539,7 @@ set_hcr: /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #24, #4 - cmp x0, #1 - b.ne 3f + cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 29cdc99688f3..9859e1178e6b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -299,8 +299,10 @@ int swsusp_arch_suspend(void) dcache_clean_range(__idmap_text_start, __idmap_text_end); /* Clean kvm setup code to PoC? */ - if (el2_reset_needed()) + if (el2_reset_needed()) { dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); + dcache_clean_range(__hyp_text_start, __hyp_text_end); + } /* make the crash dump kernel image protected again */ crash_post_resume(); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e1261fbaa374..17f325ba831e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -28,6 +28,8 @@ #include <asm/virt.h> .text + .pushsection .hyp.text, "ax" + .align 11 ENTRY(__hyp_stub_vectors) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 780a12f59a8f..92fa81798fb9 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,6 +33,9 @@ unsigned long irq_err_count; +/* Only access this in an NMI enter/exit */ +DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); + DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index f0e6ab8abe9c..b09b6f75f759 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -14,6 +14,7 @@ #include <linux/sched.h> #include <linux/types.h> +#include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/kernel-pgtable.h> #include <asm/memory.h> @@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt) return ret; } -static __init const u8 *get_cmdline(void *fdt) +static __init const u8 *kaslr_get_cmdline(void *fdt) { static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; @@ -87,6 +88,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * we end up running with module randomization disabled. 
*/ module_alloc_base = (u64)_etext - MODULES_VSIZE; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, @@ -109,7 +111,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * Check if 'nokaslr' appears on the command line, and * return 0 if that is the case. */ - cmdline = get_cmdline(fdt); + cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) return 0; @@ -169,5 +171,8 @@ u64 __init kaslr_early_init(u64 dt_phys) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + return offset; } diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index ce46c4cdf368..691854b77c7f 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -244,27 +244,33 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (!kgdb_single_step) + if (user_mode(regs) || !kgdb_single_step) return DBG_HOOK_ERROR; kgdb_handle_exception(1, SIGTRAP, 0, regs); - return 0; + return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index aa9c94113700..66b5d697d943 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -321,7 +321,7 @@ void crash_post_resume(void) * but does not hold any data of loaded kernel image. * * Note that all the pages in crash dump kernel memory have been initially - * marked as Reserved in kexec_reserve_crashkres_pages(). + * marked as Reserved as memory was allocated via memblock_reserve(). * * In hibernation, the pages which are Reserved and yet "nosave" are excluded * from the hibernation iamge. crash_is_nosave() does thich check for crash @@ -361,7 +361,6 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) for (addr = begin; addr < end; addr += PAGE_SIZE) { page = phys_to_page(addr); - ClearPageReserved(page); free_reserved_page(page); } } diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index f2c211a6229b..58871333737a 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -120,10 +120,12 @@ static int create_dtb(struct kimage *image, { void *buf; size_t buf_size; + size_t cmdline_len; int ret; + cmdline_len = cmdline ? 
strlen(cmdline) : 0; buf_size = fdt_totalsize(initial_boot_params) - + strlen(cmdline) + DTB_EXTRA_SPACE; + + cmdline_len + DTB_EXTRA_SPACE; for (;;) { buf = vmalloc(buf_size); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 1620a371b1f5..4addb38bc250 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -810,7 +810,7 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, } /* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. + * Add an event filter to a given event. */ static int armv8pmu_set_event_filter(struct hw_perf_event *event, struct perf_event_attr *attr) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a5b338b2542..7fb6f3aa5ceb 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -450,6 +450,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; + if (user_mode(regs)) + return DBG_HOOK_ERROR; + /* return error if this is not our step */ retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); @@ -466,6 +469,9 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { + if (user_mode(regs)) + return DBG_HOOK_ERROR; + kprobe_handler(regs); return DBG_HOOK_HANDLED; } @@ -478,13 +484,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true; if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a0f985a6ac50..3767fb21a5b8 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -51,6 +51,7 @@ #include <linux/thread_info.h> #include <asm/alternative.h> +#include <asm/arch_gicv3.h> #include <asm/compat.h> #include <asm/cacheflush.h> #include <asm/exec.h> @@ -74,6 +75,50 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +static void __cpu_do_idle(void) +{ + dsb(sy); + wfi(); +} + +static void __cpu_do_idle_irqprio(void) +{ + unsigned long pmr; + unsigned long daif_bits; + + daif_bits = read_sysreg(daif); + write_sysreg(daif_bits | PSR_I_BIT, daif); + + /* + * Unmask PMR before going idle to make sure interrupts can + * be raised. + */ + pmr = gic_read_pmr(); + gic_write_pmr(GIC_PRIO_IRQON); + + __cpu_do_idle(); + + gic_write_pmr(pmr); + write_sysreg(daif_bits, daif); +} + +/* + * cpu_do_idle() + * + * Idle the processor (wait for interrupt). + * + * If the CPU supports priority masking we must do additional work to + * ensure that interrupts are not masked at the PMR (because the core will + * not wake up if we block the wake up signal in the interrupt controller). + */ +void cpu_do_idle(void) +{ + if (system_uses_irq_prio_masking()) + __cpu_do_idle_irqprio(); + else + __cpu_do_idle(); +} + /* * This is our default idle handler. 
*/ @@ -232,6 +277,9 @@ void __show_regs(struct pt_regs *regs) printk("sp : %016llx\n", sp); + if (system_uses_irq_prio_masking()) + printk("pmr_save: %08llx\n", regs->pmr_save); + i = top_reg; while (i >= 0) { @@ -363,6 +411,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) childregs->pstate |= PSR_SSBS_BIT; + if (system_uses_irq_prio_masking()) + childregs->pmr_save = GIC_PRIO_IRQON; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9dce33b0e260..b82e0a9b3da3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -979,6 +979,131 @@ static int pac_mask_get(struct task_struct *target, return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1); } + +#ifdef CONFIG_CHECKPOINT_RESTORE +static __uint128_t pac_key_to_user(const struct ptrauth_key *key) +{ + return (__uint128_t)key->hi << 64 | key->lo; +} + +static struct ptrauth_key pac_key_from_user(__uint128_t ukey) +{ + struct ptrauth_key key = { + .lo = (unsigned long)ukey, + .hi = (unsigned long)(ukey >> 64), + }; + + return key; +} + +static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys, + const struct ptrauth_keys *keys) +{ + ukeys->apiakey = pac_key_to_user(&keys->apia); + ukeys->apibkey = pac_key_to_user(&keys->apib); + ukeys->apdakey = pac_key_to_user(&keys->apda); + ukeys->apdbkey = pac_key_to_user(&keys->apdb); +} + +static void pac_address_keys_from_user(struct ptrauth_keys *keys, + const struct user_pac_address_keys *ukeys) +{ + keys->apia = pac_key_from_user(ukeys->apiakey); + keys->apib = pac_key_from_user(ukeys->apibkey); + keys->apda = pac_key_from_user(ukeys->apdakey); + keys->apdb = pac_key_from_user(ukeys->apdbkey); +} + +static int pac_address_keys_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_address_keys user_keys; + + if (!system_supports_address_auth()) + return -EINVAL; + + pac_address_keys_to_user(&user_keys, keys); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); +} + +static int pac_address_keys_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_address_keys user_keys; + int ret; + + if (!system_supports_address_auth()) + return -EINVAL; + + pac_address_keys_to_user(&user_keys, keys); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); + if (ret) + return ret; + pac_address_keys_from_user(keys, &user_keys); + + return 0; +} + +static void pac_generic_keys_to_user(struct user_pac_generic_keys *ukeys, + const struct ptrauth_keys *keys) +{ + ukeys->apgakey = pac_key_to_user(&keys->apga); +} + +static void pac_generic_keys_from_user(struct ptrauth_keys *keys, + const struct user_pac_generic_keys *ukeys) +{ + keys->apga = pac_key_from_user(ukeys->apgakey); +} + +static int pac_generic_keys_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_generic_keys user_keys; + + if (!system_supports_generic_auth()) + return -EINVAL; 
+ + pac_generic_keys_to_user(&user_keys, keys); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); +} + +static int pac_generic_keys_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct ptrauth_keys *keys = &target->thread.keys_user; + struct user_pac_generic_keys user_keys; + int ret; + + if (!system_supports_generic_auth()) + return -EINVAL; + + pac_generic_keys_to_user(&user_keys, keys); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &user_keys, 0, -1); + if (ret) + return ret; + pac_generic_keys_from_user(keys, &user_keys); + + return 0; +} +#endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ enum aarch64_regset { @@ -995,6 +1120,10 @@ enum aarch64_regset { #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, +#ifdef CONFIG_CHECKPOINT_RESTORE + REGSET_PACA_KEYS, + REGSET_PACG_KEYS, +#endif #endif }; @@ -1074,6 +1203,24 @@ static const struct user_regset aarch64_regsets[] = { .get = pac_mask_get, /* this cannot be set dynamically */ }, +#ifdef CONFIG_CHECKPOINT_RESTORE + [REGSET_PACA_KEYS] = { + .core_note_type = NT_ARM_PACA_KEYS, + .n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t), + .size = sizeof(__uint128_t), + .align = sizeof(__uint128_t), + .get = pac_address_keys_get, + .set = pac_address_keys_set, + }, + [REGSET_PACG_KEYS] = { + .core_note_type = NT_ARM_PACG_KEYS, + .n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t), + .size = sizeof(__uint128_t), + .align = sizeof(__uint128_t), + .get = pac_generic_keys_get, + .set = pac_generic_keys_set, + }, +#endif #endif }; @@ -1702,19 +1849,20 @@ void syscall_trace_exit(struct pt_regs *regs) } /* - * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a - * We also take into account DIT (bit 24), which is not yet documented, and - * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be - * allocated an EL0 meaning in future. + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a. + * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is + * not described in ARM DDI 0487D.a. + * We treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may + * be allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. 
*/ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ - GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) static int valid_compat_regs(struct user_pt_regs *regs) { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 4b0e1231625c..f8482fe5a190 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -58,7 +58,6 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/traps.h> -#include <asm/memblock.h> #include <asm/efi.h> #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> @@ -209,6 +208,7 @@ static void __init request_standard_resources(void) struct memblock_region *region; struct resource *res; unsigned long i = 0; + size_t res_size; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); @@ -216,9 +216,10 @@ static void __init request_standard_resources(void) kernel_data.end = __pa_symbol(_end - 1); num_standard_resources = memblock.memory.cnt; - standard_resources = memblock_alloc_low(num_standard_resources * - sizeof(*standard_resources), - SMP_CACHE_BYTES); + res_size = num_standard_resources * sizeof(*standard_resources); + standard_resources = memblock_alloc_low(res_size, SMP_CACHE_BYTES); + if (!standard_resources) + panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); for_each_memblock(memory, region) { res = &standard_resources[i++]; @@ -313,7 +314,6 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); - efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); @@ -340,6 +340,9 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_tags(); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 1598d6f7200a..824de7038967 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,6 +35,7 @@ #include <linux/smp.h> #include <linux/seq_file.h> #include <linux/irq.h> +#include <linux/irqchip/arm-gic-v3.h> #include <linux/percpu.h> #include <linux/clockchips.h> #include <linux/completion.h> @@ -180,6 +181,24 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void init_gic_priority_masking(void) +{ + u32 cpuflags; + + if (WARN_ON(!gic_enable_sre())) + return; + + cpuflags = read_sysreg(daif); + + WARN_ON(!(cpuflags & PSR_I_BIT)); + + gic_write_pmr(GIC_PRIO_IRQOFF); + + /* We can only unmask PSR.I if we can take aborts */ + if (!(cpuflags & PSR_A_BIT)) + write_sysreg(cpuflags & ~PSR_I_BIT, daif); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. 
@@ -206,6 +225,9 @@ asmlinkage notrace void secondary_start_kernel(void) */ cpu_uninstall_idmap(); + if (system_uses_irq_prio_masking()) + init_gic_priority_masking(); + preempt_disable(); trace_hardirqs_off(); @@ -419,6 +441,17 @@ void __init smp_prepare_boot_cpu(void) */ jump_label_init(); cpuinfo_store_boot_cpu(); + + /* + * We now know enough about the boot CPU to apply the + * alternatives that cannot wait until interrupt handling + * and/or scheduling is enabled. + */ + apply_boot_alternatives(); + + /* Conditionally switch to GIC PMR for interrupt masking */ + if (system_uses_irq_prio_masking()) + init_gic_priority_masking(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4e2fb877f8d5..8ad119c3f665 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -898,13 +898,17 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + const bool was_in_nmi = in_nmi(); + + if (!was_in_nmi) + nmi_enter(); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); + if (!was_in_nmi) + nmi_exit(); } void __pte_error(const char *file, int line, unsigned long val) |
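
The ptrace.c changes also expose the pointer authentication keys as new regsets (NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS) when CONFIG_CHECKPOINT_RESTORE is enabled, so a checkpoint/restore tool can save and restore a task's keys. A hedged userspace sketch of reading the address keys from a stopped tracee — the exact headers that provide the NT_* constants and struct user_pac_address_keys vary by toolchain, and error handling is omitted:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <linux/elf.h>   /* NT_ARM_PACA_KEYS */
#include <asm/ptrace.h>  /* struct user_pac_address_keys */

/* Returns 0 on success; fails with EINVAL if address auth is unsupported. */
static long read_pac_address_keys(pid_t pid, struct user_pac_address_keys *keys)
{
	struct iovec iov = {
		.iov_base = keys,
		.iov_len  = sizeof(*keys),
	};

	return ptrace(PTRACE_GETREGSET, pid, NT_ARM_PACA_KEYS, &iov);
}

PTRACE_SETREGSET with the same note type writes the keys back, which is what pac_address_keys_set() above services.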