diff options
Diffstat (limited to 'arch')
46 files changed, 387 insertions, 252 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 56b6ccc0e32d..ba4e966484ab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE bool depends on HAVE_STATIC_CALL +config ARCH_WANT_LD_ORPHAN_WARN + bool + help + An arch should select this symbol once all linker sections are explicitly + included, size-asserted, or discarded in the linker scripts. This is + important because we never want expected sections to be placed heuristically + by the linker, since the locations of such sections can change between linker + versions. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..002e0cf025f5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_LD_ORPHAN_WARN select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d..e15f76ca2887 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,10 +16,6 @@ LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499..e1567418a2b1 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined # Delete all temporary local symbols LDFLAGS_vmlinux += -X # Report orphan sections -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 34793aabdb65..58df9fd79a76 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1515f6f153a0..a6b5b7ef40ae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -81,6 +81,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5789c2d18d43..6a87d592bd00 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -28,10 +28,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ec213b4a1650..1c26d7baa67f 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs) { unsigned long flags = regs->pstate & DAIF_MASK; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + /* * We can't use local_daif_restore(regs->pstate) here as * system_has_prio_mask_debugging() won't restore the I bit if it can diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 99b9383cd036..0756191f44f6 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -31,7 +31,12 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); +void arm64_enter_nmi(struct pt_regs *regs); +void arm64_exit_nmi(struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd52..28c85b87b8cd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -193,6 +193,10 @@ struct pt_regs { /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; }; static inline bool in_syscall(struct pt_regs const *regs) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2ef4c2edf06..801861d05426 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -987,7 +987,7 @@ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 #define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) -#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 43d4c329775f..70e0a7591245 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,40 +17,164 @@ #include <asm/mmu.h> #include <asm/sysreg.h> -static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +/* + * This is intended to match the logic in irqentry_enter(), handling the kernel + * mode transitions only. + */ +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + regs->exit_rcu = false; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + trace_hardirqs_off_finish(); + + regs->exit_rcu = true; + return; + } + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); +} + +/* + * This is intended to match the logic in irqentry_exit(), handling the kernel + * mode transitions only, and with preemption handled elsewhere. + */ +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (interrupts_enabled(regs)) { + if (regs->exit_rcu) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + trace_hardirqs_on(); + } else { + if (regs->exit_rcu) + rcu_irq_exit(); + } +} + +void noinstr arm64_enter_nmi(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); +} + +void noinstr arm64_exit_nmi(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + arm64_enter_nmi(regs); + else + enter_from_kernel_mode(regs); +} + +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + arm64_exit_nmi(regs); + else + exit_to_kernel_mode(regs); +} + +static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); far = untagged_addr(far); do_mem_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_abort); -static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_pc); -static void notrace el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_undefinstr(regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_undef); -static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); bad_mode(regs, 0, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_inv); -static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + +static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -62,18 +186,21 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } -NOKPROBE_SYMBOL(el1_dbg); -static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_ptrauth_fault(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_fpac); -asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -106,20 +233,34 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) el1_inv(regs, esr); } } -NOKPROBE_SYMBOL(el1_sync_handler); -static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +asmlinkage void noinstr enter_from_user_mode(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_da); -static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -131,90 +272,80 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_ia); -static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_acc); -static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_sve_acc); -static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_exc); -static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } -NOKPROBE_SYMBOL(el0_sys); -static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_pc); -static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } -NOKPROBE_SYMBOL(el0_sp); -static void notrace el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el0_undef); -static void notrace el0_bti(struct pt_regs *regs) +static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } -NOKPROBE_SYMBOL(el0_bti); -static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } -NOKPROBE_SYMBOL(el0_inv); -static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -222,30 +353,28 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } -NOKPROBE_SYMBOL(el0_dbg); -static void notrace el0_svc(struct pt_regs *regs) +static void noinstr el0_svc(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } -NOKPROBE_SYMBOL(el0_svc); -static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el0_fpac); -asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -297,27 +426,25 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_handler); #ifdef CONFIG_COMPAT -static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } -NOKPROBE_SYMBOL(el0_cp15); -static void notrace el0_svc_compat(struct pt_regs *regs) +static void noinstr el0_svc_compat(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } -NOKPROBE_SYMBOL(el0_svc_compat); -asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -360,5 +487,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_compat_handler); #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b295fb912b12..d72c818b019c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include <asm/unistd.h> /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -637,16 +634,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif + mov x0, sp + bl enter_el1_irq_or_nmi irq_handler @@ -665,26 +654,8 @@ alternative_else_nop_endif 1: #endif -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif + mov x0, sp + bl exit_el1_irq_or_nmi kernel_exit 1 SYM_CODE_END(el1_irq) @@ -726,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -759,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -774,13 +738,17 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + ldr x19, [tsk, #TSK_TI_FLAGS] + and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf - enable_step_tsk x1, x2 + enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif @@ -791,11 +759,9 @@ finish_ret_to_user: */ work_pending: mov x0, sp // 'regs' + mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9cf2fb87584a..60456a62da11 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -67,18 +67,3 @@ void __init init_IRQ(void) local_daif_restore(DAIF_PROCCTX_NOIRQ); } } - -/* - * Stubs to make nmi_enter/exit() code callable from ASM - */ -asmlinkage void notrace asm_nmi_enter(void) -{ - nmi_enter(); -} -NOKPROBE_SYMBOL(asm_nmi_enter); - -asmlinkage void notrace asm_nmi_exit(void) -{ - nmi_exit(); -} -NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7697a4b48b7c..ed919f633ed8 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) +static void noinstr __cpu_do_idle(void) { dsb(sy); wfi(); } -static void __cpu_do_idle_irqprio(void) +static void noinstr __cpu_do_idle_irqprio(void) { unsigned long pmr; unsigned long daif_bits; @@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { if (system_uses_irq_prio_masking()) __cpu_do_idle_irqprio(); @@ -119,7 +119,7 @@ void cpu_do_idle(void) /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c..793c46d6a447 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -10,6 +10,7 @@ #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/exception.h> #include <asm/kprobes.h> #include <asm/mmu.h> #include <asm/ptrace.h> @@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, } -asmlinkage __kprobes notrace unsigned long +asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - nmi_enter(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); - nmi_exit(); + arm64_exit_nmi(regs); return ret; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index e4c0dadf0d92..f8f758e4a306 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -121,7 +121,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8af4e0e85736..2059d8f43f55 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> @@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr) * bad_mode handles the impossible case in the exception vector. This is always * fatal. */ -asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + arm64_enter_nmi(regs); + console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", @@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void handle_bad_stack(struct pt_regs *regs) +asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) unsigned int esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); + arm64_enter_nmi(regs); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -865,23 +870,16 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + arm64_enter_nmi(regs); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); -} - -asmlinkage void enter_from_user_mode(void) -{ - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); + arm64_exit_nmi(regs); } -NOKPROBE_SYMBOL(enter_from_user_mode); /* GENERIC_BUG traps */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801f..795d224f184f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,25 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -818,12 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (!user_mode(regs)) - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..5181872f9452 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 16b8336f91dd..5c8c06215dd4 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -123,7 +123,6 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) -LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) ifdef CONFIG_PPC64 ifeq ($(call cc-option-yn,-mcmodel=medium),y) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e0b52940e43c..750918451dd2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -242,6 +242,18 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif +#ifdef CONFIG_HOTPLUG_CPU +#define arch_clear_mm_cpumask_cpu(cpu, mm) \ + do { \ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \ + atomic_dec(&(mm)->context.active_cpus); \ + cpumask_clear_cpu(cpu, mm_cpumask(mm)); \ + } \ + } while (0) + +void cleanup_cpu_mmu_context(void); +#endif + static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 85215e79db42..a0ebc29f30b2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { /* We have a block of xive->nr_servers VPs. We just need to check - * raw vCPU ids are below the expected limit for this guest's - * core stride ; kvmppc_pack_vcpu_id() will pack them down to an - * index that can be safely used to compute a VP id that belongs - * to the VP block. + * packed vCPU ids are below that. */ - return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; + return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 0203cdf48c54..52e170bd95ae 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) : "memory"); } @@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) asm volatile("ptesync": : :"memory"); /* - * Flush the first set of the TLB, and any caching of partition table - * entries. Then flush the remaining sets of the TLB. Hash mode uses - * partition scoped TLB translations. + * Flush the partition table cache if this is HV mode. */ - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - for (set = 1; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); /* - * Now invalidate the process table cache. + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. * * From ISA v3.0B p. 1078: * The following forms are invalid. @@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. + */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 1c54821de7bf..0c8557220ae2 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -17,6 +17,7 @@ #include <linux/export.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/cpu.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) isync(); } #endif + +/** + * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined) + * + * This clears the CPU from mm_cpumask for all processes, and then flushes the + * local TLB to ensure TLB coherency in case the CPU is onlined again. + * + * KVM guest translations are not necessarily flushed here. If KVM started + * using mm_cpumask or the Linux APIs which do, this would have to be resolved. + */ +#ifdef CONFIG_HOTPLUG_CPU +void cleanup_cpu_mmu_context(void) +{ + int cpu = smp_processor_id(); + + clear_tasks_mm_cpumask(cpu); + tlbiel_all(); +} +#endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 63f61d8b55e5..f2bf98bdcea2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -742,8 +742,7 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - if (likely(nid > 0)) - node_set_online(nid); + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 74ebe664b016..adae2a6712e1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void) mpic_cpu_set_priority(0xf); + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 46115231a3b2..4426a109ec2f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -211,11 +211,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 54c4ba45c7ce..cbb67813cd5d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f2837e33bf5d..a02012f1b04a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,6 +90,9 @@ static int pseries_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39c..b3ac2455faad 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -458,7 +458,8 @@ again: return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 26bb0603c5a1..92beb1444644 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -763,12 +763,7 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -791,12 +786,7 @@ ENTRY(io_int_handler) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tm __PT_PSW(%r11),3 - jno 0f TRACE_IRQS_ON -0: -#endif mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel @@ -976,12 +966,7 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index daca7bad66de..8c0c68e7770e 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask, flags; + unsigned long cr0, cr0_new, psw_mask; struct s390_idle_data idle; u64 end; @@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); - local_irq_save(flags); psw_idle(&idle, psw_mask); - local_irq_restore(flags); + trace_hardirqs_off(); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 743f257cf2cb..75217fb63d7b 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de { struct msi_desc *entry = irq_get_msi_desc(data->irq); struct msi_msg msg = entry->msg; + int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpumask_first(dest) << 8); + msg.address_lo |= (cpu_addr << 8); pci_write_msi_msg(data->irq, &msg); return IRQ_SET_MASK_OK; @@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) unsigned long bit; struct msi_desc *msi; struct msi_msg msg; + int cpu_addr; int rc, irq; zdev->aisb = -1UL; @@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) handle_percpu_irq); msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { + if (msi->affinity) + cpu = cpumask_first(&msi->affinity->mask); + else + cpu = 0; + cpu_addr = smp_cpu_get_cpu_address(cpu); + msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= msi->affinity ? - (cpumask_first(&msi->affinity->mask) << 8) : 0; + msg.address_lo |= (cpu_addr << 8); + for_each_possible_cpu(cpu) { airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f6946b81f74a..fbf26e0f7a6a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_TABLE_SORT select CLKEVT_I8253 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 154259f18b8b..1bf21746f4ce 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -209,9 +209,6 @@ ifdef CONFIG_X86_64 LDFLAGS_vmlinux += -z max-page-size=0x200000 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ee249088cbfe..40b8fd375d52 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # Compressed kernel should be built as PIE since it may be loaded at any # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 954cb2702e23..27826c265aab 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -32,13 +32,12 @@ struct ghcb *boot_ghcb; */ static bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index b47cc4226934..485c5066f8b8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; @@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5c1ae3eff9d4..a8c3d284fa46 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1b98f8c12b96..235f5cde06fc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -161,7 +161,7 @@ static int __init early_set_hub_type(void) /* UV4/4A only have a revision difference */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id = node_id.s.revision - + UV4_HUB_REVISION_BASE; + + UV4_HUB_REVISION_BASE - 1; uv_hub_type_set(UV4); if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE) uv_hub_type_set(UV4|UV4A); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e5f4ee8f4c3b..e8b5f1cf1ae8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) if (d) { cpumask_set_cpu(cpu, &d->cpu_mask); + if (r->cache.arch_has_per_cpu_cfg) + rdt_domain_reconfigure_cdp(r); return; } @@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; + r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; @@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; + r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 80fa997fae60..f65d3c0dbc41 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -360,6 +360,8 @@ struct msr_param { * executing entities * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. + * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache + * level has CPU scope. */ struct rdt_cache { unsigned int cbm_len; @@ -369,6 +371,7 @@ struct rdt_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmaps; bool arch_has_empty_bitmaps; + bool arch_has_per_cpu_cfg; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6f4ca4bea625..f3418428682b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1909,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable) r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3fdaa042823d..138bdb1fd136 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): @@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 58f7fb95c7f4..4229950a5d78 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn) */ bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; + insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; |