diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 22:54:34 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-02 22:54:34 +0300 |
commit | 019b3fd94ba73d3ac615f0537440b81f129821f6 (patch) | |
tree | 09657e8fd2ff04295381e9c6492553fe16c58ab4 /arch/powerpc/kernel | |
parent | 4cad67197989c81417810b89f09a3549b75a2441 (diff) | |
parent | 4ebbbaa4ce8524b853dd6febf0176a6efa3482d7 (diff) | |
download | linux-019b3fd94ba73d3ac615f0537440b81f129821f6.tar.xz |
Merge tag 'powerpc-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- A big series refactoring parts of our KVM code, and converting some
to C.
- Support for ARCH_HAS_SET_MEMORY, and ARCH_HAS_STRICT_MODULE_RWX on
some CPUs.
- Support for the Microwatt soft-core.
- Optimisations to our interrupt return path on 64-bit.
- Support for userspace access to the NX GZIP accelerator on PowerVM on
Power10.
- Enable KUAP and KUEP by default on 32-bit Book3S CPUs.
- Other smaller features, fixes & cleanups.
Thanks to: Andy Shevchenko, Aneesh Kumar K.V, Arnd Bergmann, Athira
Rajeev, Baokun Li, Benjamin Herrenschmidt, Bharata B Rao, Christophe
Leroy, Daniel Axtens, Daniel Henrique Barboza, Finn Thain, Geoff Levand,
Haren Myneni, Jason Wang, Jiapeng Chong, Joel Stanley, Jordan Niethe,
Kajol Jain, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas
Piggin, Nick Desaulniers, Paul Mackerras, Russell Currey, Sathvika
Vasireddy, Shaokun Zhang, Stephen Rothwell, Sudeep Holla, Suraj Jitindar
Singh, Tom Rix, Vaibhav Jain, YueHaibing, Zhang Jianhua, and Zhen Lei.
* tag 'powerpc-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (218 commits)
powerpc: Only build restart_table.c for 64s
powerpc/64s: move ret_from_fork etc above __end_soft_masked
powerpc/64s/interrupt: clean up interrupt return labels
powerpc/64/interrupt: add missing kprobe annotations on interrupt exit symbols
powerpc/64: enable MSR[EE] in irq replay pt_regs
powerpc/64s/interrupt: preserve regs->softe for NMI interrupts
powerpc/64s: add a table of implicit soft-masked addresses
powerpc/64e: remove implicit soft-masking and interrupt exit restart logic
powerpc/64e: fix CONFIG_RELOCATABLE build warnings
powerpc/64s: fix hash page fault interrupt handler
powerpc/4xx: Fix setup_kuep() on SMP
powerpc/32s: Fix setup_{kuap/kuep}() on SMP
powerpc/interrupt: Use names in check_return_regs_valid()
powerpc/interrupt: Also use exit_must_hard_disable() on PPC32
powerpc/sysfs: Replace sizeof(arr)/sizeof(arr[0]) with ARRAY_SIZE
powerpc/ptrace: Refactor regs_set_return_{msr/ip}
powerpc/ptrace: Move set_return_regs_changed() before regs_set_return_{msr/ip}
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()
powerpc/pseries/vas: Include irqdomain.h
powerpc: mark local variables around longjmp as volatile
...
Diffstat (limited to 'arch/powerpc/kernel')
61 files changed, 2071 insertions, 1764 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index aa267d173ded..a47eefa09bcb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -86,11 +86,7 @@ int main(void) OFFSET(PACA_CANARY, paca_struct, canary); #endif #endif - OFFSET(MMCONTEXTID, mm_struct, context.id); -#ifdef CONFIG_PPC64 - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(NMI_MASK, NMI_MASK); -#else +#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -119,7 +115,6 @@ int main(void) #ifdef CONFIG_ALTIVEC OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr); OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); - OFFSET(THREAD_VRSAVE, thread_struct, vrsave); OFFSET(THREAD_USED_VR, thread_struct, used_vr); OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); @@ -150,22 +145,15 @@ int main(void) #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); - OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) - OFFSET(KUAP, thread_struct, kuap); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -185,19 +173,12 @@ int main(void) sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); - OFFSET(TI_PREEMPT, thread_info, preempt_count); #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); - OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); - OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); - OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); - OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ - DEFINE(PACA_SIZE, sizeof(struct paca_struct)); OFFSET(PACAPACAINDEX, paca_struct, paca_index); OFFSET(PACAPROCSTART, paca_struct, cpu_start); OFFSET(PACAKSAVE, paca_struct, kstack); @@ -209,18 +190,13 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); +#ifdef CONFIG_PPC_BOOK3S_64 + OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid); + OFFSET(PACASRR_VALID, paca_struct, srr_valid); +#endif OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S - OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); - OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); - OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit); - DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); -#endif /* CONFIG_PPC_MM_SLICES */ -#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACAPGD, paca_struct, pgd); @@ -241,21 +217,9 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S_64 - OFFSET(PACASLBCACHE, paca_struct, slb_cache); - OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); - OFFSET(PACASTABRR, paca_struct, stab_rr); - OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); -#else - OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); -#endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); -#ifdef CONFIG_PPC_PSERIES - OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); -#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); @@ -264,9 +228,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); #endif - OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); - OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_BOOK3S_64 */ OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 @@ -282,6 +244,9 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); +#ifdef CONFIG_PPC64 + OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); +#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif @@ -343,10 +308,6 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#ifdef CONFIG_PPC_KUAP - STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); -#endif - #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -368,10 +329,6 @@ int main(void) #endif #endif -#ifndef CONFIG_PPC64 - OFFSET(MM_PGD, mm_struct, pgd); -#endif /* ! CONFIG_PPC64 */ - /* About the CPU features table */ OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); @@ -404,13 +361,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); -#else - DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); -#endif - DEFINE(PTE_SIZE, sizeof(pte_t)); - #ifdef CONFIG_KVM OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); @@ -482,11 +432,9 @@ int main(void) OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); - OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); - OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); @@ -514,7 +462,6 @@ int main(void) OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); - OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); @@ -525,7 +472,6 @@ int main(void) OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); - OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); @@ -645,10 +591,8 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); - HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -756,7 +700,6 @@ int main(void) #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); - DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); #ifdef CONFIG_PPC_8xx DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 735e89337398..5693e1c67c2b 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - struct ppc_inst *p = (struct ppc_inst *)addr; + u32 *p = (u32 *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -45,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. */ - patch_instruction(p, ppc_inst(PPC_INST_NOP)); - patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_RAW_NOP())); + patch_branch(p + 1, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9160285cb2f4..0273a1349006 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -32,6 +32,7 @@ #include <asm/barrier.h> #include <asm/kup.h> #include <asm/bug.h> +#include <asm/interrupt.h> #include "head_32.h" @@ -74,6 +75,24 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: + stw r11, GPR1(r1) + stw r11, 0(r1) + mflr r12 + stw r12, _LINK(r1) +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#endif + lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + SAVE_GPR(2, r1) + addi r12,r12,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r1) + li r2, INTERRUPT_SYSCALL + stw r12,8(r1) + stw r2,_TRAP(r1) + SAVE_GPR(0, r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + addi r2,r10,-THREAD SAVE_NVGPRS(r1) /* Calling convention has r9 = orig r0, r10 = regs */ @@ -176,28 +195,6 @@ _GLOBAL(_switch) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ - mfmsr r11 - li r0,MSR_FP /* Disable floating-point */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ - mfspr r12,SPRN_VRSAVE /* save vrsave register value */ - stw r12,THREAD+THREAD_VRSAVE(r2) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - oris r0,r0,MSR_SPE@h /* Disable SPE */ - mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ - stw r12,THREAD+THREAD_SPEFSCR(r2) -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - and. r0,r0,r11 /* FP or altivec or SPE enabled? */ - beq+ 1f - andc r11,r11,r0 - mtmsr r11 - isync -1: stw r11,_MSR(r1) mfcr r10 stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ @@ -218,19 +215,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) mr r3,r2 addi r2,r4,-THREAD /* Update current */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_VRSAVE(r2) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_SPEFSCR(r2) - mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 03727308d8cc..15720f8661a1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -32,7 +32,6 @@ #include <asm/irqflags.h> #include <asm/hw_irq.h> #include <asm/context_tracking.h> -#include <asm/tm.h> #include <asm/ppc-opcode.h> #include <asm/barrier.h> #include <asm/export.h> @@ -48,372 +47,7 @@ /* * System calls. */ - .section ".toc","aw" -SYS_CALL_TABLE: - .tc sys_call_table[TC],sys_call_table - -#ifdef CONFIG_COMPAT -COMPAT_SYS_CALL_TABLE: - .tc compat_sys_call_table[TC],compat_sys_call_table -#endif - -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - .section ".text" - .align 7 - -#ifdef CONFIG_PPC_BOOK3S -.macro system_call_vectored name trapnr - .globl system_call_vectored_\name -system_call_vectored_\name: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - SCV_INTERRUPT_TO_KERNEL - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_LINK(r1) - std r11,_CTR(r1) - - li r11,\trapnr - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - -BEGIN_FTR_SECTION - HMT_MEDIUM -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * scv enters with MSR[EE]=1 and is immediately considered soft-masked. - * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, - * and interrupts may be masked and pending already. - * system_call_exception() will call trace_hardirqs_off() which means - * interrupts could already have been blocked before trace_hardirqs_off, - * but this is the best we can do. - */ - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,1 /* scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r4,_NIP(r1) - ld r5,_MSR(r1) - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - cmpdi r3,0 - bne .Lsyscall_vectored_\name\()_restore_regs - - /* rfscv returns with LR->NIA and CTR->MSR */ - mtlr r4 - mtctr r5 - - /* Could zero these as per ABI, but we may consider a stricter ABI - * which preserves these if libc implementations can benefit, so - * restore them for now until further measurement is done. */ - ld r0,GPR0(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - /* Zero volatile regs that may contain sensitive kernel data */ - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtspr SPRN_XER,r0 - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFSCV_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_vectored_\name\()_restore_regs: - li r3,0 - mtmsrd r3,1 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - - ld r3,_CTR(r1) - ld r4,_LINK(r1) - ld r5,_XER(r1) - - REST_NVGPRS(r1) - ld r0,GPR0(r1) - mtcr r2 - mtctr r3 - mtlr r4 - mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) - RFI_TO_USER -.endm - -system_call_vectored common 0x3000 -/* - * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 - * which is tested by system_call_exception when r0 is -1 (as set by vector - * entry code). - */ -system_call_vectored sigill 0x7ff0 - - -/* - * Entered via kernel return set up by kernel/sstep.c, must match entry regs - */ - .globl system_call_vectored_emulate -system_call_vectored_emulate: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - b system_call_vectored_common -#endif - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common_real -system_call_common_real: - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - mtmsrd r10 - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common -system_call_common: -_ASM_NOKPROBE_SYMBOL(system_call_common) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) -#ifdef CONFIG_PPC_FSL_BOOK3E -START_BTB_FLUSH_SECTION - BTB_FLUSH(r10) -END_BTB_FLUSH_SECTION -#endif - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_CTR(r1) - mflr r10 - - /* - * This clears CR0.SO (bit 28), which is the error indication on - * return from this system call. - */ - rldimi r12,r11,28,(63-28) - li r11,0xc00 - std r10,_LINK(r1) - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - - /* - * We always enter kernel from userspace with irq soft-mask enabled and - * nothing pending. system_call_exception() will call - * trace_hardirqs_off(). - */ - li r11,IRQS_ALL_DISABLED - li r12,PACA_IRQ_HARD_DIS - stb r11,PACAIRQSOFTMASK(r13) - stb r12,PACAIRQHAPPENED(r13) - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,0 /* !scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r4,_NIP(r1) - ld r5,_MSR(r1) - ld r6,_LINK(r1) - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - mtlr r6 - - cmpdi r3,0 - bne .Lsyscall_restore_regs - /* Zero volatile regs that may contain sensitive kernel data */ - li r0,0 - li r4,0 - li r5,0 - li r6,0 - li r7,0 - li r8,0 - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtctr r0 - mtspr SPRN_XER,r0 -.Lsyscall_restore_regs_cont: - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFI_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_restore_regs: - ld r3,_CTR(r1) - ld r4,_XER(r1) - REST_NVGPRS(r1) - mtctr r3 - mtspr SPRN_XER,r4 - ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) - b .Lsyscall_restore_regs_cont - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -.Ltabort_syscall: - /* Firstly we need to enable TM in the kernel */ - mfmsr r10 - li r9, 1 - rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r10, 0 - - /* tabort, this dooms the transaction, nothing else */ - li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R9) - - /* - * Return directly to userspace. We have corrupted user register state, - * but userspace will never see that register state. Execution will - * resume after the tbegin of the aborted transaction with the - * checkpointed register state. - */ - li r9, MSR_RI - andc r10, r10, r9 - mtmsrd r10, 1 - mtspr SPRN_SRR0, r11 - mtspr SPRN_SRR1, r12 - RFI_TO_USER - b . /* prevent speculative execution */ -#endif - -#ifdef CONFIG_PPC_BOOK3S -_GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_vectored_common_exit -#endif - -_GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_exit - -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) - mtctr r14 - mr r3,r15 -#ifdef PPC64_ELF_ABI_v2 - mr r12,r14 -#endif - bctrl - li r3,0 - b .Lsyscall_exit #ifdef CONFIG_PPC_BOOK3S_64 @@ -630,156 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - /* - * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not - * touched, no exit work created, then this can be used. - */ - .balign IFETCH_ALIGN_BYTES - .globl fast_interrupt_return -fast_interrupt_return: -_ASM_NOKPROBE_SYMBOL(fast_interrupt_return) - kuap_check_amr r3, r4 - ld r5,_MSR(r1) - andi. r0,r5,MSR_PR -#ifdef CONFIG_PPC_BOOK3S - bne .Lfast_user_interrupt_return_amr - kuap_kernel_restore r3, r4 - andi. r0,r5,MSR_RI - li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ - bne+ .Lfast_kernel_interrupt_return - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b . /* should not get here */ -#else - bne .Lfast_user_interrupt_return - b .Lfast_kernel_interrupt_return -#endif - - .balign IFETCH_ALIGN_BYTES - .globl interrupt_return -interrupt_return: -_ASM_NOKPROBE_SYMBOL(interrupt_return) - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR - beq .Lkernel_interrupt_return - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_user_prepare - cmpdi r3,0 - bne- .Lrestore_nvgprs - -#ifdef CONFIG_PPC_BOOK3S -.Lfast_user_interrupt_return_amr: - kuap_user_restore r3, r4 -#endif -.Lfast_user_interrupt_return: - ld r11,_NIP(r1) - ld r12,_MSR(r1) -BEGIN_FTR_SECTION - ld r10,_PPR(r1) - mtspr SPRN_PPR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) - - mtcr r3 - mtlr r4 - mtctr r5 - mtspr SPRN_XER,r6 - - REST_4GPRS(2, r1) - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - RFI_TO_USER - b . /* prevent speculative execution */ - -.Lrestore_nvgprs: - REST_NVGPRS(r1) - b .Lfast_user_interrupt_return - - .balign IFETCH_ALIGN_BYTES -.Lkernel_interrupt_return: - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_kernel_prepare - -.Lfast_kernel_interrupt_return: - cmpdi cr1,r3,0 - ld r11,_NIP(r1) - ld r12,_MSR(r1) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_LINK(r1) - ld r4,_CTR(r1) - ld r5,_XER(r1) - ld r6,_CCR(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - - mtlr r3 - mtctr r4 - mtspr SPRN_XER,r5 - - /* - * Leaving a stale exception_marker on the stack can confuse - * the reliable stack unwinder later on. Clear it. - */ - std r0,STACK_FRAME_OVERHEAD-16(r1) - - REST_4GPRS(2, r1) - - bne- cr1,1f /* emulate stack store */ - mtcr r6 - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - RFI_TO_KERNEL - b . /* prevent speculative execution */ - -1: /* - * Emulate stack store with update. New r1 value was already calculated - * and updated in our interrupt regs by emulate_loadstore, but we can't - * store the previous value of r1 to the stack before re-loading our - * registers from it, otherwise they could be clobbered. Use - * PACA_EXGEN as temporary storage to hold the store data, as - * interrupts are disabled here so it won't be clobbered. - */ - mtcr r6 - std r9,PACA_EXGEN+0(r13) - addi r9,r1,INT_FRAME_SIZE /* get original r1 */ - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - std r9,0(r1) /* perform store component of stdu */ - ld r9,PACA_EXGEN+0(r13) - - RFI_TO_KERNEL - b . /* prevent speculative execution */ - #ifdef CONFIG_PPC_RTAS /* * On CHRP, the Run-Time Abstraction Services (RTAS) have to be diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 2ed14d4a47f5..93b0f3ec8fb0 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node, for (i = 0; i < (len / 4); i++) { struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); - patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); + patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); + patch_instruction(epapr_ev_idle_start + i, inst); #endif } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f1ae710274bc..1401787b0b93 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -26,6 +26,10 @@ #include <asm/feature-fixups.h> #include <asm/context_tracking.h> +/* 64e interrupt returns always use SRR registers */ +#define fast_interrupt_return fast_interrupt_return_srr +#define interrupt_return interrupt_return_srr + /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... * when taking special interrupts. For now we don't support that, @@ -897,6 +901,34 @@ kernel_dbg_exc: bl unknown_exception b interrupt_return +.macro SEARCH_RESTART_TABLE +#ifdef CONFIG_RELOCATABLE + ld r11,PACATOC(r13) + ld r14,__start___restart_table@got(r11) + ld r15,__stop___restart_table@got(r11) +#else + LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table) +#endif +300: + cmpd r14,r15 + beq 302f + ld r11,0(r14) + cmpld r10,r11 + blt 301f + ld r11,8(r14) + cmpld r10,r11 + bge 301f + ld r11,16(r14) + b 303f +301: + addi r14,r14,24 + b 300b +302: + li r11,0 +303: +.endm + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -905,6 +937,9 @@ kernel_dbg_exc: */ .macro masked_interrupt_book3e paca_irq full_mask + std r14,PACA_EXGEN+EX_R14(r13) + std r15,PACA_EXGEN+EX_R15(r13) + lbz r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS @@ -914,15 +949,23 @@ kernel_dbg_exc: stb r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 - rldicl r10,r11,48,1 /* clear MSR_EE */ - rotldi r11,r10,16 + xori r11,r11,MSR_EE /* clear MSR_EE */ mtspr SPRN_SRR1,r11 .endif + mfspr r10,SPRN_SRR0 + SEARCH_RESTART_TABLE + cmpdi r11,0 + beq 1f + mtspr SPRN_SRR0,r11 /* return to restart address */ +1: + lwz r11,PACA_EXGEN+EX_CR(r13) mtcr r11 ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) mfspr r13,SPRN_SPRG_GEN_SCRATCH rfi b . @@ -1282,7 +1325,12 @@ a2_tlbinit_code_start: a2_tlbinit_after_linear_map: /* Now we branch the new virtual address mapped by this entry */ +#ifdef CONFIG_RELOCATABLE + ld r5,PACATOC(r13) + ld r3,1f@got(r5) +#else LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f) +#endif mtctr r3 bctr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7fc6e078d4e..4aec59a77d4c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -428,18 +428,31 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) /* If coming from user, skip soft-mask tests. */ andi. r10,r12,MSR_PR - bne 2f - - /* Kernel code running below __end_interrupts is implicitly - * soft-masked */ - LOAD_HANDLER(r10, __end_interrupts) + bne 3f + + /* + * Kernel code running below __end_soft_masked may be + * implicitly soft-masked if it is within the regions + * in the soft mask table. + */ + LOAD_HANDLER(r10, __end_soft_masked) cmpld r11,r10 + bge+ 1f + + /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */ + mtctr r12 + stw r9,PACA_EXGEN+EX_CCR(r13) + SEARCH_SOFT_MASK_TABLE + cmpdi r12,0 + mfctr r12 /* Restore r12 to SRR1 */ + lwz r9,PACA_EXGEN+EX_CCR(r13) + beq 1f /* Not in soft-mask table */ li r10,IMASK - blt- 1f + b 2f /* In soft-mask table, always mask */ /* Test the soft mask state against our interrupt's bit */ - lbz r10,PACAIRQSOFTMASK(r13) -1: andi. r10,r10,IMASK +1: lbz r10,PACAIRQSOFTMASK(r13) +2: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -470,7 +483,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ -2: mr r10,r1 /* Save r1 */ +3: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -485,6 +498,20 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + /* Mark our [H]SRRs valid for return */ + li r10,1 + .if IHSRR_IF_HVMODE + BEGIN_FTR_SECTION + stb r10,PACAHSRR_VALID(r13) + FTR_SECTION_ELSE + stb r10,PACASRR_VALID(r13) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + stb r10,PACAHSRR_VALID(r13) + .else + stb r10,PACASRR_VALID(r13) + .endif + .if ISET_RI li r10,MSR_RI mtmsrd r10,1 /* Set MSR_RI */ @@ -577,6 +604,66 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) __GEN_COMMON_BODY \name .endm +.macro SEARCH_RESTART_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___restart_table) + LOAD_REG_ADDR(r10, __stop___restart_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + ld r12,16(r9) + b 303f +301: + addi r9,r9,24 + b 300b +302: + li r12,0 +303: +.endm + +.macro SEARCH_SOFT_MASK_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___soft_mask_table) + LOAD_REG_ADDR(r10, __stop___soft_mask_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + li r12,1 + b 303f +301: + addi r9,r9,16 + b 300b +302: + li r12,0 +303: +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. @@ -584,10 +671,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro EXCEPTION_RESTORE_REGS hsrr=0 /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) + li r10,0 .if \hsrr mtspr SPRN_HSRR1,r9 + stb r10,PACAHSRR_VALID(r13) .else mtspr SPRN_SRR1,r9 + stb r10,PACASRR_VALID(r13) .endif ld r9,_NIP(r1) .if \hsrr @@ -704,17 +794,17 @@ __start_interrupts: * scv instructions enter the kernel without changing EE, RI, ME, or HV. * In particular, this means we can take a maskable interrupt at any point * in the scv handler, which is unlike any other interrupt. This is solved - * by treating the instruction addresses below __end_interrupts as being - * soft-masked. + * by treating the instruction addresses in the handler as being soft-masked, + * by adding a SOFT_MASK_TABLE entry for them. * * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and * ensure scv is never executed with relocation off, which means AIL-0 * should never happen. * - * Before leaving the below __end_interrupts text, at least of the following - * must be true: + * Before leaving the following inside-__end_soft_masked text, at least of the + * following must be true: * - MSR[PR]=1 (i.e., return to userspace) - * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * * Call convention: @@ -722,6 +812,7 @@ __start_interrupts: * syscall register convention is in Documentation/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) +1: /* SCV 0 */ mr r9,r13 GET_PACA(r13) @@ -751,8 +842,11 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) b system_call_vectored_sigill #endif .endr +2: EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) +SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above. + #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) __LOAD_HANDLER(r10, system_call_vectored_common) @@ -1149,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common) mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_PPC_P7_NAP @@ -1275,7 +1369,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr 1: bl do_break /* @@ -1283,7 +1377,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * If so, we need to restore them with their updated values. */ REST_NVGPRS(r1) - b interrupt_return + b interrupt_return_srr /** @@ -1323,7 +1417,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1332,7 +1426,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1368,7 +1462,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr /** @@ -1403,7 +1497,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1412,7 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1456,7 +1550,11 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + BEGIN_FTR_SECTION + b interrupt_return_hsrr + FTR_SECTION_ELSE + b interrupt_return_srr + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) /** @@ -1483,7 +1581,7 @@ EXC_COMMON_BEGIN(alignment_common) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -1590,7 +1688,7 @@ EXC_COMMON_BEGIN(program_check_common) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /* @@ -1633,12 +1731,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif @@ -1677,7 +1775,7 @@ EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt - b interrupt_return + b interrupt_return_srr /** @@ -1714,6 +1812,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * * Be careful to avoid touching the kernel stack. */ + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_CTR(r13) mtctr r10 mtcrf 0x80,r9 @@ -1761,7 +1861,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0xb00, 0x100) @@ -1838,8 +1938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mtctr r10 bctr .else - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ #ifdef CONFIG_RELOCATABLE __LOAD_HANDLER(r10, system_call_common) mtctr r10 @@ -1925,7 +2023,7 @@ EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step addi r3,r1,STACK_FRAME_OVERHEAD bl single_step_exception - b interrupt_return + b interrupt_return_srr /** @@ -1963,7 +2061,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_hsrr /** @@ -1988,7 +2086,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2012,7 +2110,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_hsrr /** @@ -2089,7 +2187,7 @@ EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2119,7 +2217,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_hsrr /** @@ -2145,7 +2243,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xec0, 0x20) @@ -2188,7 +2286,7 @@ EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b interrupt_return + b interrupt_return_srr /** @@ -2225,19 +2323,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2278,14 +2376,14 @@ BEGIN_FTR_SECTION 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2313,7 +2411,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -2341,7 +2439,7 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xfa0, 0x20) @@ -2370,7 +2468,7 @@ EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) @@ -2401,7 +2499,7 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint addi r3,r1,STACK_FRAME_OVERHEAD bl instruction_breakpoint_exception - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0x1400, 0x100) @@ -2509,6 +2607,8 @@ BEGIN_FTR_SECTION ld r10,PACA_EXGEN+EX_CFAR(r13) mtspr SPRN_CFAR,r10 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) @@ -2521,7 +2621,7 @@ EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr #ifdef CONFIG_CBE_RAS @@ -2538,7 +2638,7 @@ EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) @@ -2568,7 +2668,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) #else bl unknown_exception #endif - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_CBE_RAS @@ -2585,7 +2685,7 @@ EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) @@ -2610,7 +2710,6 @@ INT_DEFINE_END(soft_nmi) * and run it entirely with interrupts hard disabled. */ EXC_COMMON_BEGIN(soft_nmi_common) - mfspr r11,SPRN_SRR0 mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE @@ -2624,6 +2723,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) mtmsrd r9,1 kuap_kernel_restore r9, r10 + EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -2645,19 +2745,24 @@ masked_Hinterrupt: .else masked_interrupt: .endif - lbz r11,PACAIRQHAPPENED(r13) - or r11,r11,r10 - stb r11,PACAIRQHAPPENED(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + lbz r9,PACAIRQHAPPENED(r13) + or r9,r9,r10 + stb r9,PACAIRQHAPPENED(r13) + + .if ! \hsrr cmpwi r10,PACA_IRQ_DEC bne 1f - lis r10,0x7fff - ori r10,r10,0xffff - mtspr SPRN_DEC,r10 + LOAD_REG_IMMEDIATE(r9, 0x7fffffff) + mtspr SPRN_DEC,r9 #ifdef CONFIG_PPC_WATCHDOG + lwz r9,PACA_EXGEN+EX_CCR(r13) b soft_nmi_common #else b 2f #endif + .endif + 1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK beq 2f xori r12,r12,MSR_EE /* clear MSR_EE */ @@ -2666,11 +2771,29 @@ masked_interrupt: .else mtspr SPRN_SRR1,r12 .endif - ori r11,r11,PACA_IRQ_HARD_DIS - stb r11,PACAIRQHAPPENED(r13) + ori r9,r9,PACA_IRQ_HARD_DIS + stb r9,PACAIRQHAPPENED(r13) 2: /* done */ - ld r10,PACA_EXGEN+EX_CTR(r13) - mtctr r10 + li r9,0 + .if \hsrr + stb r9,PACAHSRR_VALID(r13) + .else + stb r9,PACASRR_VALID(r13) + .endif + + SEARCH_RESTART_TABLE + cmpdi r12,0 + beq 3f + .if \hsrr + mtspr SPRN_HSRR0,r12 + .else + mtspr SPRN_SRR0,r12 + .endif +3: + + ld r9,PACA_EXGEN+EX_CTR(r13) + mtctr r9 + lwz r9,PACA_EXGEN+EX_CCR(r13) mtcrf 0x80,r9 std r1,PACAR1(r13) ld r9,PACA_EXGEN+EX_R9(r13) @@ -2881,7 +3004,7 @@ MASKED_INTERRUPT hsrr=1 USE_FIXED_SECTION(virt_trampolines) /* - * All code below __end_interrupts is treated as soft-masked. If + * All code below __end_soft_masked is treated as soft-masked. If * any code runs here with MSR[EE]=1, it must then cope with pending * soft interrupt being raised (i.e., by ensuring it is replayed). * diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index c9e2819b095a..c7022c41cc31 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); -bool check_kvm_guest(void) +int __init check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return false; + return 0; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return false; + return 0; static_branch_enable(&kvm_guest); - return true; + + return 0; } +core_initcall(check_kvm_guest); // before kvm_guest_init() #endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2c57ece6671c..6010adcee16e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -103,6 +103,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) ori r12,r12,MSR_FP or r12,r12,r4 std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_FP(r5) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a8221ddcbd66..6b1ec9e3541b 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -142,42 +142,23 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 - mfspr r10, SPRN_SRR0 + mfspr r12, SPRN_SRR0 LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ - lis r12, 1f@h - ori r12, r12, 1f@l + lis r10, 1f@h + ori r10, r10, 1f@l mtspr SPRN_SRR1, r11 - mtspr SPRN_SRR0, r12 - mfspr r12,SPRN_SPRG_THREAD + mtspr SPRN_SRR0, r10 + mfspr r10,SPRN_SPRG_THREAD mr r11, r1 - lwz r1,TASK_STACK-THREAD(r12) - tovirt(r12, r12) + lwz r1,TASK_STACK-THREAD(r10) + tovirt(r10, r10) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE rfi 1: - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11, r1 - stw r10,_NIP(r11) - mflr r10 - stw r10, _LINK(r11) - mfcr r10 - rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ - stw r10,_CCR(r11) /* save registers */ -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#endif - lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r10,8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - addi r2,r12,-THREAD + stw r12,_NIP(r1) + mfcr r12 + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + stw r12,_CCR(r1) b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e1360b88b6cb..7d72ee5ab387 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -701,39 +701,3 @@ _GLOBAL(abort) mfspr r13,SPRN_DBCR0 oris r13,r13,DBCR0_RST_SYSTEM@h mtspr SPRN_DBCR0,r13 - -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif - sync - mtspr SPRN_PID,r3 - isync /* Need an isync to flush shadow */ - /* TLBs after changing PID */ - blr - -/* We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 5c106ac36626..ddc978a2d381 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -532,6 +532,10 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_44x_kuep +#endif 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -743,6 +747,10 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_47x_kuep +#endif 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. @@ -780,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck) sync blr -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... @@ -1239,34 +1233,8 @@ head_start_common: isync blr -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align PAGE_SHIFT - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - -/* - * To support >32-bit physical addresses, we use an 8KB pgdir. - */ - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 - #ifdef CONFIG_SMP + .data .align 12 temp_boot_stack: .space 1024 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ece7f97bafff..79930b0bc781 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -194,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B) /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" +/* This value is used to mark exception frames on the stack. */ exception_marker: - .tc ID_72656773_68657265[TC],0x7265677368657265 + .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER .previous /* @@ -211,6 +212,8 @@ OPEN_TEXT_SECTION(0x100) USE_TEXT_SECTION() +#include "interrupt_64.S" + #ifdef CONFIG_PPC_BOOK3E /* * The booting_thread_hwid holds the thread id we want to boot in cpu @@ -997,23 +1000,3 @@ start_here_common: 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. - */ - .section ".bss" -/* - * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K. - * We will need to find a better way to fix this - */ - .align 16 - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7d445e4342c0..9bdb95f5694f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -786,28 +786,3 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ - .data - .globl sdata -sdata: - .globl empty_zero_page - .align PAGE_SHIFT -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE table pointers, usually the kernel and current user - * pointer to their respective root page table (pgdir). - */ - .globl abatron_pteptrs -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 065178f19a3d..764edd860ed4 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -518,8 +518,6 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 mfspr r1,SPRN_SPRG_603_LRU @@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception @@ -756,9 +766,6 @@ PerformanceMonitor: * the kernel image to physical address PHYSICAL_START. */ relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h lis r3,PHYSICAL_START@h /* Destination base address */ li r6,0 /* Destination offset */ li r5,0x4000 /* # bytes of memory to copy */ @@ -766,7 +773,8 @@ relocate_kernel: addi r0,r3,4f@l /* jump to the address of 4f */ mtctr r0 /* in copy and do the rest. */ bctr /* jump to the copy */ -4: mr r5,r25 +4: lis r5,_end-KERNELBASE@h + ori r5,r5,_end-KERNELBASE@l bl copy_and_flush /* copy the rest */ b turn_on_mmu @@ -924,12 +932,6 @@ _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ @@ -1024,58 +1026,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) rfi /* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - lwz r4, MM_PGD(r4) - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif -BEGIN_MMU_FTR_SECTION -#ifndef CONFIG_BDI_SWITCH - lwz r4, MM_PGD(r4) -#endif - tophys(r4, r4) - rlwinm r4, r4, 4, 0xffff01ff - mtspr SPRN_SDR1, r4 -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - -/* * An undocumented "feature" of 604e requires that the v bit * be cleared before changing BAT values. * @@ -1256,61 +1206,4 @@ setup_usbgecko_bat: blr #endif -#ifdef CONFIG_8260 -/* Jump into the system reset for the rom. - * We first disable the MMU, and then jump to the ROM reset address. - * - * r3 is the board info structure, r4 is the location for starting. - * I use this for building a small kernel that can load other kernels, - * rather than trying to write or rely on a rom monitor that can tftp load. - */ - .globl m8260_gorom -m8260_gorom: - mfmsr r0 - rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ - sync - mtmsr r0 - sync - mfspr r11, SPRN_HID0 - lis r10, 0 - ori r10,r10,HID0_ICE|HID0_DCE - andc r11, r11, r10 - mtspr SPRN_HID0, r11 - isync - li r5, MSR_ME|MSR_RI - lis r6,2f@h - addis r6,r6,-KERNELBASE@h - ori r6,r6,2f@l - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r5 - isync - sync - rfi -2: - mtlr r4 - blr -#endif - - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ .data - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index f82470091697..87b806e8eded 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -128,37 +128,20 @@ BEGIN_FTR_SECTION mr r12, r13 lwz r13, THREAD_NORMSAVE(2)(r10) FTR_SECTION_ELSE -#endif mfcr r12 -#ifdef CONFIG_KVM_BOOKE_HV ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#else + mfcr r12 #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - lwz r11, TASK_STACK - THREAD(r10) + mr r11, r1 + lwz r1, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) - stw r12, _CCR(r11) /* save various registers */ - mflr r12 - stw r12,_LINK(r11) + ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r1) mfspr r12,SPRN_SRR0 - stw r1, GPR1(r11) - stw r1, 0(r11) - mr r1, r11 - stw r12,_NIP(r11) - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r12, r12, STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r12, 8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - - addi r2,r10,-THREAD + stw r12,_NIP(r1) b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index a1a5c3f10dc4..0f9642f36b49 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -985,20 +985,6 @@ _GLOBAL(abort) mtspr SPRN_DBCR0,r13 isync -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start @@ -1226,26 +1212,3 @@ _GLOBAL(restore_to_as0) */ 3: mr r3,r5 bl _start - -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8fc7a14e4d71..21a638aff72f 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -486,7 +486,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) return; reset: - regs->msr &= ~MSR_SE; + regs_set_return_msr(regs, regs->msr & ~MSR_SE); for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); @@ -537,7 +537,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, current->thread.last_hit_ubp[i] = bp[i]; info[i] = NULL; } - regs->msr |= MSR_SE; + regs_set_return_msr(regs, regs->msr | MSR_SE); return false; } diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e0938ba298f2..21bbd615ca41 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include <linux/context_tracking.h> #include <linux/err.h> #include <linux/compat.h> +#include <linux/sched/debug.h> /* for show_regs */ #include <asm/asm-prototypes.h> #include <asm/kup.h> @@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS]; typedef long (*syscall_fn)(long, long, long, long, long, long); +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); +static inline bool exit_must_hard_disable(void) +{ + return static_branch_unlikely(&interrupt_exit_not_reentrant); +} +#else +static inline bool exit_must_hard_disable(void) +{ + return true; +} +#endif + +/* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. + * + * restartable is true then EE/RI can be left on because interrupts are handled + * with a restart sequence. + */ +static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + if (exit_must_hard_disable() || !restartable) + __hard_EE_RI_disable(); + +#ifdef CONFIG_PPC64 + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable() || !restartable) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + + return false; + } +#endif + return true; +} + /* Has to run notrace because it is entered not completely "reconciled" */ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, @@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5, return f(r3, r4, r5, r6, r7, r8); } -/* - * local irqs must be disabled. Returns false if the caller must re-enable - * them, check for new work, and try again. - * - * This should be called with local irqs disabled, but if they were previously - * enabled when the interrupt handler returns (indicating a process-context / - * synchronous interrupt) then irqs_enabled should be true. - */ -static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) -{ - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - if (clear_ri) - __hard_EE_RI_disable(); - else - __hard_irq_disable(); -#ifdef CONFIG_PPC64 - if (unlikely(lazy_irq_pending_nocheck())) { - /* Took an interrupt, may have more exit work to do. */ - if (clear_ri) - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - return false; - } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); -#endif - return true; -} - -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) -{ - if (__prep_irq_for_enabled_exit(clear_ri)) - return true; - - /* - * Must replay pending soft-masked interrupts now. Don't just - * local_irq_enabe(); local_irq_disable(); because if we are - * returning from an asynchronous interrupt here, another one - * might hit after irqs are enabled, and it would exit via this - * same path allowing another to fire, and so on unbounded. - * - * If interrupts were enabled when this interrupt exited, - * indicating a process context (synchronous) interrupt, - * local_irq_enable/disable can be used, which will enable - * interrupts rather than keeping them masked (unclear how - * much benefit this is over just replaying for all cases, - * because we immediately disable again, so all we're really - * doing is allowing hard interrupts to execute directly for - * a very small time, rather than being masked and replayed). - */ - if (irqs_enabled) { - local_irq_enable(); - local_irq_disable(); - } else { - replay_soft_interrupts(); - } - - return false; -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void) #endif } -/* - * This should be called after a syscall returns, with r3 the return value - * from the syscall. If this function returns non-zero, the system call - * exit assembly should additionally load all GPR registers and CTR and XER - * from the interrupt frame. - * - * The function graph tracer can not trace the return side of this function, - * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. - */ -notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs, - long scv) +static void check_return_regs_valid(struct pt_regs *regs) { - unsigned long ti_flags; - unsigned long ret = 0; - bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; +#ifdef CONFIG_PPC_BOOK3S_64 + unsigned long trap, srr0, srr1; + static bool warned; + u8 *validp; + char *h; - CT_WARN_ON(ct_state() == CONTEXT_USER); + if (trap_is_scv(regs)) + return; - kuap_assert_locked(); + trap = regs->trap; + // EE in HV mode sets HSRRs like 0xea0 + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) + trap = 0xea0; + + switch (trap) { + case 0x980: + case INTERRUPT_H_DATA_STORAGE: + case 0xe20: + case 0xe40: + case INTERRUPT_HMI: + case 0xe80: + case 0xea0: + case INTERRUPT_H_FAC_UNAVAIL: + case 0x1200: + case 0x1500: + case 0x1600: + case 0x1800: + validp = &local_paca->hsrr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_HSRR0); + srr1 = mfspr(SPRN_HSRR1); + h = "H"; + + break; + default: + validp = &local_paca->srr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_SRR0); + srr1 = mfspr(SPRN_SRR1); + h = ""; + break; + } - regs->result = r3; + if (srr0 == regs->nip && srr1 == regs->msr) + return; - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); + /* + * A NMI / soft-NMI interrupt may have come in after we found + * srr_valid and before the SRRs are loaded. The interrupt then + * comes in and clobbers SRRs and clears srr_valid. Then we load + * the SRRs here and test them above and find they don't match. + * + * Test validity again after that, to catch such false positives. + * + * This test in general will have some window for false negatives + * and may not catch and fix all such cases if an NMI comes in + * later and clobbers SRRs without clearing srr_valid, but hopefully + * such things will get caught most of the time, statistically + * enough to be able to get a warning out. + */ + barrier(); - ti_flags = current_thread_info()->flags; + if (!*validp) + return; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { - if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { - r3 = -r3; - regs->ccr |= 0x10000000; /* Set SO bit in CR */ - } + if (!warned) { + warned = true; + printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); + printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); + show_regs(regs); } - if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { - if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; - else - regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); - } else { - regs->gpr[3] = r3; - } - - if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; - } + *validp = 0; /* fixup */ +#endif +} - local_irq_disable(); +static notrace unsigned long +interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) +{ + unsigned long ti_flags; again: ti_flags = READ_ONCE(current_thread_info()->flags); @@ -303,7 +321,7 @@ again: ti_flags = READ_ONCE(current_thread_info()->flags); } - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely((ti_flags & _TIF_RESTORE_TM))) { restore_tm_state(regs); @@ -327,10 +345,10 @@ again: } } - user_enter_irqoff(); + check_return_regs_valid(regs); - /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { user_exit_irqoff(); local_irq_enable(); local_irq_disable(); @@ -352,90 +370,131 @@ again: return ret; } -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. + */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs, + long scv) { unsigned long ti_flags; - unsigned long flags; unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * AMR can only have been unlocked if we interrupted the kernel. - */ kuap_assert_locked(); - local_irq_save(flags); - -again: - ti_flags = READ_ONCE(current_thread_info()->flags); - while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { - local_irq_enable(); /* returning to user: may enable */ - if (ti_flags & _TIF_NEED_RESCHED) { - schedule(); - } else { - if (ti_flags & _TIF_SIGPENDING) - ret |= _TIF_RESTOREALL; - do_notify_resume(regs, ti_flags); - } - local_irq_disable(); - ti_flags = READ_ONCE(current_thread_info()->flags); - } + regs->result = r3; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely((ti_flags & _TIF_RESTORE_TM))) { - restore_tm_state(regs); - } else { - unsigned long mathflags = MSR_FP; + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); - if (cpu_has_feature(CPU_FTR_VSX)) - mathflags |= MSR_VEC | MSR_VSX; - else if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mathflags |= MSR_VEC; + ti_flags = current_thread_info()->flags; - /* See above restore_math comment */ - if ((regs->msr & mathflags) != mathflags) - restore_math(regs); + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ } } - user_enter_irqoff(); + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); + } else { + regs->gpr[3] = r3; + } - if (unlikely(!__prep_irq_for_enabled_exit(true))) { - user_exit_irqoff(); - local_irq_enable(); - local_irq_disable(); - goto again; + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; } - booke_load_dbcr0(); + local_irq_disable(); + ret = interrupt_exit_user_prepare_main(ret, regs); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - local_paca->tm_scratch = regs->msr; +#ifdef CONFIG_PPC64 + regs->exit_result = ret; #endif - account_cpu_user_exit(); + return ret; +} - /* Restore user access locks last */ - kuap_user_restore(regs); - kuep_unlock(); +#ifdef CONFIG_PPC64 +notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs) +{ + /* + * This is called when detecting a soft-pending interrupt as well as + * an alternate-return interrupt. So we can't just have the alternate + * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless + * the soft-pending case were to fix things up as well). RI might be + * disabled, in which case it gets re-enabled by __hard_irq_disable(). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); + + return regs->exit_result; +} +#endif + +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) +{ + unsigned long ret; + + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(arch_irq_disabled_regs(regs)); + CT_WARN_ON(ct_state() == CONTEXT_USER); + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. + */ + kuap_assert_locked(); + + local_irq_disable(); + + ret = interrupt_exit_user_prepare_main(0, regs); + +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif return ret; } void preempt_schedule_irq(void); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) { unsigned long flags; unsigned long ret = 0; unsigned long kuap; + bool stack_store = current_thread_info()->flags & + _TIF_EMULATE_STACK_STORE; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) @@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign kuap = kuap_get_and_assert_locked(); - if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); - ret = 1; - } - local_irq_save(flags); if (!arch_irq_disabled_regs(regs)) { @@ -469,17 +523,58 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) + check_return_regs_valid(regs); + + /* + * Stack store exit can't be restarted because the interrupt + * stack frame might have been clobbered. + */ + if (!prep_irq_for_enabled_exit(unlikely(stack_store))) { + /* + * Replay pending soft-masked interrupts now. Don't + * just local_irq_enabe(); local_irq_disable(); because + * if we are returning from an asynchronous interrupt + * here, another one might hit after irqs are enabled, + * and it would exit via this same path allowing + * another to fire, and so on unbounded. + */ + hard_irq_disable(); + replay_soft_interrupts(); + /* Took an interrupt, may have more exit work to do. */ goto again; - } else { - /* Returning to a kernel context with local irqs disabled. */ - __hard_EE_RI_disable(); + } #ifdef CONFIG_PPC64 + /* + * An interrupt may clear MSR[EE] and set this concurrently, + * but it will be marked pending and the exit will be retried. + * This leaves a racy window where MSR[EE]=0 and HARD_DIS is + * clear, until interrupt_exit_kernel_restart() calls + * hard_irq_disable(), which will set HARD_DIS again. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + } else { + check_return_regs_valid(regs); + + if (unlikely(stack_store)) + __hard_EE_RI_disable(); + /* + * Returning to a kernel context with local irqs disabled. + * Here, if EE was enabled in the interrupted context, enable + * it on return as well. A problem exists here where a soft + * masked interrupt may have cleared MSR[EE] and set HARD_DIS + * here, and it will still exist on return to the caller. This + * will be resolved by the masked interrupt firing again. + */ if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; -#endif +#endif /* CONFIG_PPC64 */ } + if (unlikely(stack_store)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); + ret = 1; + } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -494,3 +589,46 @@ again: return ret; } + +#ifdef CONFIG_PPC64 +notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= interrupt_exit_user_prepare(regs); + + return regs->exit_result; +} + +/* + * No real need to return a value here because the stack store case does not + * get restarted. + */ +notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + if (regs->softe == IRQS_ENABLED) + trace_hardirqs_off(); + + BUG_ON(user_mode(regs)); + + return interrupt_exit_kernel_prepare(regs); +} +#endif diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S new file mode 100644 index 000000000000..4063e8a3f704 --- /dev/null +++ b/arch/powerpc/kernel/interrupt_64.S @@ -0,0 +1,770 @@ +#include <asm/asm-offsets.h> +#include <asm/bug.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif +#include <asm/feature-fixups.h> +#include <asm/head-64.h> +#include <asm/hw_irq.h> +#include <asm/kup.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/ptrace.h> +#include <asm/tm.h> + + .section ".toc","aw" +SYS_CALL_TABLE: + .tc sys_call_table[TC],sys_call_table + +#ifdef CONFIG_COMPAT +COMPAT_SYS_CALL_TABLE: + .tc compat_sys_call_table[TC],compat_sys_call_table +#endif + .previous + + .align 7 + +.macro DEBUG_SRR_VALID srr +#ifdef CONFIG_PPC_RFI_SRR_DEBUG + .ifc \srr,srr + mfspr r11,SPRN_SRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_SRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .else + mfspr r11,SPRN_HSRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_HSRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .endif +#endif +.endm + +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + SCV_INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. + * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Lsyscall_vectored_\name\()_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_vectored_\name\()_restart + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.Lsyscall_vectored_\name\()_rst_end: + +syscall_vectored_\name\()_restart: +_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_vectored_\name\()_rst_start +1: + +SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b) +RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart) + +.endm + +system_call_vectored common 0x3000 + +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif /* CONFIG_PPC_BOOK3S */ + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: +_ASM_NOKPROBE_SYMBOL(system_call_common_real) + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common +system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) +#ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION +#endif + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_CTR(r1) + mflr r10 + + /* + * This clears CR0.SO (bit 28), which is the error indication on + * return from this system call. + */ + rldimi r12,r11,28,(63-28) + li r11,0xc00 + std r10,_LINK(r1) + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +#ifdef CONFIG_PPC_BOOK3S + li r11,1 + stb r11,PACASRR_VALID(r13) +#endif + + /* + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + */ + li r11,IRQS_ALL_DISABLED + li r12,-1 /* Set MSR_EE and MSR_RI */ + stb r11,PACAIRQSOFTMASK(r13) + mtmsrd r12,1 + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ + bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S +.Lsyscall_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_restart +#endif + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + + ld r2,_CCR(r1) + ld r6,_LINK(r1) + mtlr r6 + +#ifdef CONFIG_PPC_BOOK3S + lbz r4,PACASRR_VALID(r13) + cmpdi r4,0 + bne 1f + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif + ld r4,_NIP(r1) + ld r5,_MSR(r1) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 +1: + DEBUG_SRR_VALID srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + cmpdi r3,0 + bne .Lsyscall_restore_regs + /* Zero volatile regs that may contain sensitive kernel data */ + li r0,0 + li r4,0 + li r5,0 + li r6,0 + li r7,0 + li r8,0 + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtctr r0 + mtspr SPRN_XER,r0 +.Lsyscall_restore_regs_cont: + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_restore_regs: + ld r3,_CTR(r1) + ld r4,_XER(r1) + REST_NVGPRS(r1) + mtctr r3 + mtspr SPRN_XER,r4 + ld r0,GPR0(r1) + REST_8GPRS(4, r1) + ld r12,GPR12(r1) + b .Lsyscall_restore_regs_cont +.Lsyscall_rst_end: + +#ifdef CONFIG_PPC_BOOK3S +syscall_restart: +_ASM_NOKPROBE_SYMBOL(syscall_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_rst_start +1: + +SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b) +RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tabort_syscall: +_ASM_NOKPROBE_SYMBOL(tabort_syscall) + /* Firstly we need to enable TM in the kernel */ + mfmsr r10 + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r10, 0 + + /* tabort, this dooms the transaction, nothing else */ + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) + + /* + * Return directly to userspace. We have corrupted user register state, + * but userspace will never see that register state. Execution will + * resume after the tbegin of the aborted transaction with the + * checkpointed register state. + */ + li r9, MSR_RI + andc r10, r10, r9 + mtmsrd r10, 1 + mtspr SPRN_SRR0, r11 + mtspr SPRN_SRR1, r12 + RFI_TO_USER + b . /* prevent speculative execution */ +#endif + + /* + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES + .globl fast_interrupt_return_srr +fast_interrupt_return_srr: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) + kuap_check_amr r3, r4 + ld r5,_MSR(r1) + andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S + beq 1f + kuap_user_restore r3, r4 + b .Lfast_user_interrupt_return_srr +1: kuap_kernel_restore r3, r4 + andi. r0,r5,MSR_RI + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ + bne+ .Lfast_kernel_interrupt_return_srr + addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b . /* should not get here */ +#else + bne .Lfast_user_interrupt_return_srr + b .Lfast_kernel_interrupt_return_srr +#endif + +.macro interrupt_return_macro srr + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return_\srr +interrupt_return_\srr\(): +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq interrupt_return_\srr\()_kernel +interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs_\srr +.Lrestore_nvgprs_\srr\()_cont: + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S +.Linterrupt_return_\srr\()_user_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_user_restart +#endif + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + +.Lfast_user_interrupt_return_\srr\(): +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + lbz r4,PACAIRQSOFTMASK(r13) + tdnei r4,IRQS_ENABLED +#endif + +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) + + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 + + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_USER + .else + HRFI_TO_USER + .endif + b . /* prevent speculative execution */ +.Linterrupt_return_\srr\()_user_rst_end: + +.Lrestore_nvgprs_\srr\(): + REST_NVGPRS(r1) + b .Lrestore_nvgprs_\srr\()_cont + +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_\srr\()_user_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_user_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_user_rst_start +1: + +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b) +RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart) +#endif + + .balign IFETCH_ALIGN_BYTES +interrupt_return_\srr\()_kernel: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare + + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Linterrupt_return_\srr\()_kernel_rst_start: + ld r11,SOFTE(r1) + cmpwi r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + bne 1f +#ifdef CONFIG_PPC_BOOK3S + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_kernel_restart +#endif + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS +1: + +.Lfast_kernel_interrupt_return_\srr\(): + cmpdi cr1,r3,0 +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_LINK(r1) + ld r4,_CTR(r1) + ld r5,_XER(r1) + ld r6,_CCR(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 + + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + std r0,STACK_FRAME_OVERHEAD-16(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ + +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * PACA_EXGEN as temporary storage to hold the store data, as + * interrupts are disabled here so it won't be clobbered. + */ + mtcr r6 + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + std r9,0(r1) /* perform store component of stdu */ + ld r9,PACA_EXGEN+0(r13) + + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ +.Linterrupt_return_\srr\()_kernel_rst_end: + +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_\srr\()_kernel_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_kernel_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_kernel_rst_start +1: + +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b) +RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart) +#endif + +.endm + +interrupt_return_macro srr +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_macro hsrr + + .globl __end_soft_masked +__end_soft_masked: +DEFINE_FIXED_SYMBOL(__end_soft_masked) +#endif /* CONFIG_PPC_BOOK3S */ + +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + +_GLOBAL(ret_from_fork) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_exit + +_GLOBAL(ret_from_kernel_thread) + bl schedule_tail + REST_NVGPRS(r1) + mtctr r14 + mr r3,r15 +#ifdef PPC64_ELF_ABI_v2 + mr r12,r14 +#endif + bctrl + li r3,0 + b .Lsyscall_exit diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 72cb45393ef2..91e63eac4e8f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -121,6 +121,7 @@ void replay_soft_interrupts(void) ppc_save_regs(®s); regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -217,6 +218,100 @@ static inline void replay_soft_interrupts_irqrestore(void) #define replay_soft_interrupts_irqrestore() replay_soft_interrupts() #endif +#ifdef CONFIG_CC_HAS_ASM_GOTO +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. + */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = get_irq_happened(); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = get_irq_happened(); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +#else notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -288,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask) __hard_irq_enable(); preempt_enable(); } +#endif EXPORT_SYMBOL(arch_local_irq_restore); /* diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ce87dc5ea23c..5277cf582c16 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); + u32 *addr = (u32 *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) patch_branch(addr, jump_entry_target(entry), 0); else - patch_instruction(addr, ppc_inst(PPC_INST_NOP)); + patch_instruction(addr, ppc_inst(PPC_RAW_NOP())); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd2ad3603ad..bdee7262c080 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -147,7 +147,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 0; if (*(u32 *)regs->nip == BREAK_INSTR) - regs->nip += BREAK_INSTR_SIZE; + regs_add_return_ip(regs, BREAK_INSTR_SIZE); return 1; } @@ -372,7 +372,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) { - regs->nip = pc; + regs_set_return_ip(regs, pc); } /* @@ -394,7 +394,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, case 'c': /* handle the optional parameter */ if (kgdb_hex2long(&ptr, &addr)) - linux_regs->nip = addr; + regs_set_return_ip(linux_regs, addr); atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ @@ -402,9 +402,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, #ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); - linux_regs->msr |= MSR_DE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE); #else - linux_regs->msr |= MSR_SE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE); #endif atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); @@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { + u32 instr, *addr = (u32 *)bpt->bpt_addr; int err; - unsigned int instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, (unsigned *) addr); + err = get_kernel_nofault(instr, addr); if (err) return err; @@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return -EFAULT; - *(unsigned int *)bpt->saved_instr = instr; + *(u32 *)bpt->saved_instr = instr; return 0; } @@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; + u32 *addr = (u32 *)bpt->bpt_addr; err = patch_instruction(addr, ppc_inst(instr)); if (err) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 660138f6c4b2..7154d58338cc 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -39,7 +39,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * On powerpc, NIP is *before* this instruction for the * pre handler */ - regs->nip -= MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, -MCOUNT_INSN_SIZE); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -48,7 +48,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * Emulate singlestep (and also recover regs->nip) * as if there is a nop */ - regs->nip += MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, MCOUNT_INSN_SIZE); if (unlikely(p->post_handler)) { kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c64a5feaebbe..cbc28d1a2e1b 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,11 +19,13 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <linux/moduleloader.h> #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> #include <asm/inst.h> +#include <asm/set_memory.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -103,28 +105,42 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + + if (strict_module_rwx_enabled()) { + set_memory_ro((unsigned long)page, 1); + set_memory_x((unsigned long)page, 1); + } + return page; +} + int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); ret = -EINVAL; - } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { - printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); + } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { + printk("Cannot register a kprobe on mtmsr[d]/rfi[d]\n"); ret = -EINVAL; } else if ((unsigned long)p->addr & ~PAGE_MASK && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { + ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } preempt_disable(); prev = get_kprobe(p->addr - 1); preempt_enable_no_resched(); - if (prev && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } @@ -138,7 +154,7 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + patch_instruction(p->ainsn.insn, insn); p->opcode = ppc_inst_val(insn); } @@ -149,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION))); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode))); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -178,7 +194,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs * variant as values in regs could play a part in * if the trap is taken or not */ - regs->nip = (unsigned long)p->ainsn.insn; + regs_set_return_ip(regs, (unsigned long)p->ainsn.insn); } static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -228,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -319,8 +335,9 @@ int kprobe_handler(struct pt_regs *regs) kprobe_opcode_t insn = *p->ainsn.insn; if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { /* Turn off 'trace' bits */ - regs->msr &= ~MSR_SINGLESTEP; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); goto no_kprobe; } @@ -415,7 +432,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * we end up emulating it in kprobe_handler(), which increments the nip * again. */ - regs->nip = orig_ret_address - 4; + regs_set_return_ip(regs, orig_ret_address - 4); regs->link = orig_ret_address; return 0; @@ -439,7 +456,7 @@ int kprobe_post_handler(struct pt_regs *regs) if (!cur || user_mode(regs)) return 0; - len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); + len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; @@ -450,8 +467,8 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + len; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr + len); + regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -490,9 +507,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * and allow the page fault handler to continue as a * normal page fault. */ - regs->nip = (unsigned long)cur->addr; - regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */ - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr); + /* Turn off 'trace' bits */ + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -506,7 +525,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15e7b4900689..47a683cd00d2 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -274,7 +274,7 @@ void mce_common_process_ue(struct pt_regs *regs, entry = search_kernel_exception_table(regs->nip); if (entry) { mce_err->ignore_event = true; - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); } } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 667104d4c455..c2f55fe7092d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = ppc_inst_read((struct ppc_inst *)instr_addr); + instr = ppc_inst_read((u32 *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; @@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, return -1; } -static int mce_handle_ierror(struct pt_regs *regs, +static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, const struct mce_ierror_table table[], struct mce_error_info *mce_err, uint64_t *addr, uint64_t *phys_addr) { - uint64_t srr1 = regs->msr; int handled = 0; int i; @@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs, } static long mce_handle_error(struct pt_regs *regs, + unsigned long srr1, const struct mce_derror_table dtable[], const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; uint64_t addr, phys_addr = ULONG_MAX; - uint64_t srr1 = regs->msr; long handled; if (SRR1_MC_LOADSTORE(srr1)) handled = mce_handle_derror(regs, dtable, &mce_err, &addr, &phys_addr); else - handled = mce_handle_ierror(regs, itable, &mce_err, &addr, + handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr, &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) @@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* P7 DD1 leaves top bits of DSISR undefined */ regs->dsisr &= 0x0000ffff; - return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p7_derror_table, mce_p7_ierror_table); } long __machine_check_early_realmode_p8(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { + unsigned long srr1 = regs->msr; + /* * On POWER9 DD2.1 and below, it's possible to get a machine check * caused by a paste instruction where only DSISR bit 25 is set. This @@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; - return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p9_derror_table, mce_p9_ierror_table); } long __machine_check_early_realmode_p10(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); + unsigned long srr1 = regs->msr; + + /* + * Async machine check due to bad real address from store comes with + * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in + * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table + * so it will find the cause (which describes it correctly as a store + * error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p10_derror_table, mce_p10_ierror_table); } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6a076bef2932..39ab15419592 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume) bl start_secondary b . #endif /* CONFIG_SMP */ - -/* - * This routine is just here to keep GCC happy - sigh... - */ -_GLOBAL(__main) - blr diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f35c8d20be7..ed04a3ba66fe 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr, static __always_inline void * __module_alloc(unsigned long size, unsigned long start, unsigned long end) { + pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c27b8687b82a..f417afc08d33 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val))) + if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val))) return 0; - if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | - PPC_LO(val))) + if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))) return 0; return 1; } @@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location, entry++; } - /* - * lis r12, sym@ha - * addi r12, r12, sym@l - * mtctr r12 - * bctr - */ - entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val); - entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val); - entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12); - entry->jump[3] = PPC_INST_BCTR; + entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); + entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); + entry->jump[2] = PPC_RAW_MTCTR(_R12); + entry->jump[3] = PPC_RAW_BCTR(); pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ae2b188365b1..6baa676e7cb6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -122,27 +122,19 @@ struct ppc64_stub_entry * the stub, but it's significantly shorter to put these values at the * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. - * - * addis r11,r2, <high> - * addi r11,r11, <low> - * std r2,R2_STACK_OFFSET(r1) - * ld r12,32(r11) - * ld r2,40(r11) - * mtctr r12 - * bctr */ static u32 ppc64_stub_insns[] = { - PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2), - PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11), + PPC_RAW_ADDIS(_R11, _R2, 0), + PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ - PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET, - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32, + PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET), + PPC_RAW_LD(_R12, _R11, 32), #ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ - PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40, + PPC_RAW_LD(_R2, _R11, 40), #endif - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* Count how many different 24-bit relocations (different symbol, @@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #ifdef CONFIG_MPROFILE_KERNEL -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* @@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; - if (*instruction != PPC_INST_NOP) { + if (*instruction != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", me->name, *instruction, instruction); return 0; @@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * ld r2, ...(r12) * add r2, r2, r12 */ - if ((((uint32_t *)location)[0] & ~0xfffc) != - (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12))) + if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0)) break; - if (((uint32_t *)location)[1] != - (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12))) + if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12)) break; /* * If found, replace it with: * addis r2, r12, (.TOC.-func)@ha * addi r2, r2, (.TOC.-func)@l */ - ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) | - __PPC_RA(R12) | PPC_HA(value); - ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) | - __PPC_RA(R2) | PPC_LO(value); + ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value)); + ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value)); break; case R_PPC64_REL16_HA: diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index cdf87086fa33..c79899abcec8 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -18,24 +18,16 @@ #include <asm/ppc-opcode.h> #include <asm/inst.h> -#define TMPL_CALL_HDLR_IDX \ - (optprobe_template_call_handler - optprobe_template_entry) -#define TMPL_EMULATE_IDX \ - (optprobe_template_call_emulate - optprobe_template_entry) -#define TMPL_RET_IDX \ - (optprobe_template_ret - optprobe_template_entry) -#define TMPL_OP_IDX \ - (optprobe_template_op_address - optprobe_template_entry) -#define TMPL_INSN_IDX \ - (optprobe_template_insn - optprobe_template_entry) -#define TMPL_END_IDX \ - (optprobe_template_end - optprobe_template_entry) - -DEFINE_INSN_CACHE_OPS(ppc_optinsn); +#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry) static bool insn_page_in_use; -static void *__ppc_alloc_insn_page(void) +void *alloc_optinsn_page(void) { if (insn_page_in_use) return NULL; @@ -43,20 +35,11 @@ static void *__ppc_alloc_insn_page(void) return &optinsn_slot; } -static void __ppc_free_insn_page(void *page __maybe_unused) +void free_optinsn_page(void *page) { insn_page_in_use = false; } -struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { - .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), - .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), - /* insn_size initialized later */ - .alloc = __ppc_alloc_insn_page, - .free = __ppc_free_insn_page, - .nr_garbage = 0, -}; - /* * Check if we can optimize this probe. Returns NIP post-emulation if this can * be optimized and 0 otherwise. @@ -66,6 +49,7 @@ static unsigned long can_optimize(struct kprobe *p) struct pt_regs regs; struct instruction_op op; unsigned long nip = 0; + unsigned long addr = (unsigned long)p->addr; /* * kprobe placed for kretprobe during boot time @@ -73,7 +57,7 @@ static unsigned long can_optimize(struct kprobe *p) * So further checks can be skipped. */ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) - return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + return addr + sizeof(kprobe_opcode_t); /* * We only support optimizing kernel addresses, but not @@ -81,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p) * * FIXME: Optimize kprobes placed in module addresses. */ - if (!is_kernel_addr((unsigned long)p->addr)) + if (!is_kernel_addr(addr)) return 0; memset(®s, 0, sizeof(struct pt_regs)); - regs.nip = (unsigned long)p->addr; + regs.nip = addr; regs.trap = 0x0; regs.msr = MSR_KERNEL; @@ -100,9 +84,8 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && - analyse_instr(&op, ®s, - ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { + if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) && + analyse_instr(&op, ®s, ppc_inst_read(p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -123,7 +106,7 @@ static void optimized_callback(struct optimized_kprobe *op, kprobes_inc_nmissed_count(&op->kp); } else { __this_cpu_write(current_kprobe, &op->kp); - regs->nip = (unsigned long)op->kp.addr; + regs_set_return_ip(regs, (unsigned long)op->kp.addr); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); @@ -136,19 +119,15 @@ NOKPROBE_SYMBOL(optimized_callback); void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { if (op->optinsn.insn) { - free_ppc_optinsn_slot(op->optinsn.insn, 1); + free_optinsn_slot(op->optinsn.insn, 1); op->optinsn.insn = NULL; } } static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); - addr++; - - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } /* @@ -157,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t * */ static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { - /* lis reg,(op)@highest */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | - ((val >> 48) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@higher */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 32) & 0xffff))); - addr++; - - /* rldicr reg,reg,32,31 */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | - ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); - addr++; - - /* oris reg,reg,(op)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 16) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | (val & 0xffff))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) @@ -198,19 +154,18 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; + unsigned long op_callback_addr, emulate_step_addr; + kprobe_opcode_t *buff; long b_offset; unsigned long nip, size; int rc, i; - kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; - nip = can_optimize(p); if (!nip) return -EILSEQ; /* Allocate instruction slot for detour buffer */ - buff = get_ppc_optinsn_slot(); + buff = get_optinsn_slot(); if (!buff) return -ENOMEM; @@ -228,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; /* Check if the return address is also within 32MB range */ - b_offset = (unsigned long)(buff + TMPL_RET_IDX) - - (unsigned long)nip; + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip; if (!is_offset_in_branch_range(b_offset)) goto error; @@ -238,8 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction((struct ppc_inst *)(buff + i), - ppc_inst(*(optprobe_template_entry + i))); + rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -253,51 +206,48 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. branch to optimized_callback() and emulate_step() */ - op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); - emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); + op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } - rc = create_branch(&branch_op_callback, - (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX, + op_callback_addr, BRANCH_SET_LINK); - rc |= create_branch(&branch_emulate_step, - (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX, + emulate_step_addr, BRANCH_SET_LINK); if (rc) goto error; - patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - branch_op_callback); - patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - branch_emulate_step); + patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); + patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + if (IS_ENABLED(CONFIG_PPC64)) { + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + } else { + patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); + } /* * 4. branch back from trampoline */ - patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); + patch_branch(buff + TMPL_RET_IDX, nip, 0); - flush_icache_range((unsigned long)buff, - (unsigned long)(&buff[TMPL_END_IDX])); + flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); op->optinsn.insn = buff; return 0; error: - free_ppc_optinsn_slot(buff, 0); + free_optinsn_slot(buff, 0); return -ERANGE; } @@ -328,12 +278,9 @@ void arch_optimize_kprobes(struct list_head *oplist) * Backup instructions which will be replaced * by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr, - RELATIVEJUMP_SIZE); - create_branch(&instr, - (struct ppc_inst *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0); - patch_instruction((struct ppc_inst *)op->kp.addr, instr); + memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); + create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0); + patch_instruction(op->kp.addr, instr); list_del_init(&op->list); } } @@ -343,8 +290,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op) arch_arm_kprobe(&op->kp); } -void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) +void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list) { struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -355,8 +301,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist, } } -int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 7f5aae3c387d..9bd30cac852b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; - get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); - get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), LOW_SLICE_ARRAY_SZ); memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8935c5696bce..185beb290580 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -96,7 +96,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk) if (tsk == current && tsk->thread.regs && MSR_TM_ACTIVE(tsk->thread.regs->msr) && !test_thread_flag(TIF_RESTORE_TM)) { - tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; + regs_set_return_msr(&tsk->thread.ckpt_regs, + tsk->thread.regs->msr); set_thread_flag(TIF_RESTORE_TM); } } @@ -161,7 +162,7 @@ static void __giveup_fpu(struct task_struct *tsk) msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_fpu(struct task_struct *tsk) @@ -244,7 +245,7 @@ static void __giveup_altivec(struct task_struct *tsk) msr &= ~MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_altivec(struct task_struct *tsk) @@ -559,7 +560,7 @@ void notrace restore_math(struct pt_regs *regs) msr_check_and_clear(new_msr); - regs->msr |= new_msr | fpexc_mode; + regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode); } } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1114,7 +1115,7 @@ void restore_tm_state(struct pt_regs *regs) #endif restore_math(regs); - regs->msr |= msr_diff; + regs_set_return_msr(regs, regs->msr | msr_diff); } #else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -1129,6 +1130,10 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_ALTIVEC)) t->vrsave = mfspr(SPRN_VRSAVE); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + t->spefscr = mfspr(SPRN_SPEFSCR); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) t->dscr = mfspr(SPRN_DSCR); @@ -1159,6 +1164,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->vrsave != new_thread->vrsave) mtspr(SPRN_VRSAVE, new_thread->vrsave); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE) && + old_thread->spefscr != new_thread->spefscr) + mtspr(SPRN_SPEFSCR, new_thread->spefscr); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; @@ -1213,6 +1223,19 @@ struct task_struct *__switch_to(struct task_struct *prev, __flush_tlb_pending(batch); batch->active = 0; } + + /* + * On POWER9 the copy-paste buffer can only paste into + * foreign real addresses, so unprivileged processes can not + * see the data or use it in any way unless they have + * foreign real mappings. If the new process has the foreign + * real address mappings, we must issue a cp_abort to clear + * any state and prevent snooping, corruption or a covert + * channel. ISA v3.1 supports paste into local memory. + */ + if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) || + atomic_read(&new->mm->context.vas_windows))) + asm volatile(PPC_CP_ABORT); #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1248,43 +1271,48 @@ struct task_struct *__switch_to(struct task_struct *prev, } /* - * Call restore_sprs() before calling _switch(). If we move it after - * _switch() then we miss out on calling it for new tasks. The reason - * for this is we manually create a stack frame for new tasks that - * directly returns through ret_from_fork() or + * Call restore_sprs() and set_return_regs_changed() before calling + * _switch(). If we move it after _switch() then we miss out on calling + * it for new tasks. The reason for this is we manually create a stack + * frame for new tasks that directly returns through ret_from_fork() or * ret_from_kernel_thread(). See copy_thread() for details. */ restore_sprs(old_thread, new_thread); + set_return_regs_changed(); /* _switch changes stack (and regs) */ + #ifdef CONFIG_PPC32 kuap_assert_locked(); #endif last = _switch(old_thread, new_thread); + /* + * Nothing after _switch will be run for newly created tasks, + * because they switch directly to ret_from_fork/ret_from_kernel_thread + * etc. Code added here should have a comment explaining why that is + * okay. + */ + #ifdef CONFIG_PPC_BOOK3S_64 + /* + * This applies to a process that was context switched while inside + * arch_enter_lazy_mmu_mode(), to re-activate the batch that was + * deactivated above, before _switch(). This will never be the case + * for new tasks. + */ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } - if (current->thread.regs) { + /* + * Math facilities are masked out of the child MSR in copy_thread. + * A new task does not need to restore_math because it will + * demand fault them. + */ + if (current->thread.regs) restore_math(current->thread.regs); - - /* - * On POWER9 the copy-paste buffer can only paste into - * foreign real addresses, so unprivileged processes can not - * see the data or use it in any way unless they have - * foreign real mappings. If the new process has the foreign - * real address mappings, we must issue a cp_abort to clear - * any state and prevent snooping, corruption or a covert - * channel. ISA v3.1 supports paste into local memory. - */ - if (current->mm && - (cpu_has_feature(CPU_FTR_ARCH_31) || - atomic_read(¤t->mm->context.vas_windows))) - asm volatile(PPC_CP_ABORT); - } #endif /* CONFIG_PPC_BOOK3S_64 */ return last; @@ -1736,6 +1764,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + p->thread.kuap = KUAP_NONE; +#endif setup_ksp_vsid(p, sp); @@ -1838,13 +1869,14 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) } regs->gpr[2] = toc; } - regs->nip = entry; - regs->msr = MSR_USER64; + regs_set_return_ip(regs, entry); + regs_set_return_msr(regs, MSR_USER64); } else { - regs->nip = start; regs->gpr[2] = 0; - regs->msr = MSR_USER32; + regs_set_return_ip(regs, start); + regs_set_return_msr(regs, MSR_USER32); } + #endif #ifdef CONFIG_VSX current->thread.used_vsr = 0; @@ -1875,7 +1907,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - } EXPORT_SYMBOL(start_thread); @@ -1923,9 +1954,10 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val > PR_FP_EXC_PRECISE) return -EINVAL; tsk->thread.fpexc_mode = __pack_fe01(val); - if (regs != NULL && (regs->msr & MSR_FP) != 0) - regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) - | tsk->thread.fpexc_mode; + if (regs != NULL && (regs->msr & MSR_FP) != 0) { + regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode); + } return 0; } @@ -1971,9 +2003,9 @@ int set_endian(struct task_struct *tsk, unsigned int val) return -EINVAL; if (val == PR_ENDIAN_BIG) - regs->msr &= ~MSR_LE; + regs_set_return_msr(regs, regs->msr & ~MSR_LE); else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE) - regs->msr |= MSR_LE; + regs_set_return_msr(regs, regs->msr | MSR_LE); else return -EINVAL; @@ -2121,8 +2153,9 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack, - const char *loglvl) +void __no_sanitize_address show_stack(struct task_struct *tsk, + unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fbe9deebc8e1..f620e04dc9bf 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -758,7 +758,7 @@ void __init early_init_devtree(void *params) first_memblock_size = min_t(u64, first_memblock_size, memory_limit); setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ - memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. */ if (PHYSICAL_START > MEMORY_START) memblock_reserve(MEMORY_START, 0x8000); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 41ed7e33d897..a5bf355ce1d6 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -27,10 +27,12 @@ #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/pgtable.h> +#include <linux/printk.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/page.h> #include <asm/processor.h> +#include <asm/interrupt.h> #include <asm/irq.h> #include <asm/io.h> #include <asm/smp.h> @@ -242,13 +244,31 @@ static int __init prom_strcmp(const char *cs, const char *ct) return 0; } -static char __init *prom_strcpy(char *dest, const char *src) +static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n) { - char *tmp = dest; + ssize_t rc; + size_t i; - while ((*dest++ = *src++) != '\0') - /* nothing */; - return tmp; + if (n == 0 || n > INT_MAX) + return -E2BIG; + + // Copy up to n bytes + for (i = 0; i < n && src[i] != '\0'; i++) + dest[i] = src[i]; + + rc = i; + + // If we copied all n then we have run out of space for the nul + if (rc == n) { + // Rewind by one character to ensure nul termination + i--; + rc = -E2BIG; + } + + for (; i < n; i++) + dest[i] = '\0'; + + return rc; } static int __init prom_strncmp(const char *cs, const char *ct, size_t count) @@ -701,13 +721,12 @@ static int __init prom_setprop(phandle node, const char *nodename, } /* We can't use the standard versions because of relocation headaches. */ -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) +#define prom_isxdigit(c) \ + (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F')) -#define isdigit(c) ('0' <= (c) && (c) <= '9') -#define islower(c) ('a' <= (c) && (c) <= 'z') -#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) +#define prom_isdigit(c) ('0' <= (c) && (c) <= '9') +#define prom_islower(c) ('a' <= (c) && (c) <= 'z') +#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) static unsigned long prom_strtoul(const char *cp, const char **endp) { @@ -716,14 +735,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) if (*cp == '0') { base = 8; cp++; - if (toupper(*cp) == 'X') { + if (prom_toupper(*cp) == 'X') { cp++; base = 16; } } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + while (prom_isxdigit(*cp) && + (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) { result = result * base + value; cp++; } @@ -927,6 +946,10 @@ struct option_vector6 { u8 os_name; } __packed; +struct option_vector7 { + u8 os_id[256]; +} __packed; + struct ibm_arch_vec { struct { u32 mask, val; } pvrs[14]; @@ -949,6 +972,9 @@ struct ibm_arch_vec { u8 vec6_len; struct option_vector6 vec6; + + u8 vec7_len; + struct option_vector7 vec7; } __packed; static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { @@ -1095,6 +1121,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .secondary_pteg = 0, .os_name = OV6_LINUX, }, + + /* option vector 7: OS Identification */ + .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)), }; static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned; @@ -1323,6 +1352,8 @@ static void __init prom_check_platform_support(void) memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, sizeof(ibm_architecture_vec)); + prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256); + if (prop_len > 1) { int i; u8 vec[8]; @@ -1762,6 +1793,8 @@ static int prom_rtas_hcall(uint64_t args) asm volatile("sc 1\n" : "=r" (arg1) : "r" (arg1), "r" (arg2) :); + srr_regs_clobbered(); + return arg1; } @@ -2702,7 +2735,7 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - prom_strcpy(namep, "phandle"); + prom_strscpy_pad(namep, "phandle", sizeof("phandle")); mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ @@ -3210,54 +3243,6 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) #endif /* CONFIG_BLK_DEV_INITRD */ } -#ifdef CONFIG_PPC64 -#ifdef CONFIG_RELOCATABLE -static void reloc_toc(void) -{ -} - -static void unreloc_toc(void) -{ -} -#else -static void __reloc_toc(unsigned long offset, unsigned long nr_entries) -{ - unsigned long i; - unsigned long *toc_entry; - - /* Get the start of the TOC by using r2 directly. */ - asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); - - for (i = 0; i < nr_entries; i++) { - *toc_entry = *toc_entry + offset; - toc_entry++; - } -} - -static void reloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - __reloc_toc(offset, nr_entries); - - mb(); -} - -static void unreloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - mb(); - - __reloc_toc(-offset, nr_entries); -} -#endif -#endif - #ifdef CONFIG_PPC_SVM /* * Perform the Enter Secure Mode ultracall. @@ -3291,14 +3276,12 @@ static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) * relocated it so the check will fail. Restore the original image by * relocating it back to the kernel virtual base address. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(KERNELBASE); + relocate(KERNELBASE); ret = enter_secure_mode(kbase, fdt); /* Relocate the kernel again. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(kbase); + relocate(kbase); if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); @@ -3326,8 +3309,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 unsigned long offset = reloc_offset(); reloc_got2(offset); -#else - reloc_toc(); #endif /* @@ -3504,8 +3485,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 reloc_got2(-offset); -#else - unreloc_toc(); #endif /* Move to secure memory if we're supposed to be secure guests. */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c index 3990c01ef8cf..399f5d94a3df 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-adv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_BT; task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -24,7 +24,7 @@ void user_enable_block_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_IC; task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -50,7 +50,7 @@ void user_disable_single_step(struct task_struct *task) * All debug events were off..... */ task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); } } clear_tsk_thread_flag(task, TIF_SINGLESTEP); @@ -82,6 +82,7 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { + struct pt_regs *regs = task->thread.regs; #ifdef CONFIG_HAVE_HW_BREAKPOINT int ret; struct thread_struct *thread = &task->thread; @@ -112,7 +113,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); task->thread.debug.dbcr0 &= ~DBCR0_IDM; } return 0; @@ -132,7 +133,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return 0; } @@ -220,7 +221,7 @@ static long set_instruction_bp(struct task_struct *child, } out: child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot; } @@ -336,7 +337,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) return -ENOSPC; } child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot + 4; } @@ -430,7 +431,7 @@ static int set_dac_range(struct task_struct *child, child->thread.debug.dbcr2 |= DBCR2_DAC12MX; else /* PPC_BREAKPOINT_MODE_MASK */ child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return 5; } @@ -485,7 +486,8 @@ long ppc_del_hwdebug(struct task_struct *child, long data) if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, child->thread.debug.dbcr1)) { child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(child->thread.regs, + child->thread.regs->msr & ~MSR_DE); } } return rc; diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index aa36fcad36cd..a5dd7d2e2c9e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -11,10 +11,8 @@ void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -22,10 +20,8 @@ void user_enable_block_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -34,7 +30,7 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) - regs->msr &= ~(MSR_SE | MSR_BE); + regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE)); clear_tsk_thread_flag(task, TIF_SINGLESTEP); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 773bcc4ca843..b8be1d6668b5 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -113,8 +113,9 @@ static unsigned long get_user_msr(struct task_struct *task) static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) | + (msr & MSR_DEBUGCHANGE); + regs_set_return_msr(task->thread.regs, newmsr); return 0; } diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index a28239b8b0c0..33c07c8af6c8 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -12,7 +12,7 @@ #define MAX_RTC_WAIT 5000 /* 5 sec */ -#define RTAS_CLOCK_BUSY (-2) + time64_t __init rtas_get_boot_time(void) { int ret[8]; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6bada744402b..99f2cce635fb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/syscalls.h> +#include <asm/interrupt.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/hvcall.h> @@ -46,6 +47,13 @@ /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); +static inline void do_enter_rtas(unsigned long args) +{ + enter_rtas(args); + + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; @@ -384,7 +392,7 @@ static char *__fetch_rtas_last_error(char *altbuf) save_args = rtas.args; rtas.args = err_args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); err_args = rtas.args; rtas.args = save_args; @@ -430,7 +438,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - enter_rtas(__pa(args)); + do_enter_rtas(__pa(args)); } void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) @@ -1138,7 +1146,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) flags = lock_rtas(); rtas.args = args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); args = rtas.args; /* A -1 return code indicates that the last command couldn't diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index c17d1c9362b5..cc51fa52e783 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable) void setup_stf_barrier(void) { enum stf_barrier_type type; - bool enable, hv; - - hv = cpu_has_feature(CPU_FTR_HVMODE); + bool enable; /* Default to fallback in case fw-features are not available */ if (cpu_has_feature(CPU_FTR_ARCH_300)) @@ -315,8 +313,7 @@ void setup_stf_barrier(void) type = STF_BARRIER_NONE; enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + security_ftr_enabled(SEC_FTR_STF_BARRIER); if (type == STF_BARRIER_FALLBACK) { pr_info("stf-barrier: fallback barrier available\n"); @@ -439,8 +436,8 @@ static void update_branch_cache_flush(void) site2 = &patch__call_kvm_flush_link_stack_p9; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); - patch_instruction_site(site2, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); + patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP())); } else { // Could use HW flush, but that could also flush count cache patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); @@ -450,11 +447,11 @@ static void update_branch_cache_flush(void) // Patch out the bcctr first, then nop the rest site = &patch__call_flush_branch_caches3; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches2; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches1; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && @@ -477,12 +474,12 @@ static void update_branch_cache_flush(void) // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); // If we have flush instruction, early return } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { patch_instruction_site(&patch__flush_count_cache_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); } } } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 046fe21b5c3b..26328fd2990c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -92,8 +92,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); int dcache_bsize; int icache_bsize; -unsigned long klimit = (unsigned long) _end; - /* * This still seems to be needed... -- paulus */ @@ -931,7 +929,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; + init_mm.brk = (unsigned long)_end; mm_iommu_init(&init_mm); irqstack_early_init(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d7c1f92152af..7ec5c47fce0e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ @@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP())); create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a35fbf4d0bce..1ff258f6c76c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -33,6 +33,7 @@ #include <linux/pgtable.h> #include <asm/debugfs.h> +#include <asm/kvm_guest.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) * disable it by default. Book3S has a soft-nmi hardlockup detector based * on the decrementer interrupt, so it does not suffer from this problem. * - * It is likely to get false positives in VM guests, so disable it there - * by default too. + * It is likely to get false positives in KVM guests, so disable it there + * by default too. PowerVM will not stop or arbitrarily oversubscribe + * CPUs, but give a minimum regular allotment even with SPLPAR, so enable + * the detector for non-KVM guests, assume PowerVM. */ static int __init disable_hardlockup_detector(void) { #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF hardlockup_detector_disable(); #else - if (firmware_has_feature(FW_FEATURE_LPAR)) - hardlockup_detector_disable(); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (is_kvm_guest()) + hardlockup_detector_disable(); + } #endif return 0; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 9ded046edb0e..e600764a926c 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -214,7 +214,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->gpr[0] = __NR_restart_syscall; else regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; + regs_add_return_ip(regs, -4); regs->result = 0; } else { if (trap_is_scv(regs)) { @@ -322,16 +322,16 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * For signals taken in non-TM or suspended mode, we use the * normal/non-checkpointed stack pointer. */ - - unsigned long ret = tsk->thread.regs->gpr[1]; + struct pt_regs *regs = tsk->thread.regs; + unsigned long ret = regs->gpr[1]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); - if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + if (MSR_TM_ACTIVE(regs->msr)) { preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); - if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) + if (MSR_TM_TRANSACTIONAL(regs->msr)) ret = tsk->thread.ckpt_regs.gpr[1]; /* @@ -341,7 +341,7 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * (tm_recheckpoint_new_task() would recheckpoint). Besides, we * enter the signal handler in non-transactional state. */ - tsk->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); preempt_enable(); } #endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8f05ed0da292..0608581967f0 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); -#endif -#ifdef CONFIG_SPE - if (current->thread.used_spe) - flush_spe_to_thread(current); -#endif } static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, @@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user */ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); -#ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, @@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); if (msr & MSR_FP) @@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); -#ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local * buffer, then write that to userspace. Also set MSR_VSX in @@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user msr |= MSR_VSX; } -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in __unsafe_save_user_regs(). - */ - if (current->thread.used_spe) { - unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32), failed); - /* set MSR_SPE in the saved MSR value to indicate that - * frame->mc_vregs contains valid data */ - msr |= MSR_SPE; - } - - /* We always copy to/from spefscr */ - unsafe_put_user(current->thread.spefscr, - (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); @@ -505,14 +479,14 @@ static long restore_user_regs(struct pt_regs *regs, /* if doing signal return, restore the previous little-endian mode */ if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); #ifdef CONFIG_ALTIVEC /* * Force the process to reload the altivec registers from * current->thread when it next does altivec instructions */ - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, @@ -534,7 +508,7 @@ static long restore_user_regs(struct pt_regs *regs, * Force the process to reload the VSX registers from * current->thread when it next does VSX instruction. */ - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -550,12 +524,12 @@ static long restore_user_regs(struct pt_regs *regs, * force the process to reload the FP registers from * current->thread when it next does FP instructions */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE /* force the process to reload the spe registers from current->thread when it next does spe instructions */ - regs->msr &= ~MSR_SPE; + regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, @@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { unsigned long msr, msr_hi; -#ifdef CONFIG_VSX int i; -#endif if (tm_suspend_disabled) return 1; @@ -608,10 +580,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); /* Restore the previous little-endian mode */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); -#ifdef CONFIG_ALTIVEC - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, @@ -629,14 +600,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); -#endif /* CONFIG_ALTIVEC */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); -#ifdef CONFIG_VSX - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in restore_user_regs(). - */ - regs->msr &= ~MSR_SPE; - if (msr & MSR_SPE) { - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); - current->thread.used_spe = true; - } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); - - /* Always get SPEFSCR back */ - unsafe_get_user(current->thread.spefscr, - (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ user_read_access_end(); @@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_restore_general_regs(regs, tm_sr, failed); -#ifdef CONFIG_ALTIVEC /* restore altivec registers from the stack */ if (msr & MSR_VEC) unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, @@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ unsafe_get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); -#ifdef CONFIG_VSX if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); current->thread.used_vsr = true; } -#endif /* CONFIG_VSX */ /* Get the top half of the MSR from the user context */ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); @@ -725,7 +672,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * * Pull in the MSR TM bits from the user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); + regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK)); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the * transactional versions should be loaded. @@ -740,14 +687,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(¤t->thread.fp_state); - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode)); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { load_vr_state(¤t->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } -#endif preempt_enable(); @@ -828,10 +773,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], - failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); @@ -858,10 +801,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[4] = (unsigned long)&frame->info; regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long)frame; - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -926,9 +869,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } user_access_end(); @@ -947,10 +889,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -1200,7 +1142,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * set, and recheckpoint was not called. This avoid * hitting a TM Bad thing at RFID */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); } /* Fall through, for non-TM restore */ #endif @@ -1289,7 +1231,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, affect the contents of these registers. After this point, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. */ - regs->msr = new_msr; + regs_set_return_msr(regs, new_msr); #ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.debug.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f9e1f5428b9e..1831bba0582e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -354,7 +354,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ /* get MSR separately, transfer the LE bit if doing signal return */ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); @@ -376,7 +376,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC unsafe_get_user(v_regs, &sc->v_regs, efault_out); @@ -468,7 +468,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return -EINVAL; /* pull in MSR LE from user context */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]); @@ -495,7 +495,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC err |= __get_user(v_regs, &sc->v_regs); @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr |= msr & MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK)); /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -583,7 +583,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * to be de-scheduled with MSR[TS] set but without calling * tm_recheckpoint(). This can cause a bug. */ - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&tsk->thread); @@ -591,11 +591,11 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(&tsk->thread.fp_state); - regs->msr |= (MSR_FP | tsk->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode)); } if (msr & MSR_VEC) { load_vr_state(&tsk->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } preempt_enable(); @@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; - /* bctrl # call the handler */ - err |= __put_user(PPC_INST_BCTRL, &tramp[0]); - /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ - err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); - /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); - /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[3]); + /* Call the handler and pop the dummy stackframe*/ + err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]); + err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]); + + err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]); + err |= __put_user(PPC_RAW_SC(), &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -720,6 +717,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); + return 0; efault_out: @@ -786,7 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * the MSR[TS] that came from user context later, at * restore_tm_sigcontexts. */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; @@ -818,7 +816,8 @@ SYSCALL_DEFINE0(rt_sigreturn) * MSR[TS] set, but without CPU in the proper state, * causing a TM bad thing. */ - current->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr & ~MSR_TS_MASK); if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) goto badframe; @@ -832,6 +831,7 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; set_thread_flag(TIF_RESTOREALL); + return 0; badframe_block: @@ -911,12 +911,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (tsk->mm->context.vdso) { - regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); + regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64)); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->nip = (unsigned long) &frame->tramp[0]; + regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]); } /* Allocate a dummy caller frame for the signal handler. */ @@ -941,14 +941,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, } /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->result = 0; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); - err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long) frame; } else { regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7ddc2d32c39e..447b78a87c8f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -619,6 +619,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs) /* * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -634,6 +636,15 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { hard_irq_disable(); + + /* + * Offlining CPUs in stop_this_cpu can result in scheduler warnings, + * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants + * to know other CPUs are offline before it breaks locks to flush + * printk buffers, in case we panic()ed while holding the lock. + */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -1541,6 +1552,10 @@ void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); + /* PPC64 calls setup_kup() in early_setup_secondary() */ + if (IS_ENABLED(CONFIG_PPC32)) + setup_kup(); + mmgrab(&init_mm); current->active_mm = &init_mm; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1deb1bf331dd..2b0d04a1b7d2 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,8 +23,8 @@ #include <asm/paca.h> -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { unsigned long sp; @@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, * * If the task is not 'current', the caller *must* ensure the task is inactive. */ -int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, - void *cookie, struct task_struct *task) +int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; @@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs) static void raise_backtrace_ipi(cpumask_t *mask) { + struct paca_struct *p; unsigned int cpu; + u64 delay_us; for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { handle_backtrace_ipi(NULL); - else - smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); - } + continue; + } - for_each_cpu(cpu, mask) { - struct paca_struct *p = paca_ptrs[cpu]; + delay_us = 5 * USEC_PER_SEC; + + if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) { + // Now wait up to 5s for the other CPU to do its backtrace + while (cpumask_test_cpu(cpu, mask) && delay_us) { + udelay(1); + delay_us--; + } + + // Other CPU cleared itself from the mask + if (delay_us) + continue; + } + + p = paca_ptrs[cpu]; cpumask_clear_cpu(cpu, mask); diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index a552c9e68d7e..bf4ae0f0e36c 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -114,7 +114,8 @@ SYSCALL_DEFINE0(switch_endian) { struct thread_info *ti; - current->thread.regs->msr ^= MSR_LE; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr ^ MSR_LE); /* * Set TIF_RESTOREALL so that r3 isn't clobbered on return to diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 2e08640bb3b4..5ff0e55d0db1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -843,14 +843,14 @@ static int register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -858,7 +858,7 @@ static int register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif @@ -940,14 +940,14 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -955,7 +955,7 @@ static int unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 6c31af7f4fa8..b9a047d92ec0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -201,7 +201,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index da995c5fb97d..e45ce427bffb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb) void notrace accumulate_stolen_time(void) { u64 sst, ust; - unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; - /* We are called early in the exception entry, before - * soft/hard_enabled are sync'ed to the expected state - * for the exception. We are hard disabled but the PACA - * needs to reflect that so various debug stuff doesn't - * complain - */ - irq_soft_mask_set(IRQS_DISABLED); - sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); acct->stime -= sst; acct->utime -= ust; acct->steal_time += ust + sst; - - irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ffe9537195aa..d89c5df4f206 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); + create_branch(&op, (u32 *)ip, addr, link ? 1 : 0); return op; } @@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) } /* replace the text with the new text */ - if (patch_instruction((struct ppc_inst *)ip, new)) + if (patch_instruction((u32 *)ip, new)) return -EPERM; return 0; @@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; + return create_branch(&op, (u32 *)ip, addr, 0) == 0; } static int is_bl_op(struct ppc_inst op) @@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = ppc_inst(PPC_INST_NOP); + pop = ppc_inst(PPC_RAW_NOP()); if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); @@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod, } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { pr_err("Unexpected instruction %s around bl _mcount\n", ppc_inst_as_str(op)); @@ -203,12 +203,12 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((struct ppc_inst *)ip, pop)) { + if (patch_instruction((u32 *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -278,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = ppc_inst(PPC_INST_NOP); + op = ppc_inst(PPC_RAW_NOP()); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction((u32 *)ip, op)) return -EPERM; return 0; @@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; } - if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { + if (patch_branch((u32 *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_INST_NOP); + new = ppc_inst(PPC_RAW_NOP()); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -510,7 +510,7 @@ static int expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) + if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) return 0; return 1; } @@ -589,14 +589,14 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; struct ppc_inst op; - unsigned long ip = rec->ip; + u32 *ip = (u32 *)rec->ip; /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, ip)) return -EFAULT; /* It should be pointing to a nop */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - err = create_branch(&op, (struct ppc_inst *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction(ip, op)) return -EPERM; return 0; @@ -653,7 +652,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -684,7 +683,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = ppc_inst(PPC_INST_NOP); + old = ppc_inst(PPC_RAW_NOP()); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) struct ppc_inst old, new; int ret; - old = ppc_inst_read((struct ppc_inst *)&ftrace_call); + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b4ab95c9e94a..dfbce527c98e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -67,6 +67,7 @@ #include <asm/kprobes.h> #include <asm/stacktrace.h> #include <asm/nmi.h> +#include <asm/disassemble.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -427,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; nonrecoverable: - regs->msr &= ~MSR_RI; + regs_set_return_msr(regs, regs->msr & ~MSR_RI); #endif } DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) @@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs) * For the debug message, we look at the preceding * load or store. */ - if (*nip == PPC_INST_NOP) + if (*nip == PPC_RAW_NOP()) nip -= 2; - else if (*nip == PPC_INST_ISYNC) + else if (*nip == PPC_RAW_ISYNC()) --nip; - if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) { + if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) { unsigned int rb; --nip; @@ -549,8 +550,8 @@ static inline int check_io_access(struct pt_regs *regs) printk(KERN_DEBUG "%s bad port %lx at %p\n", (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } } @@ -586,8 +587,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) -#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) +#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE)) +#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE)) #endif #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) @@ -1031,7 +1032,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) #endif /* !__LITTLE_ENDIAN__ */ /* Go to next instruction */ - regs->nip += 4; + regs_add_return_ip(regs, 4); } #endif /* CONFIG_VSX */ @@ -1476,7 +1477,7 @@ static void do_program_check(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - regs->nip += 4; + regs_add_return_ip(regs, 4); return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); @@ -1538,7 +1539,7 @@ static void do_program_check(struct pt_regs *regs) if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { switch (emulate_instruction(regs)) { case 0: - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); return; case -EFAULT: @@ -1566,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception) */ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { - regs->msr |= REASON_ILLEGAL; + regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL); do_program_check(regs); } @@ -1593,7 +1594,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) if (fixed == 1) { /* skip over emulated instruction */ - regs->nip += inst_length(reason); + regs_add_return_ip(regs, inst_length(reason)); emulate_single_step(regs); return; } @@ -1659,7 +1660,7 @@ static void tm_unavailable(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (user_mode(regs)) { current->thread.load_tm++; - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); tm_enable(); tm_restore_sprs(¤t->thread); return; @@ -1751,7 +1752,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) pr_err("DSCR based mfspr emulation failed\n"); return; } - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); } return; @@ -1948,7 +1949,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) */ if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM flag is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -1969,7 +1970,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) * instead of stopping here when hitting a BT */ if (debug_status & DBSR_BT) { - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable BT */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT); @@ -1980,7 +1981,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (user_mode(regs)) { current->thread.debug.dbcr0 &= ~DBCR0_BT; current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return; } @@ -1994,7 +1995,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (debugger_sstep(regs)) return; } else if (debug_status & DBSR_IC) { /* Instruction complete */ - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable instruction completion */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); @@ -2016,7 +2017,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) current->thread.debug.dbcr0 &= ~DBCR0_IC; if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM bit is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -2044,7 +2045,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } @@ -2109,7 +2110,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) err = do_spe_mathemu(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } @@ -2140,10 +2141,10 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) giveup_spe(current); preempt_enable(); - regs->nip -= 4; + regs_add_return_ip(regs, -4); err = speround_handler(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 9356b60d6030..8513aa49614e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT + +#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) + +static u8 udbg_uart_in_isa300_rm(unsigned int reg) +{ + uint64_t msr = mfmsr(); + uint8_t c; + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); + return c; +} + +static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) +{ + uint64_t msr = mfmsr(); + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); +} + +void __init udbg_init_debug_microwatt(void) +{ + udbg_uart_in = udbg_uart_in_isa300_rm; + udbg_uart_out = udbg_uart_out_isa300_rm; + udbg_use_uart(); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 186f69b11e94..c6975467d9ff 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, return -EINVAL; if (cpu_has_feature(CPU_FTR_ARCH_31) && - ppc_inst_prefixed(auprobe->insn) && + ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) && (addr & 0x3f) == 60) { pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); return -EINVAL; @@ -62,7 +62,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) autask->saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; - regs->nip = current->utask->xol_vaddr; + regs_set_return_ip(regs, current->utask->xol_vaddr); user_enable_single_step(current); return 0; @@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); + regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn)); user_disable_single_step(current); return 0; @@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); + ret = emulate_step(regs, ppc_inst_read(auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f5a52f444e36..fc120fac1910 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -73,6 +73,10 @@ _GLOBAL(load_up_altivec) addi r5,r4,THREAD /* Get THREAD */ oris r12,r12,MSR_VEC@h std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_VEC(r5) @@ -131,7 +135,9 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_interrupt_return + li r4,0 + stb r4,PACASRR_VALID(r13) + b fast_interrupt_return_srr #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 72fa3c00229a..40bdefe9caa7 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -9,6 +9,22 @@ #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define SOFT_MASK_TABLE(align) \ + . = ALIGN(align); \ + __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \ + __start___soft_mask_table = .; \ + KEEP(*(__soft_mask_table)) \ + __stop___soft_mask_table = .; \ + } + +#define RESTART_TABLE(align) \ + . = ALIGN(align); \ + __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \ + __start___restart_table = .; \ + KEEP(*(__restart_table)) \ + __stop___restart_table = .; \ + } + #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> @@ -124,6 +140,9 @@ SECTIONS RO_DATA(PAGE_SIZE) #ifdef CONFIG_PPC64 + SOFT_MASK_TABLE(8) + RESTART_TABLE(8) + . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { __start___stf_entry_barrier_fixup = .; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index c9a8f4781a10..a165635fd214 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -24,6 +24,7 @@ #include <linux/kdebug.h> #include <linux/sched/debug.h> #include <linux/delay.h> +#include <linux/processor.h> #include <linux/smp.h> #include <asm/interrupt.h> |