252 files changed, 4893 insertions, 2196 deletions
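The series opens with two new PAPR sysfs ABI documents. Purely as an illustration of how the documented attributes would be consumed, here is a minimal userspace sketch, assuming the /sys/firmware/papr/energy_scale_info layout described below; the attribute id 1 is a hypothetical example (real ids come from the hypervisor):

/* Sketch: read one energy-scale attribute's description and value.
 * The id "1" and its presence are assumptions for illustration. */
#include <stdio.h>

int main(void)
{
	char desc[128], value[64];
	FILE *f;

	/* Human-readable description of the (assumed) attribute id 1 */
	f = fopen("/sys/firmware/papr/energy_scale_info/1/desc", "r");
	if (f && fgets(desc, sizeof(desc), f))
		printf("desc: %s", desc);
	if (f)
		fclose(f);

	/* Numeric value of the same attribute */
	f = fopen("/sys/firmware/papr/energy_scale_info/1/value", "r");
	if (f && fgets(value, sizeof(value), f))
		printf("value: %s", value);
	if (f)
		fclose(f);

	return 0;
}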
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 95254cec92bf..4ac0673901e7 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -61,3 +61,15 @@ Description: * "CchRHCnt" : Cache Read Hit Count * "CchWHCnt" : Cache Write Hit Count * "FastWCnt" : Fast Write Count + +What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject +Date: Jan, 2022 +KernelVersion: v5.17 +Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev, +Description: + (RO) Reports the health bitmap inject bitmap that is applied to + the bitmap received from PowerVM via the H_SCM_HEALTH hcall. This is + used to forcibly set specific bits returned from the hcall. These are + then used to simulate various health or shutdown states for an nvdimm + and are set by user-space tools like ndctl by issuing a PAPR DSM. + diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info new file mode 100644 index 000000000000..141a6b371469 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info @@ -0,0 +1,29 @@ +What: /sys/firmware/papr/energy_scale_info +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Directory hosting a set of platform attributes like + energy/frequency on Linux running as a PAPR guest. + + Each file in the directory contains a platform + attribute hierarchy pertaining to performance/ + energy-savings mode and processor frequency. + +What: /sys/firmware/papr/energy_scale_info/<id> +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Energy, frequency attributes directory for POWERVM servers + +What: /sys/firmware/papr/energy_scale_info/<id>/desc +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: String description of the energy attribute of <id> + +What: /sys/firmware/papr/energy_scale_info/<id>/value +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Numeric value of the energy attribute of <id> + +What: /sys/firmware/papr/energy_scale_info/<id>/value_desc +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: String value of the energy attribute of <id> diff --git a/arch/Kconfig b/arch/Kconfig index 9af3a597cc67..84bc1de02720 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -202,6 +202,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config HAVE_FUNCTION_DESCRIPTORS + bool + config TRACE_IRQFLAGS_SUPPORT bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index b2317324827f..cb93769a9f2a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -36,6 +36,7 @@ config IA64 select HAVE_SETUP_PER_CPU_AREA select TTY select HAVE_ARCH_TRACEHOOK + select HAVE_FUNCTION_DESCRIPTORS select HAVE_VIRT_CPU_ACCOUNTING select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE select VIRT_TO_BUS diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 6629301a2620..2ef5f9966ad1 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -226,7 +226,7 @@ struct got_entry { * Layout of the Function Descriptor */ struct fdesc { - uint64_t ip; + uint64_t addr; uint64_t gp; }; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3a033d2008b3..8e0875cf6071 100644 ---
a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -9,6 +9,9 @@ #include <linux/elf.h> #include <linux/uaccess.h> + +typedef struct fdesc func_desc_t; + #include <asm-generic/sections.h> extern char __phys_per_cpu_start[]; @@ -27,25 +30,4 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->ip)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - return dereference_function_descriptor(ptr); -} - #endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 360f36b0eb3f..8f62cf97f691 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -602,15 +602,15 @@ get_fdesc (struct module *mod, uint64_t value, int *okp) return value; /* Look for existing function descriptor. */ - while (fdesc->ip) { - if (fdesc->ip == value) + while (fdesc->addr) { + if (fdesc->addr == value) return (uint64_t)fdesc; if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) BUG(); } /* Create new one */ - fdesc->ip = value; + fdesc->addr = value; fdesc->gp = mod->arch.gp; return (uint64_t) fdesc; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 00cb889bd9a6..90fc95bd55ca 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -72,6 +72,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select TRACE_IRQFLAGS_SUPPORT + select HAVE_FUNCTION_DESCRIPTORS if 64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index bb52aea0cb21..33df42b5cc6d 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -2,20 +2,14 @@ #ifndef _PARISC_SECTIONS_H #define _PARISC_SECTIONS_H +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#include <asm/elf.h> +typedef Elf64_Fdesc func_desc_t; +#endif + /* nothing to see, move along */ #include <asm-generic/sections.h> extern char __alt_instructions[], __alt_instructions_end[]; -#ifdef CONFIG_64BIT - -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -void *dereference_function_descriptor(void *); - -#undef dereference_kernel_function_descriptor -void *dereference_kernel_function_descriptor(void *); -#endif - #endif diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea3d83b6fb62..2030c77592d3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,27 +263,6 @@ __get_wchan(struct task_struct *p) return 0; } -#ifdef CONFIG_64BIT -void *dereference_function_descriptor(void *ptr) -{ - Elf64_Fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || - ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif - static inline unsigned long brk_rnd(void) { return 
(get_random_int() & BRK_RND_MASK) << PAGE_SHIFT; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7e7387bd7d53..174edabb74fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -9,6 +9,10 @@ config 64BIT bool default y if PPC64 +config LIVEPATCH_64 + def_bool PPC64 + depends on LIVEPATCH + config MMU bool default y @@ -132,7 +136,7 @@ config PPC select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE - select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -198,11 +202,13 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_DESCRIPTORS if PPC64 && !CPU_LITTLE_ENDIAN select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -222,7 +228,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..eb541e730d3c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -213,7 +213,7 @@ CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN CHECKFLAGS += -D__BIG_ENDIAN__ else -CHECKFLAGS += -D__LITTLE_ENDIAN__ +CHECKFLAGS += -D__LITTLE_ENDIAN__ -D_CALL_ELF=2 endif ifdef CONFIG_476FPE_ERR46 @@ -421,9 +421,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h) $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h) endif archprepare: checkbin diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1eee61b82341..a4716d138cfc 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -16,6 +16,7 @@ kernel-vmlinux.strip.c kernel-vmlinux.strip.gz mktree otheros.bld +otheros-too-big.bld uImage cuImage.* dtbImage.* diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 000000000000..73f8c998c64d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: 
GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. + * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f5613..b6733e7e6580 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c0..bfe1ed5be337 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts index 840ea84bbb59..74b346f2d43c 100644 --- a/arch/powerpc/boot/dts/xpedite5200.dts +++ b/arch/powerpc/boot/dts/xpedite5200.dts @@ -132,7 +132,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts index 449fc1b5dc23..d491c7a8f979 100644 --- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts +++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts @@ -136,7 +136,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2b736d9fbb1b..2bc53c646ccd 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -21,6 +21,7 @@ #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) +#define PPC_SRL stringify_in_c(srd) #define PPC_LR_STKOFF 16 #define PPC_MIN_STKFRM 112 @@ -54,6 +55,7 @@ #define PPC_STLCX stringify_in_c(stwcx.) 
#define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) +#define PPC_SRL stringify_in_c(srw) #define PPC_LR_STKOFF 4 #define PPC_MIN_STKFRM 16 diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 41b8a1e1144a..d995c65d18ab 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -19,22 +19,6 @@ #include <uapi/asm/ucontext.h> -/* SMP */ -extern struct task_struct *current_set[NR_CPUS]; -extern struct task_struct *secondary_current; -void start_secondary(void *unused); - -/* kexec */ -struct paca_struct; -struct kimage; -extern struct paca_struct kexec_paca; -void kexec_copy_flush(struct kimage *image); - -/* pseries hcall tracing */ -extern struct static_key hcall_tracepoint_key; -void __trace_hcall_entry(unsigned long opcode, unsigned long *args); -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); - /* Ultravisor */ #if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM) long ucall_norets(unsigned long opcode, ...); @@ -50,49 +34,12 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* VMX copying */ -int enter_vmx_usercopy(void); -int exit_vmx_usercopy(void); -int enter_vmx_ops(void); -void *exit_vmx_ops(void *dest); - -/* signals, syscalls and interrupts */ -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size); -#ifdef CONFIG_PPC32 -long sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg); -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, - struct __kernel_old_timeval __user *tvp); -unsigned long __init early_init(unsigned long dt_ptr); -void __init machine_init(u64 dt_ptr); -#endif -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); -#ifdef CONFIG_PPC64 -unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); -unsigned long interrupt_exit_user_restart(struct pt_regs *regs); -unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); -#endif - -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low); -long sys_switch_endian(void); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, unsigned long r6, unsigned long r7, unsigned long kbase); -/* setup */ -void __init early_setup(unsigned long dt_ptr); -void early_setup_secondary(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); @@ -103,11 +50,6 @@ extern int __ucmpdi2(u64, u64); /* tracing */ void _mcount(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long sp); - -void pnv_power9_force_smt4_catch(void); -void pnv_power9_force_smt4_release(void); /* Transaction memory related */ void tm_enable(void); diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ea5d27dda8cf..344fba3b16eb 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -287,7 +287,7 @@ static 
inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) * fls: find last (most-significant) bit set. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int fls(unsigned int x) +static __always_inline int fls(unsigned int x) { int lz; @@ -305,7 +305,7 @@ static inline int fls(unsigned int x) * 32-bit fls calls. */ #ifdef CONFIG_PPC64 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { int lz; diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 95e06f2a8e23..40041ac713d9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,28 +298,35 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p unsigned long clr, unsigned long set, int huge) { pte_basic_t old; - unsigned long tmp; - __asm__ __volatile__( + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long tmp; + + asm volatile( #ifndef CONFIG_PTE_64BIT -"1: lwarx %0, 0, %3\n" -" andc %1, %0, %4\n" + "1: lwarx %0, 0, %3\n" + " andc %1, %0, %4\n" #else -"1: lwarx %L0, 0, %3\n" -" lwz %0, -4(%3)\n" -" andc %1, %L0, %4\n" + "1: lwarx %L0, 0, %3\n" + " lwz %0, -4(%3)\n" + " andc %1, %L0, %4\n" #endif -" or %1, %1, %5\n" -" stwcx. %1, 0, %3\n" -" bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) + " or %1, %1, %5\n" + " stwcx. %1, 0, %3\n" + " bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) #ifndef CONFIG_PTE_64BIT - : "r" (p), + : "r" (p), #else - : "b" ((unsigned long)(p) + 4), + : "b" ((unsigned long)(p) + 4), #endif - "r" (clr), "r" (set), "m" (*p) - : "cc" ); + "r" (clr), "r" (set), "m" (*p) + : "cc" ); + } else { + old = pte_val(*p); + + *p = __pte((old & ~(pte_basic_t)clr) | set); + } return old; } diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 69fcf63eec94..54cf46808157 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -328,7 +328,7 @@ static inline unsigned long get_kuap(void) return mfspr(SPRN_AMR); } -static inline void set_kuap(unsigned long value) +static __always_inline void set_kuap(unsigned long value) { if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; @@ -398,7 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 02c08d1492f8..ecbae1832de3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -11,7 +11,7 @@ #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> #ifdef CONFIG_DEBUG_BUGVERBOSE -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags @@ -22,7 +22,7 @@ .previous .endm #else -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b .short \flags @@ -33,7 +33,14 @@ .macro EMIT_WARN_ENTRY addr,file,line,flags EX_TABLE(\addr,\addr+4) - EMIT_BUG_ENTRY \addr,\file,\line,\flags + __EMIT_BUG_ENTRY \addr,\file,\line,\flags +.endm + +.macro EMIT_BUG_ENTRY addr,file,line,flags + .if \flags & 1 /* BUGFLAG_WARNING */ + .err /* Use 
EMIT_WARN_ENTRY for warnings */ + .endif + __EMIT_BUG_ENTRY \addr,\file,\line,\flags .endm #else /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index e26080539c31..409483b2d0ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -118,7 +118,7 @@ static inline unsigned long ppc_function_entry(void *func) * function's descriptor. The first entry in the descriptor is the * address of the function text. */ - return ((func_descr_t *)func)->entry; + return ((struct func_desc *)func)->addr; #else return (unsigned long)func; #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index b8425e3cfd81..971589a21bc0 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -176,4 +176,10 @@ do { \ /* Relocate the kernel image to @final_address */ void relocate(unsigned long final_address); +struct func_desc { + unsigned long addr; + unsigned long toc; + unsigned long env; +}; + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c99ba08a408d..cdf3c6df5123 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -65,7 +65,7 @@ * but the gcc inline assembly syntax does not allow us to specify registers * on the clobber list that are also on the input/output list. Therefore, * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. + * parameters ("+r" and "=r") passed to the hypercall. * * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a * general rule, 'x' is the number of parameters passed to the assembly diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 9b702d2b80fb..8dddd34b8ecf 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -54,6 +54,7 @@ #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) +#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) #ifndef __ASSEMBLY__ @@ -74,7 +75,8 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | + FW_FEATURE_ENERGY_SCALE_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index debe8c4f7062..d83758acd1c7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,44 +10,7 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR -#ifdef __ASSEMBLY__ - -/* Based off of objdump output from glibc */ - -#define MCOUNT_SAVE_FRAME \ - stwu r1,-48(r1); \ - stw r3, 12(r1); \ - stw r4, 16(r1); \ - stw r5, 20(r1); \ - stw r6, 24(r1); \ - mflr r3; \ - lwz r4, 52(r1); \ - mfcr r5; \ - stw r7, 28(r1); \ - stw r8, 32(r1); \ - stw r9, 36(r1); \ - stw r10,40(r1); \ - stw r3, 44(r1); \ - stw r5, 8(r1) - -#define MCOUNT_RESTORE_FRAME \ - lwz r6, 8(r1); \ - lwz r0, 44(r1); \ - lwz r3, 12(r1); \ - mtctr r0; \ - lwz r4, 16(r1); \ - mtcr r6; \ - lwz r5, 20(r1); \ - lwz r6, 
24(r1); \ - lwz r0, 52(r1); \ - lwz r7, 28(r1); \ - lwz r8, 32(r1); \ - mtlr r0; \ - lwz r9, 36(r1); \ - lwz r10,40(r1); \ - addi r1, r1, 48 - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) @@ -56,9 +19,36 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp); + struct dyn_arch_ftrace { struct module *mod; }; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + /* We clear regs.msr in ftrace_call */ + return fregs->regs.msr ? &fregs->regs : NULL; +} + +static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, + unsigned long ip) +{ + regs_set_return_ip(&fregs->regs, ip); +} + +struct ftrace_ops; + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 962708fa1017..6a1a1ac5743b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void __init hugetlbpage_init_default(void); +void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); @@ -76,6 +76,9 @@ static inline void __init gigantic_hugetlb_cma_reserve(void) { } +static inline void __init hugetlbpage_init_defaultsize(void) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9bcf345cb208..d92a20a85395 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -323,7 +323,8 @@ #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C -#define MAX_HCALL_OPCODE H_SCM_FLUSH +#define H_GET_ENERGY_SCALE_INFO 0x450 +#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -500,6 +501,11 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +/* pseries hcall tracing */ +extern struct static_key hcall_tracepoint_key; +void __trace_hcall_entry(unsigned long opcode, unsigned long *args); +void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); + struct hvcall_mpp_data { unsigned long entitled_mem; unsigned long mapped_mem; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index fc28f46d2f9d..f964ef5c57d8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -123,9 +123,6 @@ static inline void nap_adjust_return(struct pt_regs *regs) #endif } -struct interrupt_state { -}; - static inline void booke_restore_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -138,7 +135,7 @@ static inline void booke_restore_dbcr0(void) #endif } -static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void 
interrupt_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) @@ -228,17 +225,17 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * However interrupt_nmi_exit_prepare does return directly to regs, because * NMIs do not do "exit work" or replay soft-masked interrupts. */ -static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_exit_prepare(struct pt_regs *regs) { } -static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC64 /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; #endif - interrupt_enter_prepare(regs, state); + interrupt_enter_prepare(regs); #ifdef CONFIG_PPC_BOOK3S_64 /* * RI=1 is set by interrupt_enter_prepare, so this thread flags access @@ -251,7 +248,7 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in irq_enter(); } -static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_exit_prepare(struct pt_regs *regs) { /* * Adjust at exit so the main handler sees the true NIA. This must @@ -262,7 +259,7 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int nap_adjust_return(regs); irq_exit(); - interrupt_exit_prepare(regs, state); + interrupt_exit_prepare(regs); } struct interrupt_nmi_state { @@ -447,13 +444,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -482,14 +477,13 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ interrupt_handler long func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ long ret; \ \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ret = ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ \ return ret; \ } \ @@ -518,13 +512,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_async_enter_prepare(regs, &state); \ + interrupt_async_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_async_exit_prepare(regs, &state); \ + interrupt_async_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -612,7 +604,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ -DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); +DECLARE_INTERRUPT_HANDLER(do_hash_fault); /* fault.c */ DECLARE_INTERRUPT_HANDLER(do_page_fault); @@ -644,6 +636,17 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long 
interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff9..fee979d3a1aa 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 8ebdd23d987c..2aefe14e1442 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -96,6 +96,8 @@ static inline bool kdump_in_progress(void) void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; +void kexec_copy_flush(struct kimage *image); + #ifdef 
CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9bf60bf0816..faf301d0dec0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -26,6 +26,8 @@ #include <asm/hvcall.h> #include <asm/mce.h> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -295,7 +297,6 @@ struct kvm_arch { bool dawr1_enabled; pgd_t *pgtable; u64 process_table; - struct dentry *debugfs_dir; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -673,7 +674,6 @@ struct kvm_vcpu_arch { u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_last_exit; - struct dentry *debugfs_exit_timing; #endif #ifdef CONFIG_PPC_BOOK3S @@ -831,8 +831,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ - - struct dentry *debugfs_dir; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a14dbcd1b8ce..c583d0c37f31 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,8 @@ struct kvmppc_ops { int (*svm_off)(struct kvm *kvm); int (*enable_dawr1)(struct kvm *kvm); bool (*hash_v3_possible)(void); + int (*create_vm_debugfs)(struct kvm *kvm); + int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..6f10de6af6e3 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -14,21 +14,21 @@ #ifdef CONFIG_LIVEPATCH static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs_set_return_ip(regs, ip); + ftrace_instruction_pointer_set(fregs, ip); } #define klp_get_ftrace_location klp_get_ftrace_location static inline unsigned long klp_get_ftrace_location(unsigned long faddr) { /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. + * Live patch works on PPC32 and only with -mprofile-kernel on PPC64. In + * both cases, the ftrace location is always within the first 16 bytes. 
*/ return ftrace_location_range(faddr, faddr + 16); } +#endif /* CONFIG_LIVEPATCH */ +#ifdef CONFIG_LIVEPATCH_64 static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ @@ -36,6 +36,6 @@ static inline void klp_init_thread_info(struct task_struct *p) } #else static inline void klp_init_thread_info(struct task_struct *p) { } -#endif /* CONFIG_LIVEPATCH */ +#endif #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e821037f74f0..358d171ae8e0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,11 +10,6 @@ #include <asm/setup.h> -/* We export this macro for external modules like Alsa to know if - * ppc_md.feature_call is implemented or not - */ -#define CONFIG_PPC_HAS_FEATURE_CALLS - struct pt_regs; struct pci_bus; struct device_node; @@ -99,6 +94,8 @@ struct machdep_calls { /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); + void (*machine_check_log_err)(void); + /* Motherboard/chipset features. This is a kind of general purpose * hook used to control some machine specific features (like reset * lines, chip power control, etc...). @@ -235,21 +232,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -#ifdef CONFIG_PPC_PMAC -/* - * Power macintoshes have either a CUDA, PMU or SMU controlling - * system reset, power, NVRAM, RTC. - */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, - SYS_CTRLER_SMU = 3, -} sys_ctrler_t; -extern sys_ctrler_t sys_ctrler; - -#endif /* CONFIG_PPC_PMAC */ - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 331d944280b8..c9f0936bd3c9 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -235,8 +235,21 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +void mce_irq_work_queue(void); int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_run_irq_context_handlers(void); +#else +static inline void mce_run_irq_context_handlers(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void set_mce_pending_irq_work(void); +void clear_mce_pending_irq_work(void); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); void flush_erat(void); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index dcc9b338e042..9091e4904a6b 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -338,9 +338,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & _PAGE_ACCESSED; } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. 
The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 78888b0c30f6..57083f95e82b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -282,9 +282,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 295573a82c66..8330968ca346 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -288,6 +288,7 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_BOOK3S_64 struct mce_info *mce_info; + u8 mce_pending_irq_work; #endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index e08e829261b6..2495866f2e97 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -401,5 +401,17 @@ extern u32 __iomem *uninorth_base; */ extern int pmac_get_uninorth_variant(void); +/* + * Power macintoshes have either a CUDA, PMU or SMU controlling + * system reset, power, NVRAM, RTC. + */ +typedef enum sys_ctrler_kind { + SYS_CTRLER_UNKNOWN = 0, + SYS_CTRLER_CUDA = 1, + SYS_CTRLER_PMU = 2, + SYS_CTRLER_SMU = 3, +} sys_ctrler_t; +extern sys_ctrler_t sys_ctrler; + #endif /* __ASM_POWERPC_PMAC_FEATURE_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 9675303b724e..82f1f0041c6f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,6 +262,8 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 +#define PPC_INST_PASTE 0x7c20070d +#define PPC_INST_PASTE_MASK 0xfc2007ff #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f21e6bde17a1..4dea2d963738 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -203,12 +203,7 @@ GLUE(.,name): #else /* 32-bit */ -#define _ENTRY(n) \ - .globl n; \ -n: - #define _GLOBAL(n) \ - .stabs __stringify(n:F-1),N_FUN,0,0,n;\ .globl n; \ n: @@ -697,12 +692,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define evr30 30 #define evr31 31 -/* some stab codes */ -#define N_FUN 36 -#define N_RSYM 64 -#define N_SLINE 68 -#define N_SO 100 - #define RFSCV .long 0x4c0000a4 /* diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2c8686d9e964..39c25021030f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -411,6 +411,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); +void pnv_power9_force_smt4_catch(void); +void pnv_power9_force_smt4_release(void); extern int fix_alignment(struct pt_regs *); @@ -427,6 +429,12 @@ extern int fix_alignment(struct pt_regs *); int do_mathemu(struct pt_regs *regs); +/* VMX copying */ 
+int enter_vmx_usercopy(void); +int exit_vmx_usercopy(void); +int enter_vmx_ops(void); +void *exit_vmx_ops(void *dest); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 82e5b055fa2a..00531af17ce0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -274,7 +274,6 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); -extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 38f79e42bf3c..8be2c491c733 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,6 +6,10 @@ #include <linux/elf.h> #include <linux/uaccess.h> +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +typedef struct func_desc func_desc_t; +#endif + #include <asm-generic/sections.h> extern char __head_end[]; @@ -54,31 +58,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } -#ifdef PPC64_ELF_ABI_v1 - -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct ppc64_opd_entry *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif /* PPC64_ELF_ABI_v1 */ - #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f7920..7ebc807aa8cc 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..049ca26893e6 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -76,6 +76,13 @@ static inline void setup_spectre_v2(void) {} #endif void __init do_btb_flush_fixups(void); +#ifdef CONFIG_PPC32 +unsigned long __init early_init(unsigned long dt_ptr); +void __init machine_init(u64 dt_ptr); +#endif +void __init early_setup(unsigned long dt_ptr); +void early_setup_secondary(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/smp.h 
b/arch/powerpc/include/asm/smp.h index 007332a4a732..60ab739a5e3b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -60,6 +60,9 @@ struct smp_ops_t { #endif }; +extern struct task_struct *secondary_current; + +void start_secondary(void *unused); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 8a2d11ba0dae..96ad4510c895 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -249,8 +249,8 @@ void unregister_spu_syscalls(struct spufs_calls *calls); int spu_add_dev_attr(struct device_attribute *attr); void spu_remove_dev_attr(struct device_attribute *attr); -int spu_add_dev_attr_group(struct attribute_group *attrs); -void spu_remove_dev_attr_group(struct attribute_group *attrs); +int spu_add_dev_attr_group(const struct attribute_group *attrs); +void spu_remove_dev_attr_group(const struct attribute_group *attrs); extern void notify_spus_active(void); extern void do_notify_spus_active(void); diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 7ee66ae5444d..a2b13e55254f 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -18,6 +18,10 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, unsigned long fd, unsigned long pgoff); asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct __kernel_old_timeval __user *tvp); +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); #ifdef CONFIG_COMPAT unsigned long compat_sys_mmap2(unsigned long addr, size_t len, diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d6e649b3c70b..125328d1b980 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { unsigned int cpu; #endif unsigned long local_flags; /* private flags for thread */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 unsigned long *livepatch_sp; #endif #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index f1630c553efe..84078c28c1a2 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -13,9 +13,9 @@ #ifdef __powerpc64__ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define PPC64_ELF_ABI_v2 +#define PPC64_ELF_ABI_v2 1 #else -#define PPC64_ELF_ABI_v1 +#define PPC64_ELF_ABI_v1 1 #endif #endif /* __powerpc64__ */ @@ -23,12 +23,6 @@ typedef __vector128 vector128; -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_TYPES_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2e83217f52de..9b82b38ff867 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -116,8 +116,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + 
".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e09..83afcb6c194b 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -30,6 +30,16 @@ #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 /* + * VAS window Linux status bits + */ +#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ + /* vas mmap() */ +/* Window is closed in the hypervisor due to lost credit */ +#define VAS_WIN_NO_CRED_CLOSE 0x00000001 +/* Window is closed due to migration */ +#define VAS_WIN_MIGRATE_CLOSE 0x00000002 + +/* * Get/Set bit fields */ #define GET_FIELD(m, v) (((v) & (m)) >> MASK_LSH(m)) @@ -59,6 +69,9 @@ struct vas_user_win_ref { struct pid *pid; /* PID of owner */ struct pid *tgid; /* Thread group ID of owner */ struct mm_struct *mm; /* Linux process mm_struct */ + struct mutex mmap_mutex; /* protects paste address mmap() */ + /* with DLPAR close/open windows */ + struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */ }; /* @@ -67,6 +80,7 @@ struct vas_user_win_ref { struct vas_window { u32 winid; u32 wcreds_max; /* Window credits */ + u32 status; /* Window status used in OS */ enum vas_cop_type cop; struct vas_user_win_ref task_ref; char *dbgname; diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 1faff0be1111..f0a4cf01e85c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,74 +2,9 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H -#include <asm/page.h> - -#ifdef __ASSEMBLY__ - -#include <asm/ppc_asm.h> - -/* - * The macros sets two stack frames, one for the caller and one for the callee - * because there are no requirement for the caller to set a stack frame when - * calling VDSO so it may have omitted to set one, especially on PPC64 - */ - -.macro cvdso_call funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r5 - addi r5, r5, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - cmpwi r3, 0 - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - crclr so - beqlr+ - crset so - neg r3, r3 - blr - .cfi_endproc -.endm - -.macro cvdso_call_time funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r4 - addi r4, r4, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - crclr so - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - blr - .cfi_endproc -.endm - -#else +#ifndef __ASSEMBLY__ +#include <asm/page.h> #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 860c59291bfc..308857123a08 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -289,12 +289,4 @@ typedef elf_fpreg_t 
elf_vsrreghalf_t32[ELF_NVSRHALFREG]; /* Keep this the last entry. */ #define R_PPC64_NUM 253 -/* There's actually a third entry here, but it's unused */ -struct ppc64_opd_entry -{ - unsigned long funcaddr; - unsigned long r2; -}; - - #endif /* _UAPI_ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 82488b1e7276..17439925045c 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -116,6 +116,22 @@ struct nd_papr_pdsm_health { }; }; +/* Flags for injecting specific smart errors */ +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) + +struct nd_papr_pdsm_smart_inject { + union { + struct { + /* One or more of PDSM_SMART_INJECT_ */ + __u32 flags; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct @@ -123,12 +139,14 @@ struct nd_papr_pdsm_health { enum papr_pdsm { PAPR_PDSM_MIN = 0x0, PAPR_PDSM_HEALTH, + PAPR_PDSM_SMART_INJECT, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { struct nd_papr_pdsm_health health; + struct nd_papr_pdsm_smart_inject smart_inject; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4d7829399570..4ddd161aef32 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -194,8 +194,8 @@ targets += prom_init_check clean-files := vmlinux.lds # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg -$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg +$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg +$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg # for cleaning -subdir- += vdso32 vdso64 +subdir- += vdso diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7582f3e3a330..eec536aef83a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) OFFSET(TASK_CPU, task_struct, thread_info.cpu); #endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index ef2ad4945904..03f1135ef64f 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -8,7 +8,6 @@ #include <linux/kernel.h> #include <asm/setup.h> #include <asm/sections.h> -#include <asm/asm-prototypes.h> /* * We're called here very early in the boot. diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 4fdb7c77fda1..65562c4a0a69 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1643,9 +1643,11 @@ int __init setup_fadump(void) if (fw_dump.ops->fadump_process(&fw_dump) < 0) fadump_invalidate_release_mem(); } - /* Initialize the kernel dump memory structure for FAD registration. 
*/ - else if (fw_dump.reserve_dump_area_size) + /* Initialize the kernel dump memory structure and register with f/w */ + else if (fw_dump.reserve_dump_area_size) { fw_dump.ops->fadump_init_mem_struct(&fw_dump); + register_fadump(); + } /* * In case of panic, fadump is triggered via ppc_panic_event() @@ -1657,7 +1659,12 @@ int __init setup_fadump(void) return 1; } -subsys_initcall(setup_fadump); +/* + * Use subsys_initcall_sync() here because there is a dependency on + * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization + * is done before registering with f/w. + */ +subsys_initcall_sync(setup_fadump); #else /* !CONFIG_PRESERVE_FA_DUMP */ /* Scan the Firmware Assisted dump configuration details. */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b6c6d1de5fd5..088f500896c7 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -53,8 +53,8 @@ * This is all going to change RSN when we add bi_recs....... -- Dan */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); mr r31,r3 /* save device tree ptr */ @@ -82,19 +82,19 @@ turn_on_mmu: */ . = 0xc0 crit_save: -_ENTRY(crit_r10) +_GLOBAL(crit_r10) .space 4 -_ENTRY(crit_r11) +_GLOBAL(crit_r11) .space 4 -_ENTRY(crit_srr0) +_GLOBAL(crit_srr0) .space 4 -_ENTRY(crit_srr1) +_GLOBAL(crit_srr1) .space 4 -_ENTRY(crit_r1) +_GLOBAL(crit_r1) .space 4 -_ENTRY(crit_dear) +_GLOBAL(crit_dear) .space 4 -_ENTRY(crit_esr) +_GLOBAL(crit_esr) .space 4 /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b73a56466903..f15cb9fdb692 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -52,8 +52,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0d073b9fd52c..0b05f2be66b9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -53,8 +53,8 @@ #define PAGE_SHIFT_8M 23 __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* MPC8xx * This port was done on an MBX board with an 860. Right now I only diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index b876ef8c70a7..6c739beb938c 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,16 +50,13 @@ mtspr SPRN_DBAT##n##L,RB __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_book3s_32.S",N_SO,0,0,0f -0: -_ENTRY(_stext); +_GLOBAL(_stext); /* * _start is defined this way because the XCOFF loader in the OpenFirmware * on the powermac expects the entry point to be a procedure descriptor. */ -_ENTRY(_start); +_GLOBAL(_start); /* * These are here for legacy reasons, the kernel used to * need to look like a coff function entry for the pmac @@ -504,14 +501,12 @@ DataLoadTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx.
- */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION @@ -586,10 +581,6 @@ DataStoreTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ @@ -784,7 +775,7 @@ relocate_kernel: * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. */ -_ENTRY(copy_and_flush) +_GLOBAL(copy_and_flush) addi r5,r5,-4 addi r6,r6,-4 4: li r0,L1_CACHE_BYTES/4 @@ -1082,7 +1073,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -_ENTRY(update_bats) +_GLOBAL(update_bats) lis r4, 1f@h ori r4, r4, 1f@l tophys(r4, r4) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ac2b4dcf5fd3..f0db4f52bc00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -54,8 +54,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs @@ -154,7 +154,7 @@ _ENTRY(_start); * if needed */ -_ENTRY(__early_start) +_GLOBAL(__early_start) LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) lwz r20,0(r20) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7cd6ce3ec423..784ea3289c84 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -5,7 +5,6 @@ #include <linux/compat.h> #include <linux/sched/debug.h> /* for show_regs */ -#include <asm/asm-prototypes.h> #include <asm/kup.h> #include <asm/cputime.h> #include <asm/hw_irq.h> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2cf31a97126c..752fb182eacb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -64,7 +64,6 @@ #include <asm/udbg.h> #include <asm/smp.h> #include <asm/livepatch.h> -#include <asm/asm-prototypes.h> #include <asm/hw_irq.h> #include <asm/softirq_stack.h> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 2503dd4713b9..18173199b79d 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -24,23 +24,12 @@ #include <asm/machdep.h> #include <asm/mce.h> #include <asm/nmi.h> -#include <asm/asm-prototypes.h> #include "setup.h" -static void machine_check_process_queued_event(struct irq_work *work); -static void machine_check_ue_irq_work(struct irq_work *work); static void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); -static struct irq_work mce_event_process_work = { - .func = machine_check_process_queued_event, -}; - -static struct irq_work mce_ue_event_irq_work = { - .func = machine_check_ue_irq_work, -}; - static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static 
BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -89,6 +78,13 @@ static void mce_set_error_info(struct machine_check_event *mce, } } +void mce_irq_work_queue(void) +{ + /* Raise decrementer interrupt */ + arch_irq_work_raise(); + set_mce_pending_irq_work(); +} + /* * Decode and save high level MCE information into per cpu buffer which * is an array of machine_check_event structure. @@ -217,7 +213,7 @@ void release_mce_event(void) get_mce_event(NULL, true); } -static void machine_check_ue_irq_work(struct irq_work *work) +static void machine_check_ue_work(void) { schedule_work(&mce_ue_event_work); } @@ -239,7 +235,7 @@ static void machine_check_ue_event(struct machine_check_event *evt) evt, sizeof(*evt)); /* Queue work to process this event later. */ - irq_work_queue(&mce_ue_event_irq_work); + mce_irq_work_queue(); } /* @@ -249,7 +245,6 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; - unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -263,20 +258,7 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* - * Queue irq work to process this event later. Before - * queuing the work enable translation for non radix LPAR, - * as irq_work_queue may try to access memory outside RMO - * region. - */ - if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - irq_work_queue(&mce_event_process_work); - mtmsr(msr); - } else { - irq_work_queue(&mce_event_process_work); - } + mce_irq_work_queue(); } void mce_common_process_ue(struct pt_regs *regs, @@ -338,7 +320,7 @@ static void machine_process_ue_event(struct work_struct *work) * process pending MCE event from the mce event queue. This function will be * called during syscall exit. 
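(The mce.c rework here replaces the two static irq_work instances with a per-CPU pending flag plus a raised decrementer; the actual processing happens later via mce_run_irq_context_handlers() called out of the timer interrupt, wired up in the time.c hunk further down. Because setting a paca flag and raising the decrementer are both safe in real mode, the old MSR_IR/MSR_DR toggling around irq_work_queue() can be dropped. A minimal model of the pattern, with hypothetical stand-in names:)

```c
#include <stdio.h>

/* Hypothetical stand-ins: in the kernel these are the paca flag,
 * arch_irq_work_raise(), and the handlers invoked by
 * mce_run_irq_context_handlers() from timer_interrupt(). */
static volatile int mce_pending_irq_work;

static void model_raise_decrementer(void) { }

static void model_process_queued_events(void)
{
	puts("log + process queued MCE events, schedule UE work");
}

static void model_mce_irq_work_queue(void)
{
	/* Flag + interrupt only: both are safe in real mode. */
	model_raise_decrementer();
	mce_pending_irq_work = 1;
}

static void model_timer_interrupt(void)
{
	if (mce_pending_irq_work) {
		model_process_queued_events();
		mce_pending_irq_work = 0;
	}
}

int main(void)
{
	model_mce_irq_work_queue();
	model_timer_interrupt();	/* runs the deferred MCE work */
	return 0;
}
```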
*/ -static void machine_check_process_queued_event(struct irq_work *work) +static void machine_check_process_queued_event(void) { int index; struct machine_check_event *evt; @@ -363,6 +345,27 @@ static void machine_check_process_queued_event(struct irq_work *work) } } +void set_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 1; +} + +void clear_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 0; +} + +void mce_run_irq_context_handlers(void) +{ + if (unlikely(local_paca->mce_pending_irq_work)) { + if (ppc_md.machine_check_log_err) + ppc_md.machine_check_log_err(); + machine_check_process_queued_event(); + machine_check_ue_work(); + clear_mce_pending_irq_work(); + } +} + void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { @@ -401,14 +404,14 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_ra_types[] = { "Indeterminate", "Instruction fetch (bad)", - "Instruction fetch (foreign)", + "Instruction fetch (foreign/control memory)", "Page table walk ifetch (bad)", - "Page table walk ifetch (foreign)", + "Page table walk ifetch (foreign/control memory)", "Load (bad)", "Store (bad)", "Page table walk Load/Store (bad)", - "Page table walk Load/Store (foreign)", - "Load/Store (foreign)", + "Page table walk Load/Store (foreign/control memory)", + "Load/Store (foreign/control memory)", }; static const char *mc_link_types[] = { "Indeterminate", diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index a491ad481d85..a0432ef46967 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,6 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> +#include <asm/code-patching.h> /* Count how many different relocations (different symbol, different addend) */ @@ -174,15 +175,25 @@ static uint32_t do_plt_call(void *location, entry++; } - entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); - entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); - entry->jump[2] = PPC_RAW_MTCTR(_R12); - entry->jump[3] = PPC_RAW_BCTR(); + if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val))))) + return 0; + if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))))) + return 0; + if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12)))) + return 0; + if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR()))) + return 0; pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } +static int patch_location_16(uint32_t *loc, u16 value) +{ + loc = PTR_ALIGN_DOWN(loc, sizeof(u32)); + return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value)); +} + int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -216,37 +227,42 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_PPC_ADDR16_LO: /* Low half of the symbol */ - *(uint16_t *)location = value; + if (patch_location_16(location, PPC_LO(value))) + return -EFAULT; break; case R_PPC_ADDR16_HI: /* Higher half of the symbol */ - *(uint16_t *)location = (value >> 16); + if (patch_location_16(location, PPC_HI(value))) + return -EFAULT; break; case R_PPC_ADDR16_HA: - /* Sign-adjusted lower 16 bits: PPC ELF ABI says: - (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. - This is the same, only sane. 
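(The deleted R_PPC_ADDR16_HA comment here quotes the ABI formula ((x >> 16) + ((x & 0x8000) ? 1 : 0)) & 0xFFFF, which the PPC_HA() macro now encapsulates. The "+ 0x8000" exists because the low half is consumed sign-extended by addi, so the high-adjusted half must absorb the borrow. A small self-contained check of that identity — the LO/HA macros below mirror the kernel's PPC_LO/PPC_HA for illustration:)

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative mirrors of the kernel's PPC_LO/PPC_HA. */
#define LO(x) ((uint16_t)(x))
#define HA(x) ((uint16_t)(((x) + 0x8000) >> 16))  /* == the ABI formula above */

int main(void)
{
	/* lis rD,HA(v); addi rD,rD,LO(v) computes (HA(v) << 16) + sext(LO(v)) */
	uint32_t v1 = 0x12340001;	/* low half positive: HA == plain high half */
	uint32_t v2 = 0x1234ffff;	/* low half has bit 15 set: HA = high + 1 */

	assert((((uint32_t)HA(v1) << 16) + (int16_t)LO(v1)) == v1);
	assert((((uint32_t)HA(v2) << 16) + (int16_t)LO(v2)) == v2);
	return 0;
}
```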
- */ - *(uint16_t *)location = (value + 0x8000) >> 16; + if (patch_location_16(location, PPC_HA(value))) + return -EFAULT; break; case R_PPC_REL24: if ((int)(value - (uint32_t)location) < -0x02000000 - || (int)(value - (uint32_t)location) >= 0x02000000) + || (int)(value - (uint32_t)location) >= 0x02000000) { value = do_plt_call(location, value, sechdrs, module); + if (!value) + return -EFAULT; + } /* Only replace bits 2 through 26 */ pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); pr_debug("Location before: %08X.\n", *(uint32_t *)location); - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); + + if (patch_instruction(location, ppc_inst(value))) + return -EFAULT; + pr_debug("Location after: %08X.\n", *(uint32_t *)location); pr_debug("ie. jump to %08X+%08X = %08X\n", diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..794720530442 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -14,6 +14,7 @@ #include <linux/ftrace.h> #include <linux/bug.h> #include <linux/uaccess.h> +#include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> #include <asm/code-patching.h> @@ -32,20 +33,13 @@ #ifdef PPC64_ELF_ABI_v2 -/* An address is simply the address of the function. */ -typedef unsigned long func_desc_t; - static func_desc_t func_desc(unsigned long addr) { - return addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func; + func_desc_t desc = { + .addr = addr, + }; + + return desc; } /* PowerPC64 specific values for the Elf64_Sym st_other field. */ @@ -63,20 +57,9 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #else -/* An address is address of the OPD entry, which contains address of fn. */ -typedef struct ppc64_opd_entry func_desc_t; - static func_desc_t func_desc(unsigned long addr) { - return *(struct ppc64_opd_entry *)addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return func_desc(addr).funcaddr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func.funcaddr; + return *(struct func_desc *)addr; } static unsigned int local_entry_offset(const Elf64_Sym *sym) { @@ -93,6 +76,16 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) } #endif +static unsigned long func_addr(unsigned long addr) +{ + return func_desc(addr).addr; +} + +static unsigned long stub_func_addr(func_desc_t func) +{ + return func.addr; +} + #define STUB_MAGIC 0x73747562 /* stub */ /* Like PPC32, we need little trampolines to do > 24-bit jumps (into @@ -187,7 +180,7 @@ static int relacmp(const void *_x, const void *_y) static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) { - /* One extra reloc so it's always 0-funcaddr terminated */ + /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; unsigned i; @@ -277,6 +270,12 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, return NULL; } +bool module_init_section(const char *name) +{ + /* We don't handle .init for the moment: always return false. 
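(Background for the module_64.c refactor above: previously ELFv2 used a bare unsigned long as func_desc_t — a function pointer is the entry address — while ELFv1 used struct ppc64_opd_entry, a pointer into the OPD. The series folds both into a struct func_desc with an addr member, plus the toc member that the signal_64.c hunk below also reads. Roughly, and simplified from the real definitions:)

```c
/* Simplified from the kernel's definitions -- illustrative only. */
#ifdef PPC64_ELF_ABI_v1
struct func_desc {
	unsigned long addr;	/* entry point, read from the OPD entry */
	unsigned long toc;	/* TOC base the callee expects in r2 */
};
#else
struct func_desc {
	unsigned long addr;	/* ELFv2: the pointer is the entry point */
};
#endif
typedef struct func_desc func_desc_t;

/* Both ABIs can now share the accessors seen in the diff: */
unsigned long model_func_addr(func_desc_t desc)
{
	return desc.addr;	/* cf. func_addr() / stub_func_addr() */
}
```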
*/ + return false; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -286,7 +285,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* Find .toc and .stubs sections, symtab and strtab */ for (i = 1; i < hdr->e_shnum; i++) { - char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { @@ -298,10 +296,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); - /* We don't handle .init for the moment: rename to _init */ - while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) - p[0] = '_'; - if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), @@ -428,7 +422,7 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3d30d40a0e9c..86c4f009563d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -352,6 +352,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; + // Pass the boot CPU's hard CPU id back to our caller + *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: @@ -388,9 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - allocate_paca(boot_cpuid); #endif - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); return 0; } @@ -714,6 +715,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { + u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -790,8 +792,6 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_paca_ptrs(); - DBG("Scanning CPUs ...\n"); dt_cpu_ftrs_scan(); @@ -799,7 +799,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -816,6 +816,11 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + // NB. 
paca is not installed until later in early_setup() + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index b8be1d6668b5..f15bc78caf71 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -841,7 +841,7 @@ static const struct user_regset_view user_ppc_compat_view = { const struct user_regset_view *task_user_regset_view(struct task_struct *task) { - if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) + if (IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task)) return &user_ppc_compat_view; return &user_ppc_native_view; } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c43f77e2ac31..55742ef1f991 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -22,7 +22,6 @@ #include <linux/syscalls.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> #include <asm/debug.h> #define CREATE_TRACE_POINTS @@ -445,4 +444,10 @@ void __init pt_regs_check(void) * real registers. */ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); + +#ifdef PPC64_ELF_ABI_v1 + BUILD_BUG_ON(!IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#else + BUILD_BUG_ON(IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#endif } diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 02d4719bf43a..232e4549defe 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -8,8 +8,10 @@ #include <asm/ppc_asm.h> RELA = 7 -RELACOUNT = 0x6ffffff9 +RELASZ = 8 +RELAENT = 9 R_PPC64_RELATIVE = 22 +R_PPC64_UADDR64 = 43 /* * r3 = desired final address of kernel @@ -25,29 +27,38 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ + ld r13,(p_sym - 0b)(r12) + add r13,r13,r12 /* r13 has runtime addr of .dynsym */ /* - * Scan the dynamic section for the RELA and RELACOUNT entries. + * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. */ li r7,0 li r8,0 -1: ld r6,0(r11) /* get tag */ +.Ltags: + ld r6,0(r11) /* get tag */ cmpdi r6,0 - beq 4f /* end of list */ + beq .Lend_of_list /* end of list */ cmpdi r6,RELA bne 2f ld r7,8(r11) /* get RELA pointer in r7 */ - b 3f -2: addis r6,r6,(-RELACOUNT)@ha - cmpdi r6,RELACOUNT@l + b 4f +2: cmpdi r6,RELASZ bne 3f - ld r8,8(r11) /* get RELACOUNT value in r8 */ -3: addi r11,r11,16 - b 1b -4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + ld r8,8(r11) /* get RELASZ value in r8 */ + b 4f +3: cmpdi r6,RELAENT + bne 4f + ld r12,8(r11) /* get RELAENT value in r12 */ +4: addi r11,r11,16 + b .Ltags +.Lend_of_list: + cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */ cmpdi cr1,r8,0 - beq 6f - beq cr1,6f + beq .Lout + beq cr1,.Lout + cmpdi r12,0 + beq .Lout /* * Work out linktime address of _stext and hence the @@ -62,23 +73,39 @@ _GLOBAL(relocate) /* * Run through the list of relocations and process the - * R_PPC64_RELATIVE ones. + * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones. 
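(Unpacking the comment above: the rewritten boot relocation loop now derives its iteration count from the standard DT_RELASZ/DT_RELAENT pair — RELASZ / RELAENT entries — instead of the non-standard DT_RELACOUNT, and additionally handles R_PPC64_UADDR64, whose targets may be unaligned. A rough C rendering of what the asm does, using standard <elf.h> types, with delta standing for the link-to-runtime displacement:)

```c
#include <elf.h>
#include <string.h>

/* Rough C equivalent of the rewritten relocate() loop (illustrative;
 * the real asm works on physical addresses and raw dynamic tags). */
void model_relocate(unsigned long delta, Elf64_Rela *rela,
		    unsigned long relasz, unsigned long relaent,
		    Elf64_Sym *dynsym, unsigned char *base)
{
	for (unsigned long i = 0; i < relasz / relaent; i++) {
		Elf64_Rela *r = &rela[i];
		unsigned long val;

		switch (ELF64_R_TYPE(r->r_info)) {
		case R_PPC64_RELATIVE:
			val = r->r_addend + delta;
			break;
		case R_PPC64_UADDR64:	/* addend + symbol value, relocated */
			val = dynsym[ELF64_R_SYM(r->r_info)].st_value +
			      r->r_addend + delta;
			break;
		default:
			continue;
		}
		/* memcpy because UADDR64 targets may be unaligned -- that is
		 * the reason this relocation type exists at all. */
		memcpy(base + r->r_offset, &val, sizeof(val));
	}
}
```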
*/ + divd r8,r8,r12 /* RELASZ / RELAENT */ mtctr r8 -5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */ +.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ cmpdi r0,R_PPC64_RELATIVE - bne 6f + bne .Luaddr64 ld r6,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ + b .Lstore +.Luaddr64: + srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + clrldi r0,r0,32 + cmpdi r0,R_PPC64_UADDR64 + bne .Lnext + ld r6,0(r9) + ld r0,16(r9) + mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ + add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ + ld r14,8(r14) + add r0,r0,r14 +.Lstore: add r0,r0,r3 stdx r0,r7,r6 - addi r9,r9,24 - bdnz 5b - -6: blr +.Lnext: + add r9,r9,r12 + bdnz .Lrels +.Lout: + blr .balign 8 p_dyn: .8byte __dynamic_start - 0b p_rela: .8byte __rela_dyn_start - 0b +p_sym: .8byte __dynamic_symtab - 0b p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 733e6ef36758..1f42aabbbab3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1313,6 +1313,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e159d4093d98..d96fd14bd7c9 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -747,14 +747,29 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } +static int link_stack_flush_get(void *data, u64 *val) +{ + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, arch_debugfs_dir, NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("link_stack_flush", 0600, + arch_debugfs_dir, NULL, + &fops_link_stack_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083..1ee4640a2641 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f8da937df918..518ae5aa9410 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -456,8 +456,8 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) { - DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", - 
nthreads); + DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n", + (len / sizeof(int))); } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); intserv = of_get_property(dn, "reg", &len); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..e547066a06aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,7 +67,6 @@ #include <asm/kup.h> #include <asm/early_ioremap.h> #include <asm/pgalloc.h> -#include <asm/asm-prototypes.h> #include "setup.h" diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d1e1fc0acbea..73d483b07ff3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -936,11 +936,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, * descriptor is the entry address of signal and the second * entry is the TOC value we need to use. */ - func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ksig->ka.sa.sa_handler; + struct func_desc __user *ptr = + (struct func_desc __user *)ksig->ka.sa.sa_handler; - err |= get_user(regs->ctr, &funct_desc_ptr->entry); - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + err |= get_user(regs->ctr, &ptr->addr); + err |= get_user(regs->gpr[2], &ptr->toc); } /* enter the signal handler in native-endian mode */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b7fd6a72aa76..de0f6f09a5dd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,7 +57,6 @@ #include <asm/vdso.h> #include <asm/debug.h> #include <asm/kexec.h> -#include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> @@ -716,7 +715,7 @@ void smp_send_stop(void) } #endif /* CONFIG_NMI_IPI */ -struct task_struct *current_set[NR_CPUS]; +static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 825931e400df..c4f5b4ce926f 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -35,7 +35,6 @@ #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> -#include <asm/asm-prototypes.h> static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8e83d19fe8fa..828d0f4106d2 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -29,7 +29,6 @@ #include <asm/cache.h> #include <asm/8xx_immap.h> #include <asm/machdep.h> -#include <asm/asm-prototypes.h> #include "setup.h" diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecdd..f5cbfe5efd25 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -69,7 +69,7 @@ #include <asm/smp.h> #include <asm/vdso_datapage.h> #include <asm/firmware.h> -#include <asm/asm-prototypes.h> +#include <asm/mce.h> /* powerpc clocksource/clockevent code */ @@ -107,7 +107,12 @@ struct clock_event_device decrementer_clockevent = { }; EXPORT_SYMBOL(decrementer_clockevent); -DEFINE_PER_CPU(u64, decrementers_next_tb); +/* + * This always puts next_tb beyond now, so the clock event will never fire + * with the usual comparison, no need for a separate test for stopped. 
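(On the comment above: decrementer_shutdown() used to "stop" the clock event by programming the maximum decrementer value, which still fires eventually; with the ~0ULL sentinel the timebase can never reach next_tb, so the now >= *next_tb test in timer_interrupt() simply stays false for a stopped device. A compact model of that logic, with hypothetical helper names:)

```c
#include <stdint.h>
#include <stdio.h>

#define DEC_CLOCKEVENT_STOPPED (~0ULL)

static uint64_t next_tb = DEC_CLOCKEVENT_STOPPED;  /* per-CPU in the kernel */

/* Models the timer_interrupt() decision after this patch. */
static void model_timer_interrupt(uint64_t now)
{
	if (now >= next_tb)	/* never true while stopped: tb can't reach ~0 */
		puts("run event handler");
	else
		puts("rearm decrementer");
}

int main(void)
{
	model_timer_interrupt(12345);	/* stopped: just rearms */
	next_tb = 10000;		/* armed, deadline in the past */
	model_timer_interrupt(12345);	/* fires the handler */
	return 0;
}
```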
+ */ +#define DEC_CLOCKEVENT_STOPPED ~0ULL +DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED; EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); @@ -582,8 +587,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); @@ -638,14 +644,13 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) if (test_irq_work_pending()) { clear_irq_work_pending(); + mce_run_irq_context_handlers(); irq_work_run(); } now = get_tb(); if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + evt->event_handler(evt); __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; @@ -664,9 +669,6 @@ EXPORT_SYMBOL(timer_interrupt); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void timer_broadcast_interrupt(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = ~(u64)0; tick_receive_broadcast(); __this_cpu_inc(irq_stat.broadcast_irqs_event); } @@ -892,7 +894,9 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(decrementer_max, dev); + __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED); + set_dec_or_work(decrementer_max); + return 0; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. 
EE is still off */ li r4, MSR_RI mtmsrd r4, 1 diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 858503775c58..542aa7a8b2b4 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -8,13 +8,13 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o else obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o endif +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 80b6285769f2..4ee04aacf9f1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include <linux/init.h> #include <linux/list.h> -#include <asm/asm-prototypes.h> #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> @@ -910,30 +909,30 @@ int __init ftrace_dyn_arch_init(void) extern void ftrace_graph_call(void); extern void ftrace_graph_stub(void); -int ftrace_enable_ftrace_graph_caller(void) +static int ftrace_modify_ftrace_graph_caller(bool enable) { unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); ppc_inst_t old, new; - old = ftrace_call_replace(ip, stub, 0); - new = ftrace_call_replace(ip, addr, 0); + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? 
addr : stub, 0); return ftrace_modify_code(ip, old, new); } -int ftrace_disable_ftrace_graph_caller(void) +int ftrace_enable_ftrace_graph_caller(void) { - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - old = ftrace_call_replace(ip, addr, 0); - new = ftrace_call_replace(ip, stub, 0); + return ftrace_modify_ftrace_graph_caller(true); +} - return ftrace_modify_code(ip, old, new); +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(false); } /* @@ -944,6 +943,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) { unsigned long return_hooker; + int bit; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -951,13 +951,27 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; + bit = ftrace_test_recursion_trylock(ip, parent); + if (bit < 0) + goto out; + return_hooker = ppc_function_entry(return_to_handler); if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) parent = return_hooker; + + ftrace_test_recursion_unlock(bit); out: return parent; } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + fregs->regs.link = prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); +} +#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef PPC64_ELF_ABI_v1 diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S deleted file mode 100644 index 0a02c0cb12d9..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ /dev/null @@ -1,187 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from entry_32.S - */ - -#include <linux/magic.h> -#include <asm/reg.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/export.h> -#include <asm/ptrace.h> - -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r12 to play with. We use r12 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. 
- */ - mflr r12 - mtctr r12 - mtlr r0 - bctr -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ - subi r3, r3, MCOUNT_INSN_SIZE - lis r5,function_trace_op@ha - lwz r5,function_trace_op@l(r5) - li r6, 0 -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - MCOUNT_RESTORE_FRAME -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - /* old link register ends up in ctr reg */ - bctr - - -_GLOBAL(ftrace_stub) - blr - -_GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - stw r0,LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stwu r1,-INT_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - stw r0, GPR0(r1) - stmw r2, GPR2(r1) - - /* Save previous stack pointer (r1) */ - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - stw r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - stw r0, _LINK(r1) - - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - stw r8, _MSR(r1) - stw r9, _CTR(r1) - stw r10, _XER(r1) - stw r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - - /* Load ctr with the possibly modified NIP */ - lwz r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - lmw r2, GPR2(r1) - - /* Restore possibly modified LR */ - lwz r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, INT_FRAME_SIZE - - b ftrace_caller_common - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stwu r1,-48(r1) - stw r3, 12(r1) - stw r4, 16(r1) - stw r5, 20(r1) - stw r6, 24(r1) - stw r7, 28(r1) - stw r8, 32(r1) - stw r9, 36(r1) - stw r10,40(r1) - - addi r5, r1, 48 - mfctr r4 /* ftrace_caller has moved local addr here */ - stw r4, 44(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. 
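(The 32-bit graph caller being deleted here — like its 64-bit counterpart removed further down — implements the standard function-graph diversion: hand the saved LR and the call-site IP to prepare_ftrace_return(), which records the real return address through function_graph_enter() and hands back return_to_handler. Stripped of the register shuffling, the control flow is roughly the following schematic, not runnable as-is:)

```c
/* Conceptual shape of the graph-caller asm; the prototype matches
 * prepare_ftrace_return() in ftrace.c above, the rest is schematic. */
extern unsigned long prepare_ftrace_return(unsigned long parent,
					   unsigned long ip,
					   unsigned long sp);

static void model_graph_caller(unsigned long *lr_save_slot,
			       unsigned long callsite_ip,
			       unsigned long sp)
{
	/* Returns return_to_handler when tracing, or the original LR
	 * when the graph tracer declines (dead, paused, recursion). */
	unsigned long divert = prepare_ftrace_return(*lr_save_slot,
						     callsite_ip, sp);

	/* The asm stores this back where LR was saved, so the traced
	 * function "returns" into return_to_handler, which later jumps
	 * to the real caller recorded by function_graph_enter(). */
	*lr_save_slot = divert;
}
```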
- */ - stw r3,52(r1) - mtlr r3 - lwz r0,44(r1) - mtctr r0 - - lwz r3, 12(r1) - lwz r4, 16(r1) - lwz r5, 20(r1) - lwz r6, 24(r1) - lwz r7, 28(r1) - lwz r8, 32(r1) - lwz r9, 36(r1) - lwz r10,40(r1) - - addi r1, r1, 48 - - bctr - -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_low.S index 25e5b9e47c06..0bddf1fa6636 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -10,6 +10,7 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> +#ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: @@ -21,6 +22,7 @@ ftrace_tramp_text: ftrace_tramp_init: .space 64 .popsection +#endif _GLOBAL(mcount) _GLOBAL(_mcount) @@ -33,6 +35,7 @@ EXPORT_SYMBOL(_mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ +#ifdef CONFIG_PPC64 std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -46,6 +49,11 @@ _GLOBAL(return_to_handler) * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif bl ftrace_return_to_handler nop @@ -53,11 +61,17 @@ _GLOBAL(return_to_handler) /* return value has real return address */ mtlr r3 +#ifdef CONFIG_PPC64 ld r1, 0(r1) ld r4, -32(r1) ld r3, -24(r1) ld r2, -16(r1) ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif /* Jump back to real return address */ blr diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index d636fc755f60..4fa23e260cab 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -32,52 +32,71 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. */ -_GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) - +.macro ftrace_regs_entry allregs /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_GPRS(2, 11, r1) + SAVE_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 + /* Save the original return address in A's stack frame */ + std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Ok to continue? 
*/ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif - SAVE_GPRS(12, 31, r1) + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(11, 31, r1) + .else +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) +#endif + .endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE - std r8, GPR1(r1) + PPC_STL r8, GPR1(r1) + .if \allregs == 1 /* Load special regs for save below */ mfmsr r8 mfctr r9 mfxer r10 mfcr r11 + .else + /* Clear MSR to flag as ftrace_caller versus ftrace_regs_caller */ + li r8, 0 + .endif /* Get the _mcount() call site out of LR */ mflr r7 /* Save it as pt_regs->nip */ - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) /* Save the read LR in pt_regs->link */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) + std r2, STK_GOT(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ addis r3,r2,function_trace_op@toc@ha addi r3,r3,function_trace_op@toc@l ld r5,0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif -#ifdef CONFIG_LIVEPATCH - mr r14,r7 /* remember old NIP */ +#ifdef CONFIG_LIVEPATCH_64 + mr r14, r7 /* remember old NIP */ #endif + /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -85,128 +104,87 @@ _GLOBAL(ftrace_regs_caller) mr r4, r0 /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) + PPC_STL r8, _MSR(r1) + .if \allregs == 1 + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + PPC_STL r11, _CCR(r1) + .endif /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop + addi r6, r1, STACK_FRAME_OVERHEAD +.endm +.macro ftrace_regs_exit allregs /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) + PPC_LL r3, _NIP(r1) mtctr r3 -#ifdef CONFIG_LIVEPATCH + +#ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered?
*/ #endif /* Restore gprs */ - REST_GPR(0, r1) + .if \allregs == 1 REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#ifdef CONFIG_LIVEPATCH_64 + REST_GPR(14, r1) +#endif + .endif /* Restore possibly modified LR */ - ld r0, _LINK(r1) + PPC_LL r0, _LINK(r1) mtlr r0 +#ifdef CONFIG_PPC64 /* Restore callee's TOC */ - ld r2, 24(r1) + ld r2, STK_GOT(r1) +#endif /* Pop our stack frame */ addi r1, r1, SWITCH_FRAME_SIZE -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - bctr /* jump after _mcount site */ +.endm -_GLOBAL(ftrace_stub) - blr - -ftrace_no_trace: - mflr r3 - mtctr r3 - REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE - mtlr r0 - bctr +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + ftrace_regs_exit 1 _GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ - std r0, LRSAVE(r1) - - /* Create our stack frame + pt_regs */ - stdu r1, -SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_GPRS(3, 10, r1) - - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace - - /* Get the _mcount() call site out of LR */ - mflr r7 - std r7, _NIP(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3, r2, function_trace_op@toc@ha - addi r3, r3, function_trace_op@toc@l - ld r5, 0(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Set pt_regs to NULL */ - li r6, 0 - + ftrace_regs_entry 0 /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_call ftrace_call: bl ftrace_stub nop + ftrace_regs_exit 0 - ld r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - REST_GPRS(3, 10, r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) +_GLOBAL(ftrace_stub) + blr - /* Pop our stack frame */ +#ifdef CONFIG_PPC64 +ftrace_no_trace: + mflr r3 + mtctr r3 + REST_GPR(3, r1) addi r1, r1, SWITCH_FRAME_SIZE - - /* Reload original LR */ - ld r0, LRSAVE(r1) mtlr r0 + bctr +#endif - /* Handle function_graph or go back */ - b ftrace_caller_common - -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As * such it can only clobber registers which are volatile and used in @@ -273,55 +251,3 @@ livepatch_handler: /* Return to original caller of live patched function */ blr #endif /* CONFIG_LIVEPATCH */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stdu r1, -112(r1) - /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) - - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addi r5, r1, 112 - mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR to this. 
- */ - mtlr r3 - - ld r0, 40(r1) - mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) - - /* Restore callee's TOC */ - ld r2, 24(r1) - - addi r1, r1, 112 - mflr r0 - std r0, LRSAVE(r1) - bctr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso/.gitignore index 84151a7ba31d..dd9bdd67758b 100644 --- a/arch/powerpc/kernel/vdso64/.gitignore +++ b/arch/powerpc/kernel/vdso/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +vdso32.lds +vdso32.so.dbg vdso64.lds vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile new file mode 100644 index 000000000000..954974287ee7 --- /dev/null +++ b/arch/powerpc/kernel/vdso/Makefile @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-2.0 + +# List of files in the vdso, has to be asm only for now + +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 +include $(srctree)/lib/vdso/Makefile + +obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o +obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. 
+ CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) +endif + +# Build rules + +ifdef CROSS32_COMPILE + VDSOCC := $(CROSS32_COMPILE)gcc +else + VDSOCC := $(CC) +endif + +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n + +ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both + +CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 +AS32FLAGS := -D__VDSO32__ -s + +CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ -s + +targets += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -Upowerpc +targets += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE + $(call if_changed,vdso32ld_and_check) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE + $(call if_changed,vdso64ld_and_check) + +# assembly rules for the .S files +$(obj-vdso32): %-32.o: %.S FORCE + $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) +$(obj-vdso64): %-64.o: %.S FORCE + $(call if_changed_dep,vdso64as) +$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) + +# Generate VDSO offsets using helper script +gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +quiet_cmd_vdso32sym = VDSO32SYM $@ + cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ +gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +quiet_cmd_vdso64sym = VDSO64SYM $@ + cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdso32sym) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdso64sym) + +# actual build commands +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index f340e82d1981..d4e43ab2d5df 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) add r8,r8,r5 /* ensure we get enough */ #ifdef CONFIG_PPC64 lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ #else srwi. r8, r8, L1_CACHE_SHIFT mr r7, r6 @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? 
*/ #endif diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index 65244416ab94..db8e167f0166 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -30,11 +30,15 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mr. r4,r3 get_datapage r3 mtlr r12 +#ifdef __powerpc64__ + addi r3,r3,CFG_SYSCALL_MAP64 +#else addi r3,r3,CFG_SYSCALL_MAP32 +#endif + crclr cr0*4+so beqlr li r0,NR_syscalls stw r0,0(r4) - crclr cr0*4+so blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) @@ -49,8 +53,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) mflr r12 .cfi_register lr,r12 get_datapage r3 +#ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) - lwz r3,CFG_TB_TICKS_PER_SEC(r3) +#endif + PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh index c7b54a5dcd3e..c7b54a5dcd3e 100755 --- a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh +++ b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh index 4bf15ffd5933..4bf15ffd5933 100755 --- a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh +++ b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S index ff5e214fec41..8e08ccf19062 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso/getcpu.S @@ -19,8 +19,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpwi cr0,r3,0 - cmpwi cr1,r4,0 + PPC_LCMPI cr0,r3,0 + PPC_LCMPI cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index d21d08140a5e..eb9c81e1c218 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO + * Userland implementation of gettimeofday() for processes + * for use in the vDSO * * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, * IBM Corp. 
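(With the vdso32/vdso64 sources merged below, one cvdso_call shim backs __kernel_clock_gettime on both builds. Userspace is unaffected: the C library keeps resolving clock_gettime() through the vDSO when the symbol is present, so the ordinary call below still takes the fast path and avoids a syscall:)

```c
#include <stdio.h>
#include <time.h>

/* Ordinary userspace call; on powerpc the libc routes this through
 * __kernel_clock_gettime in the mapped vDSO when available. */
int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
```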
@@ -12,7 +12,49 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> -#include <asm/vdso/gettimeofday.h> + +/* + * The macro sets up two stack frames, one for the caller and one for the callee + * because there is no requirement for the caller to set a stack frame when + * calling the VDSO, so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct call_time=0 + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + .ifeq \call_time + addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + .ifeq \call_time + cmpwi r3, 0 + .endif + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + .ifeq \call_time + beqlr+ + crset so + neg r3, r3 + .endif + blr + .cfi_endproc +.endm .text /* @@ -41,9 +83,11 @@ V_FUNCTION_END(__kernel_clock_gettime) * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); * */ +#ifndef __powerpc64__ V_FUNCTION_BEGIN(__kernel_clock_gettime64) cvdso_call __c_kernel_clock_gettime64 V_FUNCTION_END(__kernel_clock_gettime64) +#endif /* * Exact prototype of clock_getres() @@ -63,12 +107,13 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time + cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ /* function, just beyond the end of the integer restore area. */ +#ifndef __powerpc64__ _GLOBAL(_restgpr_31_x) _GLOBAL(_rest32gpr_31_x) lwz r0,4(r11) @@ -76,3 +121,4 @@ _GLOBAL(_rest32gpr_31_x) mtlr r0 mr r1,r11 blr +#endif diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S index 227a7327399e..227a7327399e 100644 --- a/arch/powerpc/kernel/vdso32/note.S +++ b/arch/powerpc/kernel/vdso/note.S diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp32.S index 0bcc5e5fe789..0bcc5e5fe789 100644 --- a/arch/powerpc/kernel/vdso32/sigtramp.S +++ b/arch/powerpc/kernel/vdso/sigtramp32.S diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S index 2d4067561293..2d4067561293 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso/sigtramp64.S diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 58e0099f70f4..58e0099f70f4 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 0288cad428b0..0288cad428b0 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c index 65fb03fb1731..55a287c9a736 100644 --- a/arch/powerpc/kernel/vdso32/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -2,8 +2,22 @@ /* * Powerpc userspace implementations of gettimeofday() and similar.
*/ +#include <linux/time.h> #include <linux/types.h> +#ifdef __powerpc64__ +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} +#else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, const struct vdso_data *vd) { @@ -16,16 +30,17 @@ int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, return __cvdso_clock_gettime_data(vd, clock, ts); } -int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, const struct vdso_data *vd) { - return __cvdso_gettimeofday_data(vd, tv, tz); + return __cvdso_clock_getres_time32_data(vd, clock_id, res); } +#endif -int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, const struct vdso_data *vd) { - return __cvdso_clock_getres_time32_data(vd, clock_id, res); + return __cvdso_gettimeofday_data(vd, tv, tz); } __kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile deleted file mode 100644 index 7d9a6fee0e3d..000000000000 --- a/arch/powerpc/kernel/vdso32/Makefile +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile - -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -ifdef CROSS32_COMPILE - VDSOCC := $(CROSS32_COMPILE)gcc -else - VDSOCC := $(CC) -endif - -CC32FLAGS := -ifdef CONFIG_PPC64 -CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) -endif - -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both -asflags-y := -D__VDSO32__ -s - -obj-y += vdso32_wrapper.o -targets += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -Upowerpc - -# link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso32ld_and_check) - -# assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE - $(call if_changed_dep,vdso32as) -$(obj)/vgettimeofday.o: %.o: %.c FORCE - $(call if_changed_dep,vdso32cc) - -# Generate VDSO offsets using helper script -gen-vdsosym := 
$(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< -quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..10f92f265d51 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -7,7 +7,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg" .balign PAGE_SIZE vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile deleted file mode 100644 index 3c5baaa6f1e7..000000000000 --- a/arch/powerpc/kernel/vdso64/Makefile +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile - -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both - -# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true -# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is -# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code -# generation is minimal, it will just use r29 instead. 
-ccflags-y += $(call cc-option, -ffixed-r30) - -asflags-y := -D__VDSO64__ -s - -targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso64ld_and_check) - -# Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S deleted file mode 100644 index 76c3c8cf8ece..000000000000 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * vDSO provided cache flush routines - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> -#include <asm/asm-offsets.h> - - .text - -/* - * Default "generic" version of __kernel_sync_dicache. - * - * void __kernel_sync_dicache(unsigned long start, unsigned long end) - * - * Flushes the data cache & invalidate the instruction cache for the - * provided range [start, end[ - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache) - .cfi_startproc -BEGIN_FTR_SECTION - b 3f -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - mflr r12 - .cfi_register lr,r12 - get_datapage r10 - mtlr r12 - .cfi_restore lr - - lwz r7,CFG_DCACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,CFG_ICACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - li r3,0 - blr -3: - crclr cr0*4+so - sync - isync - li r3,0 - blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S deleted file mode 100644 index 00760dc69d68..000000000000 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Access to the shared data page by the vDSO & syscall map - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. - */ - -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> - - .text - -/* - * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; - * - * returns a pointer to the syscall map. 
the map is agnostic to the - * size of "long", unlike kernel bitops, it stores bits from top to - * bottom so that memory actually contains a linear bitmap - * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of - * 32 bits int at N >> 5. - */ -V_FUNCTION_BEGIN(__kernel_get_syscall_map) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - mr r4,r3 - get_datapage r3 - mtlr r12 - addi r3,r3,CFG_SYSCALL_MAP64 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - li r0,NR_syscalls - stw r0,0(r4) - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_syscall_map) - - -/* - * void unsigned long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - get_datapage r3 - ld r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S deleted file mode 100644 index 12bbf236cdc4..000000000000 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * - * Copyright (C) IBM Corporation, 2012 - * - * Author: Anton Blanchard <anton@au.ibm.com> - */ -#include <asm/ppc_asm.h> -#include <asm/vdso.h> - - .text -/* - * Exact prototype of getcpu - * - * int __kernel_getcpu(unsigned *cpu, unsigned *node); - * - */ -V_FUNCTION_BEGIN(__kernel_getcpu) - .cfi_startproc - mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 - clrlwi r6,r5,16 - rlwinm r7,r5,16,31-15,31-0 - beq cr0,1f - stw r6,0(r3) -1: beq cr1,2f - stw r7,0(r4) -2: crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S deleted file mode 100644 index d7a7bfb51081..000000000000 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. 
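The deleted 64-bit __kernel_get_syscall_map above carries the canonical description of the map layout, which the merged routine preserves: bits are stored top to bottom in 32-bit words, independent of the size of long. The documented bit test as a compilable sketch, where syscall_supported() is an illustrative name and map stands for the pointer the vDSO routine returns:

#include <stdint.h>

/* Syscall N is present if bit (0x80000000 >> (N & 0x1f)) is set in the
 * 32-bit word at index N >> 5, exactly as the comment above describes. */
static inline int syscall_supported(const uint32_t *map, unsigned int n)
{
	return (map[n >> 5] & (0x80000000u >> (n & 0x1f))) != 0;
}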
- */ -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/vdso/gettimeofday.h> - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - cvdso_call __c_kernel_gettimeofday -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - cvdso_call __c_kernel_clock_gettime -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - cvdso_call __c_kernel_clock_getres -V_FUNCTION_END(__kernel_clock_getres) - -/* - * Exact prototype of time() - * - * time_t time(time *t); - * - */ -V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time -V_FUNCTION_END(__kernel_time) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S deleted file mode 100644 index dc2a509f7e8a..000000000000 --- a/arch/powerpc/kernel/vdso64/note.S +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c deleted file mode 100644 index 5b5500058344..000000000000 --- a/arch/powerpc/kernel/vdso64/vgettimeofday.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Powerpc userspace implementations of gettimeofday() and similar. - */ -#include <linux/time.h> -#include <linux/types.h> - -int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) -{ - return __cvdso_clock_gettime_data(vd, clock, ts); -} - -int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) -{ - return __cvdso_gettimeofday_data(vd, tv, tz); -} - -int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_data(vd, clock_id, res); -} - -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) -{ - return __cvdso_time_data(vd, time); -} diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..839d1a61411d 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -7,7 +7,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg" .balign PAGE_SIZE vdso64_end: diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2bcca818136a..fe22d940412f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,9 +281,7 @@ SECTIONS . 
= ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_PPC32 __dynamic_symtab = .; -#endif *(.dynsym) } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 8b68d9f91a03..abf5897ae88c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -134,11 +134,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform, place the crash kernel at the middle of + * the RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stacks + * in the first segment. At the same time the normal kernel + * also gets enough space to allocate memory for essential + * system resources in the first segment. Keep the crash + * kernel start at the 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 635b5fc30b53..6cc7793b8420 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -28,7 +28,6 @@ #include <asm/prom.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> -#include <asm/asm-prototypes.h> #include <asm/svm.h> #include <asm/ultravisor.h> @@ -291,7 +290,7 @@ static union thread_union kexec_stack __init_task_data = * For similar reasons to the stack above, the kexecing CPU needs to be on a * static PACA; we switch to kexec_paca.
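The reserve_crashkernel() change above is easy to sanity-check with concrete numbers. A sketch, with pick_crash_base() as an illustrative stand-in for the new placement rule and a 1GB RMA as the example:

#include <stdio.h>

/* Mirrors the hunk above: LPAR guests place the crash kernel at half the
 * RMA; other platforms keep the old cap of min(128MB, RMA/2). */
static unsigned long pick_crash_base(unsigned long rma_size, int is_lpar)
{
	unsigned long half = rma_size / 2;

	if (is_lpar)
		return half;
	return half < 0x8000000UL ? half : 0x8000000UL;
}

int main(void)
{
	unsigned long rma = 1024UL * 1024 * 1024;	/* 1GB RMA */

	printf("lpar:  %#lx\n", pick_crash_base(rma, 1));	/* 0x20000000 = 512MB */
	printf("other: %#lx\n", pick_crash_base(rma, 0));	/* 0x8000000 = 128MB */
	return 0;
}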
*/ -struct paca_struct kexec_paca; +static struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index c3e31fef0be1..1ae09992c9ea 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -228,7 +228,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u16 sid_map_mask; - static int backwards_map = 0; + static int backwards_map; if (kvmppc_get_msr(vcpu) & MSR_PR) gvsid |= VSID_PR; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 213232914367..0aeb51738ca9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm, &debugfs_htab_fops); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 8cebe5542256..e4ce2a35483f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1454,7 +1454,7 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, &debugfs_radix_fops); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9a..c886557638a1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2767,20 +2767,17 @@ static const struct file_operations debugfs_timings_ops = { }; /* Create a debugfs directory for the vcpu */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - char buf[16]; - struct kvm *kvm = vcpu->kvm; - - snprintf(buf, sizeof(buf), "vcpu%u", id); - vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, &debugfs_timings_ops); + return 0; } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { + return 0; } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ @@ -2903,8 +2900,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); - debugfs_vcpu_init(vcpu, id); - return 0; } @@ -5223,7 +5218,6 @@ void kvmppc_free_host_rm_ops(void) static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; - char buf[32]; int ret; mutex_init(&kvm->arch.uvmem_lock); @@ -5356,15 +5350,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.smt_mode = 1; kvm->arch.emul_smt_mode = 1; - /* - * Create a debugfs directory for the VM - */ - snprintf(buf, sizeof(buf), "vm%d", current->pid); - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); + return 0; +} + +static int 
kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm) +{ kvmppc_mmu_debugfs_init(kvm); if (radix_enabled()) kvmhv_radix_debugfs_init(kvm); - return 0; } @@ -5379,8 +5372,6 @@ static void kvmppc_free_vcores(struct kvm *kvm) static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { - debugfs_remove_recursive(kvm->arch.debugfs_dir); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_deactivated(); @@ -6042,6 +6033,8 @@ static struct kvmppc_ops kvm_ops_hv = { .svm_off = kvmhv_svm_off, .enable_dawr1 = kvmhv_enable_dawr1, .hash_v3_possible = kvmppc_hash_v3_possible, + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv, + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv, }; static int kvm_init_subcore_bitmap(void) @@ -6103,7 +6096,7 @@ static int kvmppc_book3s_init_hv(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { r = kvm_init_subcore_bitmap(); if (r) - return r; + goto err; } /* @@ -6119,30 +6112,42 @@ static int kvmppc_book3s_init_hv(void) np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); if (!np) { pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); - return -ENODEV; + r = -ENODEV; + goto err; } /* presence of intc confirmed - node can be dropped again */ of_node_put(np); } #endif - kvm_ops_hv.owner = THIS_MODULE; - kvmppc_hv_ops = &kvm_ops_hv; - init_default_hcalls(); init_vcore_lists(); r = kvmppc_mmu_hv_init(); if (r) - return r; + goto err; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + goto err; + } r = kvmppc_uvmem_init(); - if (r < 0) + if (r < 0) { pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); + return r; + } + + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + + return 0; + +err: + kvmhv_nested_exit(); + kvmppc_radix_exit(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7d6d91338c3f..7e52d0beee77 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -15,7 +15,6 @@ #include <linux/cma.h> #include <linux/bitops.h> -#include <asm/asm-prototypes.h> #include <asm/cputable.h> #include <asm/interrupt.h> #include <asm/kvm_ppc.h> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index 6f18632e30e9..dd9880731bd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -16,7 +16,6 @@ #include <asm/pnv-pci.h> #include <asm/opal.h> #include <asm/smp.h> -#include <asm/asm-prototypes.h> #include <asm/xive.h> #include <asm/xive-regs.h> diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9cc466006e8b..ab6d37d78c62 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1016,19 +1016,10 @@ DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry, xics, &xics_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, @@ -1440,7 +1431,7 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) static void kvmppc_xics_init(struct kvm_device *dev) { - struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + 
struct kvmppc_xics *xics = dev->private; xics_debugfs_init(xics); } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e216c068075d..c0ce5531d9bc 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2354,24 +2354,15 @@ DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry, xive, &xive_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_debugfs_init(xive); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 561a5bfe0468..f81ba6f84e72 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1259,24 +1259,15 @@ DEFINE_SHOW_ATTRIBUTE(xive_native_debug); static void xive_native_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry, xive, &xive_native_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_native_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_native_debugfs_init(xive); diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 7e8b69015d20..c8b2b4478545 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500_init(void) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1c189b5aadcc..fa0d8dbbe484 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500mc_init(void) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 48272a9b9c30..cfc9114b87d0 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_FAIL; - int advance = 1; struct instruction_op op; /* this default type might be overwritten by subcategories */ @@ -98,6 +97,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int type = op.type & INSTR_TYPE_MASK; 
int size = GETSIZE(op.type); + vcpu->mmio_is_write = OP_IS_STORE(type); + switch (type) { case LOAD: { int instr_byte_swap = op.type & BYTEREV; @@ -355,15 +356,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } } - if (emulated == EMULATE_FAIL) { - advance = 0; - kvmppc_core_queue_program(vcpu, 0); - } - trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); /* Advance past emulated instruction. */ - if (advance) + if (emulated != EMULATE_FAIL) kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5..9772b176e406 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -307,9 +307,31 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) u32 last_inst; kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); - /* XXX Deliver Program interrupt to guest. */ - pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); - r = RESUME_HOST; + kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", + last_inst); + + /* + * Injecting a Data Storage here is a bit more + * accurate since the instruction that caused the + * access could still be a valid one. + */ + if (!IS_ENABLED(CONFIG_BOOKE)) { + ulong dsisr = DSISR_BADACCESS; + + if (vcpu->mmio_is_write) + dsisr |= DSISR_ISSTORE; + + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.vaddr_accessed, dsisr); + } else { + /* + * BookE does not send a SIGBUS on a bad + * fault, so use a Program interrupt instead + * to avoid a fault loop. + */ + kvmppc_core_queue_program(vcpu, 0); + } + + r = RESUME_GUEST; break; } default: @@ -431,6 +453,8 @@ int kvm_arch_check_processor_compat(void *opaque) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { struct kvmppc_ops *kvm_ops = NULL; + int r; + /* * if we have both HV and PR enabled, default is HV */ @@ -452,11 +476,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else goto err_out; - if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + if (!try_module_get(kvm_ops->owner)) return -ENOENT; kvm->arch.kvm_ops = kvm_ops; - return kvmppc_core_init_vm(kvm); + r = kvmppc_core_init_vm(kvm); + if (r) + module_put(kvm_ops->owner); + return r; err_out: return -EINVAL; } @@ -755,7 +782,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) rcuwait_init(&vcpu->arch.wait); vcpu->arch.waitp = &vcpu->arch.wait; - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; out_vcpu_uninit: @@ -772,8 +798,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - kvmppc_remove_vcpu_debugfs(vcpu); - switch (vcpu->arch.irq_type) { case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); @@ -1114,10 +1138,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u64 gpr; - if (run->mmio.len > sizeof(gpr)) { - printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + if (run->mmio.len > sizeof(gpr)) return; - } if (!vcpu->arch.mmio_host_swabbed) { switch (run->mmio.len) { @@ -1236,10 +1258,8 @@ static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1325,10 +1345,8 @@ int kvmppc_handle_store(struct kvm_vcpu 
*vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1499,7 +1517,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1614,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; @@ -1841,6 +1859,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #ifdef CONFIG_ALTIVEC out: #endif + + /* + * We're already returning to userspace, don't pass the + * RESUME_HOST flags along. + */ + if (r > 0) + r = 0; + vcpu_put(vcpu); return r; } @@ -2497,3 +2523,16 @@ int kvm_arch_init(void *opaque) } EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); + +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) +{ + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs) + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry); +} + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + if (kvm->arch.kvm_ops->create_vm_debugfs) + kvm->arch.kvm_ops->create_vm_debugfs(kvm); + return 0; +} diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index ba56a5cbba97..25071331f8c1 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -204,21 +204,10 @@ static const struct file_operations kvmppc_exit_timing_fops = { .release = single_release, }; -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) { - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, - vcpu, &kvmppc_exit_timing_fops); - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; + debugfs_create_file("timing", 0666, debugfs_dentry, + vcpu, &kvmppc_exit_timing_fops); + return 0; } diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index feef7885ba82..45817ab82bb4 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -14,8 +14,8 @@ #ifdef CONFIG_KVM_EXIT_TIMING void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry); static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) { @@ -26,9 +26,11 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) /* if exit timing is not configured there is no need to build the c file */ static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static 
inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) +{ + return 0; +} static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} #endif /* CONFIG_KVM_EXIT_TIMING */ diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 830a126e095d..38cd0ed0a617 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -115,6 +115,7 @@ {H_VASI_STATE, "H_VASI_STATE"}, \ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_GET_ENERGY_SCALE_INFO, "H_GET_ENERGY_SCALE_INFO"}, \ {H_SET_MPP, "H_SET_MPP"}, \ {H_GET_MPP, "H_GET_MPP"}, \ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 27d9070617df..4541e8e29467 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -116,9 +116,6 @@ EXPORT_SYMBOL(__csum_partial) EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 906d43463366..00c68e7fb11e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + this_cpu_write(text_poke_area, area); return 0; diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3bcf4786e4a..3e9c27c46331 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5ed88fbee0db..6f79bde6d6c2 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -75,10 +75,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -1065,15 +1063,9 @@ Efault: int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long size; + unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; -#endif + ea = truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ 
-1097,7 +1089,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1110,7 +1105,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -1139,10 +1137,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -1173,12 +1169,8 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } -#endif + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; @@ -3389,7 +3381,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..f76a50291fd7 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> int enter_vmx_usercopy(void) { diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7abf82a698d3..985cabdd7f67 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1621,8 +1621,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER(__do_hash_fault) +DEFINE_INTERRUPT_HANDLER(do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1681,35 +1680,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault) } } -/* - * The _RAW interrupt entry checks for the in_nmi() case before - * running the full handler. - */ -DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) -{ - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. 
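The .machine push/pop additions above let a single asm statement in sstep.c assemble instructions that may be newer than the build's baseline -mcpu, without raising the flags for the whole file. The same pattern outside the kernel, as a sketch that assumes a powerpc64 toolchain; load_reserve_byte() is an illustrative name, and lbarx stands in for the load-and-reserve/store-conditional instructions the patch wraps:

static inline unsigned char load_reserve_byte(unsigned char *p)
{
	unsigned char ret;

	__asm__ __volatile__(
		".machine push\n"
		".machine power8\n"
		"lbarx %0,0,%1\n"	/* byte load-and-reserve */
		".machine pop\n"
		: "=r" (ret) : "r" (p) : "memory");
	return ret;
}

int main(void)
{
	unsigned char buf[1] = { 42 };

	return load_reserve_byte(buf) == 42 ? 0 : 1;
}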
- */ - if (unlikely(in_nmi())) { - do_bad_page_fault_segv(regs); - return 0; - } - - __do_hash_fault(regs); - - return 0; -} - #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { @@ -1776,26 +1746,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* - * __hash_page_* must run with interrupts off, as it sets the - * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any - * time and may take a hash fault reading the user stack, see - * read_user_stack_slow() in the powerpc/perf code. - * - * If that takes a hash fault on the same page as we lock here, it - * will bail out when seeing H_PAGE_BUSY set, and retry the access - * leading to an infinite loop. + * __hash_page_* must run with interrupts off, including PMI interrupts + * off, as it sets the H_PAGE_BUSY bit. * - * Disabling interrupts here does not prevent perf interrupts, but it - * will prevent them taking hash faults (see the NMI test in - * do_hash_page), then read_user_stack's copy_from_user_nofault will - * fail and perf will fall back to read_user_stack_slow(), which - * walks the Linux page tables. + * It's otherwise possible for perf interrupts to hit at any time and + * may take a hash fault reading the user stack, which could take a + * hash miss and deadlock on the same H_PAGE_BUSY bit. * * Interrupts must also be off for the duration of the * mm_is_thread_local test and update, to prevent preempt running the * mm on another CPU (XXX: this may be racy vs kthread_use_mm). */ - local_irq_save(flags); + powerpc_local_irq_pmu_save(flags); /* Is that local to this CPU ? */ if (mm_is_thread_local(mm)) @@ -1820,7 +1782,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, mm_ctx_user_psize(&mm->context), pte_val(*ptep)); - local_irq_restore(flags); + powerpc_local_irq_pmu_restore(flags); } /* diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index ea8f83afb0ae..3bc0eb21b2a0 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -150,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void __init hugetlbpage_init_default(void) +void __init hugetlbpage_init_defaultsize(void) { /* Set default large page size. 
Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 31f4cef3adac..81091b9587f6 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -9,7 +9,6 @@ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM */ -#include <asm/asm-prototypes.h> #include <asm/interrupt.h> #include <asm/mmu.h> #include <asm/mmu_context.h> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a9..d53fed4eccbd 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,7 +35,6 @@ #include <linux/kfence.h> #include <linux/pkeys.h> -#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/page.h> @@ -567,18 +566,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? "write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index ddead41e2194..b642a5a8668f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -664,10 +664,7 @@ static int __init hugetlbpage_init(void) configured = true; } - if (configured) { - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); - } else + if (!configured) pr_info("Failed to initialize. 
Disabling HugeTLB"); return 0; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 35f46bf54281..83c0ee9fbf05 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -59,6 +59,7 @@ #include <asm/sections.h> #include <asm/iommu.h> #include <asm/vdso.h> +#include <asm/hugetlb.h> #include <mm/mmu_decl.h> @@ -513,6 +514,9 @@ void __init mmu_early_init_devtree(void) } else hash__early_init_devtree(); + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_defaultsize(); + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) panic("kernel does not support any MMU type offered by platform"); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692..f3e4d069e0ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c20..b9b7fefbb64b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index edea388e9d3f..85753e32a4de 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -15,12 +15,14 @@ #include <asm/pgtable.h> +static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr, + unsigned long old, unsigned long new) +{ + return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0); +} + /* - * Updates the attributes of a page in three steps: - * - * 1. take the page_table_lock - * 2. install the new entry with the updated attributes - * 3. flush the TLB + * Updates the attributes of a page atomically. * * This sequence is safe against concurrent updates, and also allows updating the * attributes of a page currently being executed or accessed. 
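pte_update_delta() above is what lets change_page_attr() run without taking the page_table_lock: from the old and new permission constants it derives exactly which bits to clear (old & ~new) and which to set (new & ~old), and applies both in a single atomic pte_update(). A stand-alone model with illustrative bit values, not the real powerpc PTE layout:

#include <stdio.h>

#define P_READ	0x1	/* illustrative permission bits */
#define P_WRITE	0x2
#define P_DIRTY	0x4

/* Model of pte_update_delta(): clear (old & ~new), set (new & ~old),
 * and leave bits outside both masks untouched. */
static unsigned long update_delta(unsigned long pte, unsigned long old,
				  unsigned long new)
{
	return (pte & ~(old & ~new)) | (new & ~old);
}

int main(void)
{
	unsigned long rw = P_READ | P_WRITE;
	unsigned long ro = P_READ;
	unsigned long pte = rw | P_DIRTY;	/* writable, dirty page */

	/* RW -> RO: DIRTY is excluded from 'old', as in the SET_MEMORY_RO
	 * case above, so a concurrently set dirty bit survives. */
	pte = update_delta(pte, rw & ~P_DIRTY, ro);
	printf("pte = %#lx\n", pte);		/* P_READ | P_DIRTY = 0x5 */
	return 0;
}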
@@ -28,41 +30,39 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { long action = (long)data; - pte_t pte; - spin_lock(&init_mm.page_table_lock); - - pte = ptep_get(ptep); - - /* modify the PTE bits as desired, then apply */ + /* modify the PTE bits as desired */ switch (action) { case SET_MEMORY_RO: - pte = pte_wrprotect(pte); + /* Don't clear DIRTY bit */ + pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO); break; case SET_MEMORY_RW: - pte = pte_mkwrite(pte_mkdirty(pte)); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW); break; case SET_MEMORY_NX: - pte = pte_exprotect(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO); break; case SET_MEMORY_X: - pte = pte_mkexec(pte); + pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); + break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); break; default: WARN_ON_ONCE(1); break; } - pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); - /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - spin_unlock(&init_mm.page_table_lock); - return 0; } @@ -96,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. - */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2..a56ade39dc68 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -146,24 +148,23 @@ void mark_rodata_ro(void) { unsigned long numpages; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) + pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; } - numpages = PFN_UP((unsigned long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, 
numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +180,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index c7f824d294b2..9a601587836b 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -238,7 +238,10 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { - struct hash_pte ptes[4]; + struct { + unsigned long v; + unsigned long r; + } ptes[4]; unsigned long vsid, vpn, hash, hpte_group, want_v; int i, j, ssize = mmu_kernel_ssize; long lpar_rc = 0; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b20a2a83a6e7..979701d360da 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -27,21 +27,21 @@ #define PPC_JMP(dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* blr; (unconditional 'branch' with link) to absolute address */ -#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ - (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) +/* bl (unconditional 'branch' with link) */ +#define PPC_BL(dest) EMIT(PPC_INST_BL | (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) + /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_cond_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ @@ -59,10 +59,7 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) -#ifdef CONFIG_PPC32 -#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? 
-1 : 0)) -#endif - +#ifdef CONFIG_PPC64 #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -85,11 +82,6 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) - -#ifdef CONFIG_PPC64 -#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -#else -#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif /* @@ -127,15 +119,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ -#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ - -#ifdef CONFIG_PPC64 -extern const int b2p[MAX_BPF_JIT_REG + 2]; -#else -extern const int b2p[MAX_BPF_JIT_REG + 1]; -#endif - struct codegen_context { /* * This is used to track register usage as well @@ -149,10 +132,13 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; - int b2p[ARRAY_SIZE(b2p)]; + int b2p[MAX_BPF_JIT_REG + 2]; unsigned int exentry_idx; + unsigned int alt_exit_addr; }; +#define bpf_to_ppc(r) (ctx->b2p[r]) + #ifdef CONFIG_PPC32 #define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ #else @@ -180,12 +166,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +void bpf_jit_init_reg_mapping(struct codegen_context *ctx); +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg); diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h deleted file mode 100644 index b63b35e45e55..000000000000 --- a/arch/powerpc/net/bpf_jit64.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit64.h: BPF JIT compiler for PPC64 - * - * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> - * IBM Corporation - */ -#ifndef _BPF_JIT64_H -#define _BPF_JIT64_H - -#include "bpf_jit.h" - -/* - * Stack layout: - * Ensure the top half (upto local_tmp_var) stays consistent - * with our redzone usage. 
- * - * [ prev sp ] <------------- - * [ nv gpr save area ] 5*8 | - * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 16 | - * fp (r31) --> [ ebpf stack space ] upto 512 | - * [ frame header ] 32/112 | - * sp (r1) ---> [ stack pointer ] -------------- - */ - -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (5*8) -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 24 -/* stack frame excluding BPF stack, ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) - -#ifndef __ASSEMBLY__ - -/* BPF register usage */ -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { - /* function return value */ - [BPF_REG_0] = 8, - /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, - /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 2, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 -}; - -/* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 - -/* - * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, - * so ensure that it isn't in use already. - */ -#define PPC_BPF_LL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_LDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_LD(r, base, i)); \ - } while(0) -#define PPC_BPF_STL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_STDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_STD(r, base, i)); \ - } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index a4f4d347e6bd..427185256216 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -59,7 +59,9 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, */ tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; /* * Restore ctx->idx here. 
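A note on the PPC_BPF_LL/PPC_BPF_STL helpers deleted here (their logic reappears open-coded in bpf_jit_comp64.c further down): ld and std are DS-form instructions, whose 16-bit displacement has its low two bits folded into the opcode, so the displacement must be a multiple of 4. For an unaligned offset the JIT instead materializes the offset in a scratch register and uses indexed ldx/stdx, costing one extra instruction. A hedged sketch of that decision:

#include <stdint.h>

/* Returns how many instructions a BPF_DW load/store costs for a given
 * displacement: 1 for DS-form ld/std, 2 for li+ldx / li+stdx.
 * Stand-alone model, not kernel code. */
static int bpf_dw_insn_count(int16_t off)
{
	if (off % 4)
		return 2;	/* li tmp, off ; ldx/stdx via tmp */
	return 1;		/* ld/std with the offset encoded directly */
}

This variable length is why the BPF_PROBE_MEM hunks later in the series must know whether 'off' is word-aligned before computing how far to jump over the emitted load.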
This is safe as the length @@ -70,13 +72,13 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; #ifdef CONFIG_PPC32 - PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); - PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); for (j = ctx->idx - addrs[i] / 4; j < 4; j++) EMIT(PPC_RAW_NOP()); #else func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); - PPC_LI64(b2p[insn[i].dst_reg], func_addr); + PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); /* overwrite rest with nops */ for (j = ctx->idx - addrs[i] / 4; j < 5; j++) EMIT(PPC_RAW_NOP()); @@ -89,6 +91,22 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, return 0; } +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) +{ + if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { + PPC_JMP(exit_addr); + } else if (ctx->alt_exit_addr) { + if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4)))) + return -1; + PPC_JMP(ctx->alt_exit_addr); + } else { + ctx->alt_exit_addr = ctx->idx * 4; + bpf_jit_build_epilogue(image, ctx); + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -161,7 +179,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); - memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + bpf_jit_init_reg_mapping(&cgctx); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); @@ -177,8 +195,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * If we have seen a tail call, we need a second pass. * This is because bpf_jit_emit_common_epilogue() is called * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + * We also need a second pass if we ended up with too large + * a program so as to ensure BPF_EXIT branches are in range. */ - if (cgctx.seen & SEEN_TAILCALL) { + if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; @@ -193,6 +213,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * calculate total size from idx. */ bpf_jit_build_prologue(0, &cgctx); + addrs[fp->len] = cgctx.idx * 4; bpf_jit_build_epilogue(0, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; @@ -233,6 +254,7 @@ skip_init_ctx: for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; + cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index cf8dd8aea386..e46ed1e8c6ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -33,40 +33,38 @@ /* stack frame, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? 
-1 : 0)) + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN _R17 +#define BPF_PPC_TC _R16 + /* BPF register usage */ -#define TMP_REG (MAX_BPF_JIT_REG + 0) +#define TMP_REG (MAX_BPF_JIT_REG + 0) /* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 1] = { +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ /* function return value */ - [BPF_REG_0] = 12, + ctx->b2p[BPF_REG_0] = _R12; /* function arguments */ - [BPF_REG_1] = 4, - [BPF_REG_2] = 6, - [BPF_REG_3] = 8, - [BPF_REG_4] = 10, - [BPF_REG_5] = 22, + ctx->b2p[BPF_REG_1] = _R4; + ctx->b2p[BPF_REG_2] = _R6; + ctx->b2p[BPF_REG_3] = _R8; + ctx->b2p[BPF_REG_4] = _R10; + ctx->b2p[BPF_REG_5] = _R22; /* non volatile registers */ - [BPF_REG_6] = 24, - [BPF_REG_7] = 26, - [BPF_REG_8] = 28, - [BPF_REG_9] = 30, + ctx->b2p[BPF_REG_6] = _R24; + ctx->b2p[BPF_REG_7] = _R26; + ctx->b2p[BPF_REG_8] = _R28; + ctx->b2p[BPF_REG_9] = _R30; /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 18, + ctx->b2p[BPF_REG_FP] = _R18; /* eBPF jit internal registers */ - [BPF_REG_AX] = 20, - [TMP_REG] = 31, /* 32 bits */ -}; - -static int bpf_to_ppc(struct codegen_context *ctx, int reg) -{ - return ctx->b2p[reg]; + ctx->b2p[BPF_REG_AX] = _R20; + ctx->b2p[TMP_REG] = _R31; /* 32 bits */ } -/* PPC NVR range -- update this if we ever use NVRs below r17 */ -#define BPF_PPC_NVR_MIN 17 -#define BPF_PPC_TC 16 - static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) @@ -77,14 +75,22 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ +#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ + void bpf_jit_realloc_regs(struct codegen_context *ctx) { + unsigned int nvreg_mask; + if (ctx->seen & SEEN_FUNC) - return; + nvreg_mask = SEEN_NVREG_TEMP_MASK; + else + nvreg_mask = SEEN_NVREG_FULL_MASK; - while (ctx->seen & SEEN_NVREG_MASK && + while (ctx->seen & nvreg_mask && (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { - int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); int i; @@ -108,8 +114,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* @@ -118,7 +124,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. 
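A compact model of the ppc32 mapping initialized above may help: each 64-bit BPF register lives in a GPR pair, where the table names the GPR carrying the low word and the JIT uses (reg - 1) for the high word (hence the dst_reg_h/src_reg_h arithmetic in the body loop below). A stand-alone sketch, with the usual kernel values MAX_BPF_JIT_REG = 12 and TMP_REG = MAX_BPF_JIT_REG assumed here:

#include <stdio.h>

#define MAX_BPF_JIT_REG	12		/* assumption: BPF_REG_0..9, FP, AX */
#define TMP_REG		(MAX_BPF_JIT_REG + 0)

struct jit_ctx { int b2p[MAX_BPF_JIT_REG + 1]; };

static void init_reg_map(struct jit_ctx *ctx)
{
	static const int map[MAX_BPF_JIT_REG + 1] = {
		12, 4, 6, 8, 10, 22,	/* BPF_REG_0..BPF_REG_5 (low words) */
		24, 26, 28, 30,		/* BPF_REG_6..BPF_REG_9 */
		18,			/* BPF_REG_FP */
		20,			/* BPF_REG_AX */
		31,			/* TMP_REG (32 bits only) */
	};
	for (int i = 0; i <= MAX_BPF_JIT_REG; i++)
		ctx->b2p[i] = map[i];
}

static int lo_gpr(const struct jit_ctx *ctx, int bpf_reg) { return ctx->b2p[bpf_reg]; }
static int hi_gpr(const struct jit_ctx *ctx, int bpf_reg) { return ctx->b2p[bpf_reg] - 1; }

int main(void)
{
	struct jit_ctx ctx;

	init_reg_map(&ctx);
	printf("BPF_REG_0 -> r%d/r%d (lo/hi)\n", lo_gpr(&ctx, 0), hi_gpr(&ctx, 0));
	return 0;
}

Moving the table from the old global b2p[] into the context keeps the remapping done by bpf_jit_realloc_regs() local to the program being compiled.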
*/ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); else EMIT(PPC_RAW_NOP()); @@ -140,15 +146,15 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -168,7 +174,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); @@ -185,12 +191,12 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { s32 rel = (s32)func - (s32)(image + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL_ABS(func); + PPC_BL(func); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -201,6 +207,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_MTCTR(_R0)); EMIT(PPC_RAW_BCTRL()); } + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -211,8 +219,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r5-r6/BPF_REG_2 - pointer to bpf_array * r7-r8/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); - int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); /* * if (index >= array->map.max_entries) @@ -221,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -230,7 +238,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); @@ -243,7 +251,7 @@ static int 
bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLWI(_R3, 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); @@ -258,7 +266,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -282,11 +290,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; - u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; - u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; @@ -834,7 +842,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (BPF_MODE(code) == BPF_PROBE_MEM) { PPC_LI32(_R0, TASK_SIZE - off); EMIT(PPC_RAW_CMPLW(src_reg, _R0)); - PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * For BPF_DW case, "li reg_h,0" would be needed when @@ -929,8 +937,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -945,15 +956,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (ret < 0) return ret; - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e1e8c934308a..585f257da045 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,7 +17,60 @@ #include <linux/bpf.h> #include <asm/security_features.h> -#include "bpf_jit64.h" +#include "bpf_jit.h" + +/* + * Stack layout: + * Ensure the top half (up to local_tmp_var) stays consistent + * with our redzone usage.
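The PPC_BCC to PPC_BCC_SHORT conversions above, and the new bpf_jit_emit_exit_insn() they pair with, come down to two encoding limits: an unconditional b carries a 26-bit signed, word-aligned displacement (roughly +/-32 MB), while a conditional bc only carries 16 bits (roughly +/-32 KB). A hedged sketch of the range checks and of the alternate-exit fallback used when a BPF_EXIT cannot reach the epilogue (the constants mirror the kernel's is_offset_in_branch_range() helpers; the function names here are illustrative):

#include <stdbool.h>

static bool in_branch_range(long off)		/* unconditional 'b' */
{
	return off >= -0x2000000 && off <= 0x1fffffc && !(off & 3);
}

static bool in_cond_branch_range(long off)	/* conditional 'bc' */
{
	return off >= -0x8000 && off <= 0x7ffc && !(off & 3);
}

/* Model of bpf_jit_emit_exit_insn(): branch to the epilogue if it is in
 * range, otherwise branch to (or first emit) one extra epilogue copy. */
static long pick_exit_target(long exit_addr, long pc, long *alt_exit)
{
	if (in_branch_range(exit_addr - pc))
		return exit_addr;
	if (!*alt_exit)
		*alt_exit = pc;		/* plant a second epilogue here */
	return *alt_exit;
}

This is also why bpf_int_jit_compile() now forces a second pass for overly large programs: branch targets can only be validated once instruction counts are stable.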
+ * + * [ prev sp ] <------------- + * [ nv gpr save area ] 5*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 16 | + * fp (r31) --> [ ebpf stack space ] up to 512 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers BPF_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (5*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 24 +/* stack frame excluding BPF stack, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + +/* BPF register usage */ +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R8; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R3; + ctx->b2p[BPF_REG_2] = _R4; + ctx->b2p[BPF_REG_3] = _R5; + ctx->b2p[BPF_REG_4] = _R6; + ctx->b2p[BPF_REG_5] = _R7; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R27; + ctx->b2p[BPF_REG_7] = _R28; + ctx->b2p[BPF_REG_8] = _R29; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R31; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R12; + ctx->b2p[TMP_REG_1] = _R9; + ctx->b2p[TMP_REG_2] = _R10; +} + +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN _R27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { @@ -27,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); } /* @@ -73,22 +126,23 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + if (__is_defined(PPC64_ELF_ABI_v2)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + /* * Initialize tail_call_cnt if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call.
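To make the layout comment above concrete, here is the frame arithmetic as a stand-alone program. STACK_FRAME_MIN_SIZE is 32 bytes under ELFv2 and 112 under ELFv1 — hence the "32/112" frame-header row — and the ELFv2 value is an assumption baked into this sketch:

#include <stdio.h>

#define STACK_FRAME_MIN_SIZE	32	/* assumption: ELFv2; 112 on ELFv1 */
#define BPF_PPC_STACK_SAVE	(5 * 8)	/* r27..r31 save area */
#define BPF_PPC_STACK_LOCALS	24	/* local_tmp_var + tail_call_cnt */
#define BPF_PPC_STACKFRAME	(STACK_FRAME_MIN_SIZE + \
				 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)

int main(void)
{
	int stack_depth = 512;	/* the largest BPF stack a program may use */

	/* 96 bytes of fixed frame: quadword (16-byte) aligned as required */
	printf("fixed frame: %d bytes (%% 16 = %d)\n",
	       BPF_PPC_STACKFRAME, BPF_PPC_STACKFRAME % 16);
	printf("full frame : %d bytes\n", BPF_PPC_STACKFRAME + stack_depth);

	/* Without a stack frame, tail_call_cnt sits in the caller's redzone,
	 * just below the five saved NVRs, as the prologue below stores it: */
	printf("tail_call_cnt at r1 - %d\n", BPF_PPC_STACK_SAVE + 8);
	return 0;
}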
*/ if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); } -#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to @@ -96,10 +150,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -108,12 +162,12 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -123,15 +177,15 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); + EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); } } } @@ -141,42 +195,45 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); } -static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, - u64 func) +static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to CTR */ - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTCTR(12)); -#endif + unsigned long func_addr = func ? 
ppc_function_entry((void *)func) : 0; + long reladdr; + + if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + return -EINVAL; + + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); + return -ERANGE; + } + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; + if (WARN_ON_ONCE(func && is_module_text_address(func))) + return -EINVAL; + + /* skip past descriptor if elf v1 */ + func += FUNCTION_DESCR_SIZE; + /* Load function address into r12 */ - PPC_LI64(12, func); + PPC_LI64(_R12, func); /* For bpf-to-bpf function calls, the callee's address is unknown * until the last extra pass. As seen above, we use PPC_LI64() to @@ -191,20 +248,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); -#ifdef PPC64_ELF_ABI_v1 - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, 12, 8); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(12, 12, 0); -#endif - - EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -215,54 +262,53 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r4/BPF_REG_2 - pointer to bpf_array * r5/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = b2p[BPF_REG_2]; - int b2p_index = b2p[BPF_REG_3]; + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); + int bpf_tailcall_prologue_size = 8; + + if (__is_defined(PPC64_ELF_ABI_v2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); - EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); - PPC_BCC(COND_GE, out); + EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GE, out); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], 
b2p_bpf_array)); - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); - PPC_BCC(COND_EQ, out); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); -#ifdef PPC64_ELF_ABI_v1 - /* skip past the function descriptor */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); -#else - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); -#endif - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -309,9 +355,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = b2p[insn[i].dst_reg]; - u32 src_reg = b2p[insn[i].src_reg]; + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 size = BPF_SIZE(code); + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -362,8 +410,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -373,8 +421,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm > -32768 && imm <= 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -389,32 +437,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_ALU) - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, 
dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; @@ -433,35 +477,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } } - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg)); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg)); break; } goto bpf_alu32_trunc; @@ -483,8 +515,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ @@ -495,8 +527,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); @@ -512,8 +544,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); @@ -614,11 +646,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 
23)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 32: /* @@ -626,28 +658,28 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 64: /* Store the value to stack and then use byte-reverse loads */ - PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg)); } else { - EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); - EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp2_reg, 4)); + EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg)); } break; } @@ -686,15 +718,14 @@ emit_clear: break; case STF_BARRIER_SYNC_ORI: EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: - EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); - PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(12)); + ctx->seen |= SEEN_FUNC; + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); break; case STF_BARRIER_NONE: break; @@ -707,34 +738,39 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ if (BPF_CLASS(code) == 
BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; + } + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); } - PPC_BPF_STL(src_reg, dst_reg, off); break; /* @@ -751,14 +787,14 @@ emit_clear: /* *(u32 *)(dst + off) += src */ /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -771,11 +807,11 @@ emit_clear: } /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -801,18 +837,17 @@ emit_clear: * set dst_reg=0 and move on. */ if (BPF_MODE(code) == BPF_PROBE_MEM) { - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off)); if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) - PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + PPC_LI64(tmp2_reg, 0x8000000000000000ul); else /* BOOK3S_64 */ - PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); - EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); - PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + PPC_LI64(tmp2_reg, PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg)); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* - * Check if 'off' is word aligned because PPC_BPF_LL() - * (BPF_DW case) generates two instructions if 'off' is not - * word-aligned and one instruction otherwise. + * Check if 'off' is word aligned for BPF_DW, because + * we might generate two instructions. */ if (BPF_SIZE(code) == BPF_DW && (off & 3)) PPC_JMP((ctx->idx + 3) * 4); @@ -831,7 +866,12 @@ emit_clear: EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); break; case BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } break; } @@ -871,8 +911,11 @@ emit_clear: * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. 
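A condensed model of the BPF_PROBE_MEM guard emitted above: before a probed load, the JIT compares the effective address against the lowest valid kernel address (PAGE_OFFSET on Book3S-64, 0x8000000000000000 on Book3E-64) and, if the address falls below it, skips the load and zeroes the destination rather than faulting. Plain-C illustration, not the JIT itself:

#include <stdint.h>

/* What the emitted cmpld/bcc/li/ld sequence computes, in C. Reading from
 * 'ea' stands in for the guarded 'ld dst, 0(tmp1)'. */
static uint64_t probe_mem_load_model(uint64_t ea, uint64_t kernel_base)
{
	if (!(ea > kernel_base))		/* cmpld tmp1, tmp2 ; bgt over */
		return 0;			/* li dst, 0 ; skip the load */
	return *(const volatile uint64_t *)ea;	/* the probed load proper */
}

The switch from PPC_BCC to PPC_BCC_SHORT also shrinks the guard by one instruction, which is why the skip target changes from (ctx->idx + 4) * 4 to (ctx->idx + 3) * 4 in this hunk.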
*/ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -888,11 +931,15 @@ emit_clear: return ret; if (func_addr_fixed) - bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + if (ret) + return ret; + /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3)); break; /* @@ -998,14 +1045,10 @@ cond_branch: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg)); } else { - int tmp_reg = b2p[TMP_REG_1]; - - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -1034,14 +1077,12 @@ cond_branch: EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); /* ... but unsigned comparison */ if (is_jmp32) - EMIT(PPC_RAW_CMPLW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPLD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg)); } break; } @@ -1066,13 +1107,11 @@ cond_branch: else EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (is_jmp32) - EMIT(PPC_RAW_CMPW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg)); } break; } @@ -1081,19 +1120,16 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); + EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm)); else { - int tmp_reg = b2p[TMP_REG_1]; - - PPC_LI32(tmp_reg, imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, - tmp_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, + tmp1_reg)); } else { - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, - tmp_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, + 0, 0, 31)); } } break; diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f..19a8d051ddf1 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,6 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, size_t size) { unsigned long addr = (unsigned long)ptr; - int rc; if (addr > TASK_SIZE - size || (addr & (size - 1))) return -EFAULT; - rc = copy_from_user_nofault(ret, ptr, size); - - if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && 
rc) - return read_user_stack_slow(ptr, ret, size); - - return rc; + return copy_from_user_nofault(ret, ptr, size); } #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 8d0df4226328..488e8a21a11e 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -18,33 +18,6 @@ #include "callchain.h" -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. Radix - * has no need for this so it doesn't use read_user_stack_slow. - */ -int read_user_stack_slow(const void __user *ptr, void *buf, int nb) -{ - - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - struct page *page; - void *kaddr; - - if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { - kaddr = page_address(page); - - /* align address to page boundary */ - offset = addr & ~PAGE_MASK; - - memcpy(buf, kaddr + offset, nb); - put_page(page); - return 0; - } - return -EFAULT; -} - static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { return __read_user_stack(ptr, ret, sizeof(*ret)); diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index b6e25f75109d..f3db88aee4dd 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -130,7 +130,7 @@ static struct attribute *generic_compat_events_attr[] = { NULL }; -static struct attribute_group generic_compat_pmu_events_group = { +static const struct attribute_group generic_compat_pmu_events_group = { .name = "events", .attrs = generic_compat_events_attr, }; @@ -146,7 +146,7 @@ static struct attribute *generic_compat_pmu_format_attr[] = { NULL, }; -static struct attribute_group generic_compat_pmu_format_group = { +static const struct attribute_group generic_compat_pmu_format_group = { .name = "format", .attrs = generic_compat_pmu_format_attr, }; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1e8aa934e37e..12c1777187fc 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -204,7 +204,7 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; @@ -1148,7 +1148,7 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; @@ -1162,7 +1162,7 @@ static struct attribute *if_attrs[] = { NULL, }; -static struct attribute_group if_group = { +static const struct attribute_group if_group = { .name = "interface", .bin_attrs = if_bin_attrs, .attrs = if_attrs, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c756228a081f..5eb60ed5b5e8 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -65,12 +65,12 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; -static struct attribute_group event_group = { +static const struct attribute_group event_group = { .name = "events", .attrs = hv_gpci_event_attrs, }; @@ -126,11 +126,11 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct 
attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; -static struct attribute_group interface_group = { +static const struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c3..526d4b767534 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -71,7 +71,7 @@ static struct attribute *imc_format_attrs[] = { NULL, }; -static struct attribute_group imc_format_group = { +static const struct attribute_group imc_format_group = { .name = "format", .attrs = imc_format_attrs, }; @@ -90,7 +90,7 @@ static struct attribute *trace_imc_format_attrs[] = { NULL, }; -static struct attribute_group trace_imc_format_group = { +static const struct attribute_group trace_imc_format_group = { .name = "format", .attrs = trace_imc_format_attrs, }; @@ -125,7 +125,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group imc_pmu_cpumask_attr_group = { +static const struct attribute_group imc_pmu_cpumask_attr_group = { .attrs = imc_pmu_cpumask_attrs, }; @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4037ea652522..a74d382ecbb7 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -37,7 +37,7 @@ static struct attribute *isa207_pmu_format_attr[] = { NULL, }; -struct attribute_group isa207_pmu_format_group = { +const struct attribute_group isa207_pmu_format_group = { .name = "format", .attrs = isa207_pmu_format_attr, }; diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 51d31b65e423..350dccb0143c 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -134,12 +134,10 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { -#ifdef CONFIG_PPC64 - if (!test_tsk_thread_flag(task, TIF_32BIT)) - return PERF_SAMPLE_REGS_ABI_64; + if (is_tsk_32bit_task(task)) + return PERF_SAMPLE_REGS_ABI_32; else -#endif - return PERF_SAMPLE_REGS_ABI_32; + return PERF_SAMPLE_REGS_ABI_64; } void perf_get_regs_user(struct perf_regs *regs_user, diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 0975ad0b42c4..d3398100a60f 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -200,12 +200,12 @@ static struct attribute *power10_events_attr[] = { NULL }; -static struct attribute_group power10_pmu_events_group_dd1 = { +static const struct attribute_group power10_pmu_events_group_dd1 = { .name = "events", .attrs = power10_events_attr_dd1, }; -static struct attribute_group power10_pmu_events_group = { +static const struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power10_pmu_format_attr[] = { NULL, }; -static struct attribute_group power10_pmu_format_group = { +static const struct attribute_group power10_pmu_format_group = { .name = "format", .attrs = power10_pmu_format_attr, }; diff --git 
a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 99b5ba314ea7..a74211410b8d 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -405,7 +405,7 @@ static struct attribute *power7_events_attr[] = { NULL }; -static struct attribute_group power7_pmu_events_group = { +static const struct attribute_group power7_pmu_events_group = { .name = "events", .attrs = power7_events_attr, }; @@ -417,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = { NULL, }; -static struct attribute_group power7_pmu_format_group = { +static const struct attribute_group power7_pmu_format_group = { .name = "format", .attrs = power7_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f21194b5604a..e37b1e714d2b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -92,7 +92,7 @@ enum { */ /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -182,7 +182,7 @@ static struct attribute *power8_events_attr[] = { NULL }; -static struct attribute_group power8_pmu_events_group = { +static const struct attribute_group power8_pmu_events_group = { .name = "events", .attrs = power8_events_attr, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 4b7c17e36100..c9eb5232e68b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -96,7 +96,7 @@ extern u64 PERF_REG_EXTENDED_MASK; #define PVR_POWER9_CUMULUS 0x00002000 /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; int p9_dd21_bl_ev[] = { PM_MRK_ST_DONE_L2, @@ -217,7 +217,7 @@ static struct attribute *power9_events_attr[] = { NULL }; -static struct attribute_group power9_pmu_events_group = { +static const struct attribute_group power9_pmu_events_group = { .name = "events", .attrs = power9_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power9_pmu_format_attr[] = { NULL, }; -static struct attribute_group power9_pmu_format_group = { +static const struct attribute_group power9_pmu_format_group = { .name = "format", .attrs = power9_pmu_format_attr, }; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 8d6029099848..17ae75d62518 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -37,7 +37,7 @@ void __init corenet_gen_pic_init(void) unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET; - if (ppc_md.get_irq == mpic_get_coreint_irq) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE)) flags |= MPIC_ENABLE_COREINT; mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index a4127b0b161f..4c4d577effd9 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -67,4 +67,9 @@ define_machine(qemu_e500) { .get_irq = mpic_get_coreint_irq, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif }; diff --git 
a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index b697918b727d..a6b8ffcbf01a 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -95,12 +95,6 @@ static int __init mpc86xx_hpcn_probe(void) if (of_machine_is_compatible("fsl,mpc8641hpcn")) return 1; /* Looks good */ - /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ - if (of_machine_is_compatible("mpc86xx")) { - pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); - return 1; - } - return 0; } diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d6..04a6abf14c29 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87bc1929ee5a..e2e1fec91c6e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -107,6 +107,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" + select PPC_FSL_BOOK3E select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -295,7 +296,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - select FSL_EMB_PERFMON + imply FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4d82c92ddd52..f9a1615b74da 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return PTR_ERR(txwin); } + mutex_init(&txwin->task_ref.mmap_mutex); cp_inst->txwin = txwin; return 0; @@ -350,6 +351,124 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance the NIP, and return 0. Else return an error code. + */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* + * If we couldn't translate the instruction, the driver should + * return success without handling the fault; it will be retried + * or the instruction fetch will fault. + */ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* + * Not a paste instruction; the driver may fail the fault. + */ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4); /* Emulate the paste */ + + return 0; +} + +/* + * This fault handler is invoked when the core generates a page fault on + * the paste address. This happens if the kernel closes the window in the + * hypervisor (on pseries) due to a lost credit, or if the paste address + * is not mapped.
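A stand-alone sketch of the paste-failure emulation in do_fail_paste() above: fetch the faulting instruction word, match it against the paste opcode, and if it matches report failure through CR0 and step past it. The mask and opcode values below are placeholders for PPC_INST_PASTE_MASK/PPC_INST_PASTE from the kernel headers, and the regs structure is a stand-in for struct pt_regs:

#include <stdbool.h>
#include <stdint.h>

#define INST_PASTE_MASK	0xfc0007feU	/* placeholder, see ppc-opcode.h */
#define INST_PASTE	0x7c00070cU	/* placeholder, see ppc-opcode.h */

struct fake_regs { uint64_t nip; uint32_t ccr; };

static bool fail_paste(struct fake_regs *regs, uint32_t instword)
{
	if ((instword & INST_PASTE_MASK) != INST_PASTE)
		return false;		/* not a paste; let the fault stand */
	regs->ccr &= ~0xe0000000U;	/* clear CR0[0-2]: paste reports failure */
	regs->nip += 4;			/* emulate the paste by skipping it */
	return true;
}

Because paste records success or failure in CR0, clearing those bits is enough for well-behaved user space to notice the copy/paste sequence failed and retry or fall back.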
+ */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + vm_fault_t fault; + u64 paste_addr; + int ret; + + /* + * The window is not open. We shouldn't expect this error. + */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("%s(): Unexpected fault on paste address with TX window closed\n", + __func__); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* + * When the LPAR loses credits due to core removal or during + * migration, invalidate the existing mapping for the current + * paste addresses and set the windows inactive (zap_page_range in + * reconfig_close_windows()). + * A new mapping will be done later after migration or when new + * credits are available. So continue to receive faults if user + * space issues NX requests. + */ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("%s(): No previous mapping with paste address\n", + __func__); + return VM_FAULT_SIGBUS; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + /* + * The window may be inactive due to lost credit (Ex: core + * removal with DLPAR). If the window is active again when + * the credit is available, map the new paste address at the + * window virtual address. + */ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + fault = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(&txwin->task_ref.mmap_mutex); + return fault; + } + } + mutex_unlock(&txwin->task_ref.mmap_mutex); + + /* + * We received this fault because the actual window was closed. + * It can happen during migration or when credits are lost. + * Since there is no mapping, return the paste instruction failure + * to user space. + */ + ret = do_fail_paste(); + /* + * User space can retry several times until success (needed + * for migration) or should fall back to SW compression or + * manage with the existing open windows if available. + * Looking at the sysfs interface, it can determine whether these + * failures are coming during migration or core removal: + * nr_used_credits > nr_total_credits when lost credits + */ + if (!ret || (ret == -EAGAIN)) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) { struct coproc_instance *cp_inst = fp->private_data; @@ -378,10 +497,29 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EACCES; } + /* + * The initial mmap is done after the window is opened + * with ioctl. But before mmap(), this window can be closed in + * the hypervisor due to lost credit (core removal on pseries). + * So if the window is not active, return mmap() failure with + * -EACCES and expect user space to reissue mmap() when the + * window is active again, or to open a new window when a credit + * is available. + * mmap_mutex protects the paste address mmap() against DLPAR + * close/open events and allows mmap() only when the window is + * active.
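The control flow of vas_mmap_fault() above reduces to a small decision tree; here it is as a hedged stand-alone model (names are illustrative, and the two do_fail_paste() success cases — 0 and -EAGAIN — are folded into one flag):

#include <stdbool.h>

enum fault_result { FAULT_NOPAGE, FAULT_SIGBUS };

static enum fault_result vas_fault_model(bool vma_matches, bool win_active,
					 bool paste_emulated)
{
	if (!vma_matches)
		return FAULT_SIGBUS;	/* no previous mapping for this vma */
	if (win_active)
		return FAULT_NOPAGE;	/* vmf_insert_pfn() remapped the paste address */
	if (paste_emulated)
		return FAULT_NOPAGE;	/* paste now reports failure; user space retries */
	return FAULT_SIGBUS;		/* closed window and not a paste */
}

The useful property is that a fault on a closed window is not fatal: user space keeps issuing pastes, each one fails cleanly via CR0, and the mapping is re-established on the first fault after the window regains a credit.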
+ */ + mutex_lock(&txwin->task_ref.mmap_mutex); + if (txwin->status != VAS_WIN_ACTIVE) { + pr_err("%s(): Window is not active\n", __func__); + rc = -EACCES; + goto out; + } + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); if (!paste_addr) { pr_err("%s(): Window paste address failed\n", __func__); - return -EINVAL; + rc = -EINVAL; + goto out; } pfn = paste_addr >> PAGE_SHIFT; @@ -398,6 +536,11 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, paste_addr, vma->vm_start, rc); + txwin->task_ref.vma = vma; + vma->vm_ops = &vas_vm_ops; + +out: + mutex_unlock(&txwin->task_ref.mmap_mutex); return rc; } diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2ece77f49bc3..abb5e527b4db 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -255,7 +255,7 @@ static struct attribute *spu_attributes[] = { NULL, }; -static struct attribute_group spu_attribute_group = { +static const struct attribute_group spu_attribute_group = { .name = "thermal", .attrs = spu_attributes, }; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 83cea9e7ee72..2eecba3345c3 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -490,7 +490,7 @@ int spu_add_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_add_dev_attr); -int spu_add_dev_attr_group(struct attribute_group *attrs) +int spu_add_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; int rc = 0; @@ -529,7 +529,7 @@ void spu_remove_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_remove_dev_attr_group(struct attribute_group *attrs) +void spu_remove_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 369206489895..99bd027a7f7c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -340,8 +340,7 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, static void aff_set_ref_point_location(struct spu_gang *gang) { int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; + struct spu_context *tmp, *ctx; mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; lowest_offset = 0; @@ -1053,6 +1052,7 @@ void spuctx_switch_state(struct spu_context *ctx, } } +#ifdef CONFIG_PROC_FS static int show_spu_loadavg(struct seq_file *s, void *private) { int a, b, c; @@ -1074,7 +1074,8 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -}; +} +#endif int __init spu_sched_init(void) {
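The vas-api.c comments above suggest that user space consult sysfs to tell lost-credit paste failures from migration. A sketch of such a check, assuming the nr_used_credits and nr_total_credits files added by vas-sysfs.c later in this patch:

#include <stdio.h>

/* Returns 1 if credits were lost (nr_used_credits > nr_total_credits) */
static int vas_lost_credits(void)
{
	const char *base = "/sys/devices/vas/vas0/gzip/default_capabilities";
	char path[256];
	long used = 0, total = 0;
	FILE *f;

	snprintf(path, sizeof(path), "%s/nr_used_credits", base);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%ld", &used) != 1)
			used = 0;
		fclose(f);
	}
	snprintf(path, sizeof(path), "%s/nr_total_credits", base);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%ld", &total) != 1)
			total = 0;
		fclose(f);
	}
	return used > total;
}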
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 29d2036dcc9d..ba8d4e97095b 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -5,6 +5,8 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <asm/pmac_feature.h> + /* * Declaration for the various functions exported by the * pmac_* files. Mostly for use by pmac_setup diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 9942289f379b..a6677a111aca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -12,7 +12,6 @@ #include <linux/device.h> #include <linux/cpu.h> -#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0331f1973f0e..b97bc179f65a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -603,7 +603,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { }; -static struct attribute_group mpipl_group = { +static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, .bin_attrs = mpipl_bin_attr, }; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 410ed5b9de29..16c5860f1372 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -150,7 +150,7 @@ static struct attribute *initiate_attrs[] = { NULL, }; -static struct attribute_group initiate_attr_group = { +static const struct attribute_group initiate_attr_group = { .attrs = initiate_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 7e7d38b17420..18481a8c52fa 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -512,7 +512,7 @@ static struct attribute *image_op_attrs[] = { NULL /* need to NULL terminate the list of attributes */ }; -static struct attribute_group image_op_attr_group = { +static const struct attribute_group image_op_attr_group = { .attrs = image_op_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index f16a43540e30..91b36541b9e5 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,6 @@ #include <linux/percpu.h> #include <linux/jump_label.h> #include <asm/trace.h> -#include <asm/asm-prototypes.h> #ifdef CONFIG_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a..e3d44b36ae98 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c8b50fec56bf..b637bf292047 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, default: /* not happened */ BUG(); - }; + } result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, &bus_addr, iopte_flag); @@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) break; default: BUG(); - }; + } dev->core.of_node = NULL; set_dev_node(&dev->core, 0);
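In rng_whiten() above, the .machine push/pop wrapper lets popcntd be assembled even when the assembler targets an older baseline ISA; the whitening appears to use only the parity (low bit of the population count) of val. For reference, a portable C equivalent of a 64-bit parity computation (a sketch, not part of the patch):

/* Fold a 64-bit value down to its parity bit */
static inline unsigned long parity64(unsigned long val)
{
	val ^= val >> 32;
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val ^= val >> 2;
	val ^= val >> 1;
	return val & 1;	/* 1 if an odd number of bits are set */
}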
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index ee60b59024b4..9764e1a2ed5c 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ - pci.o pci_dlpar.o eeh_pseries.o msi.o + pci.o pci_dlpar.o eeh_pseries.o msi.o \ + papr_platform_attributes.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o @@ -29,6 +30,6 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o -obj-$(CONFIG_PPC_VAS) += vas.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index f162156b7b68..09c119b2f623 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -66,6 +66,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, + {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f8899d506ea4..760581c5752f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -40,7 +40,6 @@ #include <asm/plpar_wrappers.h> #include <asm/kexec.h> #include <asm/fadump.h> -#include <asm/asm-prototypes.h> #include <asm/dtl.h> #include "pseries.h" diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c7940fcfc911..2119c003fcf9 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m) seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); } +/* + * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to + * read the LPAR name, and the largest output data to 4000 + 2 bytes length. + */ +#define SPLPAR_LPAR_NAME_TOKEN 55 +#define GET_SYS_PARM_BUF_SIZE 4002 +#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE +#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE" +#endif + +/* + * Read the LPAR name using the RTAS ibm,get-system-parameter call. + * + * The name read through this call is updated if changes are made by the end + * user on the hypervisor side. + * + * Some hypervisors (like QEMU) may not provide this value. In that case, a + * non-zero value is returned. 
+ */ +static int read_rtas_lpar_name(struct seq_file *m) +{ + int rc, len, token; + union { + char raw_buffer[GET_SYS_PARM_BUF_SIZE]; + struct { + __be16 len; + char name[GET_SYS_PARM_BUF_SIZE-2]; + }; + } *local_buffer; + + token = rtas_token("ibm,get-system-parameter"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL); + if (!local_buffer) + return -ENOMEM; + + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, sizeof(*local_buffer)); + rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN, + __pa(rtas_data_buf), sizeof(*local_buffer)); + if (!rc) + memcpy(local_buffer->raw_buffer, rtas_data_buf, + sizeof(local_buffer->raw_buffer)); + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(rc)); + + if (!rc) { + /* Force end of string */ + len = min((int) be16_to_cpu(local_buffer->len), + (int) sizeof(local_buffer->name)-1); + local_buffer->name[len] = '\0'; + + seq_printf(m, "partition_name=%s\n", local_buffer->name); + } else + rc = -ENODATA; + + kfree(local_buffer); + return rc; +} + +/* + * Read the LPAR name from the Device Tree. + * + * The value read from the DT is not updated if the end user changes the LPAR + * name on the hypervisor side. + */ +static int read_dt_lpar_name(struct seq_file *m) +{ + const char *name; + + if (of_property_read_string(of_root, "ibm,partition-name", &name)) + return -ENOENT; + + seq_printf(m, "partition_name=%s\n", name); + return 0; +} + +static void read_lpar_name(struct seq_file *m) +{ + if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) + pr_err_once("Error: can't get the LPAR name\n"); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) if (firmware_has_feature(FW_FEATURE_SPLPAR)) { /* this call handles the ibm,get-system-parameter contents */ + read_lpar_name(m); parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 85033f392c78..78f3f74c7056 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -26,6 +26,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" +#include "vas.h" /* vas_migration_handler() */ #include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -265,7 +266,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) return rc; } -int pseries_devicetree_update(s32 scope) +static int pseries_devicetree_update(s32 scope) { char *rtas_buf; __be32 *data; @@ -669,12 +670,16 @@ static int pseries_migrate_partition(u64 handle) if (ret) return ret; + vas_migration_handler(VAS_SUSPEND); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else pseries_cancel_migration(handle, ret); + vas_migration_handler(VAS_RESUME); + return ret; }
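The partition_name field added above can be observed through /proc/powerpc/lparcfg. An illustrative user-space reader (sketch only):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/powerpc/lparcfg", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "partition_name=", 15))
			fputs(line, stdout);	/* e.g. "partition_name=lpar1" */
	fclose(f);
	return 0;
}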
diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c new file mode 100644 index 000000000000..515150417bb3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Platform energy and frequency attributes driver + * + * This driver creates a sysfs hierarchy at /sys/firmware/papr/ which + * encapsulates a directory structure containing files in keyword-value + * pairs that specify the energy and frequency configuration of the system. + * + * The format of exposing the sysfs information is as follows: + * /sys/firmware/papr/energy_scale_info/ + * |-- <id>/ + * |-- desc + * |-- value + * |-- value_desc (if exists) + * |-- <id>/ + * |-- desc + * |-- value + * |-- value_desc (if exists) + * + * Copyright 2022 IBM Corp. + */ + +#include <asm/hvcall.h> +#include <asm/machdep.h> + +#include "pseries.h" + +/* + * Flag attributes to fetch either all or one attribute from the HCALL + * flag = BE(0) => fetch all attributes with firstAttributeId = 0 + * flag = BE(1) => fetch a single attribute with firstAttributeId = id + */ +#define ESI_FLAGS_ALL 0 +#define ESI_FLAGS_SINGLE (1ull << 63) + +#define KOBJ_MAX_ATTRS 3 + +#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr) +#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute) +#define CURR_MAX_ESI_ATTRS 8 + +struct energy_scale_attribute { + __be64 id; + __be64 val; + u8 desc[64]; + u8 value_desc[64]; +} __packed; + +struct h_energy_scale_info_hdr { + __be64 num_attrs; + __be64 array_offset; + u8 data_header_version; +} __packed; + +struct papr_attr { + u64 id; + struct kobj_attribute kobj_attr; +}; + +struct papr_group { + struct attribute_group pg; + struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; +}; + +static struct papr_group *papr_groups; +/* /sys/firmware/papr */ +static struct kobject *papr_kobj; +/* /sys/firmware/papr/energy_scale_info */ +static struct kobject *esi_kobj; + +/* + * Energy modes can change dynamically, hence a new hcall is made each time + * the information needs to be retrieved + */ +static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct energy_scale_attribute *curr_esi; + struct h_energy_scale_info_hdr *hdr; + char *buf; + + buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + +retry: + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE, + id, virt_to_phys(buf), + esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (ESI_ATTR_SIZE * max_esi_attrs); + + temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); + if (temp_buf) { + buf = temp_buf; + } else { + kfree(buf); + return -ENOMEM; + } + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO\n"); + ret = -EIO; + goto out_buf; + } + + hdr = (struct h_energy_scale_info_hdr *) buf; + curr_esi = (struct energy_scale_attribute *) + (buf + be64_to_cpu(hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs) + * sizeof(struct energy_scale_attribute))) { + ret = -EIO; + goto out_buf; + } + + *esi = *curr_esi; + +out_buf: + kfree(buf); + + return ret; +} + +/* + * Extract and export the description of the energy scale attributes + */ +static ssize_t desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.desc); +}
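A sketch of a user-space consumer of the sysfs tree that papr_init() below creates; the paths follow the layout in the header comment of this file, and the code assumes each <id> directory contains at least a value file:

#include <dirent.h>
#include <stdio.h>

int main(void)
{
	const char *base = "/sys/firmware/papr/energy_scale_info";
	char path[512], buf[128];
	struct dirent *de;
	FILE *f;
	DIR *d = opendir(base);

	if (!d)
		return 1;
	while ((de = readdir(d)) != NULL) {
		if (de->d_name[0] == '.')
			continue;
		snprintf(path, sizeof(path), "%s/%s/value", base, de->d_name);
		f = fopen(path, "r");
		if (f) {
			if (fgets(buf, sizeof(buf), f))
				printf("attribute %s: %s", de->d_name, buf);
			fclose(f);
		}
	}
	closedir(d);
	return 0;
}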
 +/* + * Extract and export the numeric value of the energy scale attributes + */ +static ssize_t val_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val)); +} + +/* + * Extract and export the value description in string format of the energy + * scale attributes + */ +static ssize_t val_desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.value_desc); +} + +static struct papr_ops_info { + const char *attr_name; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr, + char *buf); +} ops_info[KOBJ_MAX_ATTRS] = { + { "desc", desc_show }, + { "value", val_show }, + { "value_desc", val_desc_show }, +}; + +static void add_attr(u64 id, int index, struct papr_attr *attr) +{ + attr->id = id; + sysfs_attr_init(&attr->kobj_attr.attr); + attr->kobj_attr.attr.name = ops_info[index].attr_name; + attr->kobj_attr.attr.mode = 0444; + attr->kobj_attr.show = ops_info[index].show; +} + +static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc) +{ + int i; + + for (i = 0; i < KOBJ_MAX_ATTRS; i++) { + if (!strcmp(ops_info[i].attr_name, "value_desc") && + !show_val_desc) { + continue; + } + add_attr(id, i, &pg->pgattrs[i]); + pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr; + } + + return sysfs_create_group(esi_kobj, &pg->pg); +} + + +static int __init papr_init(void) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct h_energy_scale_info_hdr *esi_hdr; + struct energy_scale_attribute *esi_attrs; + uint64_t num_attrs; + char *esi_buf; + + if (!firmware_has_feature(FW_FEATURE_LPAR) || + !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) { + return -ENXIO; + } + + esi_buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (esi_buf == NULL) + return -ENOMEM; + /* + * hcall( + * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info + * uint64 flags, // Per the flag request + * uint64 firstAttributeId, // The attribute id + * uint64 bufferAddress, // Guest physical address of the output buffer + * uint64 bufferSize); // The size in bytes of the output buffer + */ +retry: + + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0, + virt_to_phys(esi_buf), esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_esi_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (ESI_ATTR_SIZE * max_esi_attrs); + + temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL); + if (temp_esi_buf) + esi_buf = temp_esi_buf; + else + goto out_free_esi_buf; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret); + goto out_free_esi_buf; + } + + esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf; + num_attrs = be64_to_cpu(esi_hdr->num_attrs); + esi_attrs = (struct energy_scale_attribute *) + (esi_buf + be64_to_cpu(esi_hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(esi_hdr->array_offset) + + (num_attrs * sizeof(struct energy_scale_attribute))) { + goto out_free_esi_buf; + } + + papr_groups = kcalloc(num_attrs, 
sizeof(*papr_groups), GFP_KERNEL); + if (!papr_groups) + goto out_free_esi_buf; + + papr_kobj = kobject_create_and_add("papr", firmware_kobj); + if (!papr_kobj) { + pr_warn("kobject_create_and_add papr failed\n"); + goto out_papr_groups; + } + + esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj); + if (!esi_kobj) { + pr_warn("kobject_create_and_add energy_scale_info failed\n"); + goto out_kobj; + } + + /* Allocate the groups before registering */ + for (idx = 0; idx < num_attrs; idx++) { + papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1, + sizeof(*papr_groups[idx].pg.attrs), + GFP_KERNEL); + if (!papr_groups[idx].pg.attrs) + goto out_pgattrs; + + papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld", + be64_to_cpu(esi_attrs[idx].id)); + if (papr_groups[idx].pg.name == NULL) + goto out_pgattrs; + } + + for (idx = 0; idx < num_attrs; idx++) { + bool show_val_desc = true; + + /* Do not add the value desc attr if it does not exist */ + if (strnlen(esi_attrs[idx].value_desc, + sizeof(esi_attrs[idx].value_desc)) == 0) + show_val_desc = false; + + if (add_attr_group(be64_to_cpu(esi_attrs[idx].id), + &papr_groups[idx], + show_val_desc)) { + pr_warn("Failed to create papr attribute group %s\n", + papr_groups[idx].pg.name); + idx = num_attrs; + goto out_pgattrs; + } + } + + kfree(esi_buf); + return 0; +out_pgattrs: + for (i = 0; i < idx ; i++) { + kfree(papr_groups[i].pg.attrs); + kfree(papr_groups[i].pg.name); + } + kobject_put(esi_kobj); +out_kobj: + kobject_put(papr_kobj); +out_papr_groups: + kfree(papr_groups); +out_free_esi_buf: + kfree(esi_buf); + + return -ENOMEM; +} + +machine_device_initcall(pseries, papr_init); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f48e87ac89c9..1238b94b3cc1 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -120,6 +120,10 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* The bits which need to be overridden */ + u64 health_bitmap_inject_mask; + }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -347,19 +351,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; long rc; /* issue the hcall */ rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); - if (rc != H_SUCCESS) { + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap\n"); + else { + dev_err(&p->pdev->dev, "Failed to query health information, Err:%ld\n", rc); return -ENXIO; } p->lasthealth_jiffies = jiffies; - p->health_bitmap = ret[0] & ret[1]; - + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); dev_dbg(&p->pdev->dev, "Queried dimm health info. 
Bitmap:0x%016lx Mask:0x%016lx\n", ret[0], ret[1]); @@ -669,6 +683,56 @@ out: return rc; } +/* Inject a smart error. Add the dirty-shutdown-counter value to the pdsm. */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + + mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -702,6 +766,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { .size_out = sizeof(struct nd_papr_pdsm_health), .service = papr_pdsm_health, }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, @@ -838,6 +908,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -952,10 +1035,11 @@ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, NULL, }; -static struct attribute_group papr_nd_attribute_group = { +static const struct attribute_group papr_nd_attribute_group = { .name = "papr", .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694..4ba824568119 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + 
/* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; } diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index ee343ec6ab94..3676cb297767 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -51,7 +51,7 @@ static struct attribute *g[] = { NULL, }; -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = g, }; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 56c9ef9052e9..af162aeeae86 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -21,6 +21,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); extern long pseries_machine_check_realmode(struct pt_regs *regs); +void pSeries_machine_check_log_err(void); #ifdef CONFIG_SMP extern void smp_init_pseries(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 74c9b1b5bc66..f12516c3998c 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); static int ras_check_exception_token; -static void mce_process_errlog_event(struct irq_work *work); -static struct irq_work mce_errlog_process_work = { - .func = mce_process_errlog_event, -}; - #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -60,11 +55,17 @@ struct pseries_mc_errorlog { * XX 2: Reserved. * XXX 3: Type of UE error. * - * For error_type != MC_ERROR_TYPE_UE + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB * XXXXXXXX * X 1: Effective address provided. * XXXXX 5: Reserved. * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. 
*/ u8 sub_err_type; u8 reserved_1[6]; @@ -80,6 +81,7 @@ struct pseries_mc_errorlog { #define MC_ERROR_TYPE_TLB 0x04 #define MC_ERROR_TYPE_D_CACHE 0x05 #define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 /* RTAS pseries MCE error sub types */ #define MC_ERROR_UE_INDETERMINATE 0 @@ -90,6 +92,7 @@ struct pseries_mc_errorlog { #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 #define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 #define MC_ERROR_SLB_PARITY 0 #define MC_ERROR_SLB_MULTIHIT 1 @@ -103,6 +106,9 @@ struct pseries_mc_errorlog { #define MC_ERROR_TLB_MULTIHIT 2 #define MC_ERROR_TLB_INDETERMINATE 3 +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) { switch (mlog->error_type) { @@ -112,6 +118,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; default: return 0; } @@ -658,7 +666,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_ERAT: @@ -675,7 +683,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_TLB: @@ -692,7 +700,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_D_CACHE: @@ -701,6 +709,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, case MC_ERROR_TYPE_I_CACHE: mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; @@ -717,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); - unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -731,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) error_type = mce_log->error_type; disposition = mce_handle_err_realmode(disposition, error_type); - - /* - * Enable translation as we will be accessing per-cpu variables - * in save_mce_event() which may fall outside RMO region, also - * leave it enabled because subsequently we will be queuing work - * to workqueues where again per-cpu 
variables accessed, besides - fwnmi_release_errinfo() crashes when called in realmode on - pseries. - * Note: All the realmode handling like flushing SLB entries for - * SLB multihit is done by now. - */ out: - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); - - /* - * Queue irq work to log this rtas event later. - * irq_work_queue uses per-cpu variables, so do this in virt - * mode as well. - */ - irq_work_queue(&mce_errlog_process_work); - - mtmsr(msr); - return disposition; } /* * Process MCE rtas errlog event. */ -static void mce_process_errlog_event(struct irq_work *work) +void pSeries_machine_check_log_err(void) { struct rtas_error_log *err; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..069d7b3bb142 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1086,6 +1086,7 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_early = pseries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, + .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c new file mode 100644 index 000000000000..4a7fcde5afc0 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2022-23 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/kobject.h> +#include <linux/slab.h> +#include <linux/mm.h> + +#include "vas.h" + +#ifdef CONFIG_SYSFS +static struct kobject *pseries_vas_kobj; +static struct kobject *gzip_caps_kobj; + +struct vas_caps_entry { + struct kobject kobj; + struct vas_cop_feat_caps *caps; +}; + +#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) + +/* + * This function is used to get the notification from the drmgr when + * QoS credits are changed. Though the target total QoS credits are + * received here, get the official QoS capabilities from the hypervisor. + */ +static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps, + const char *buf, size_t count) +{ + int err; + u16 creds; + + err = kstrtou16(buf, 0, &creds); + if (!err) + err = vas_reconfig_capabilties(caps->win_type); + + if (err) + return -EINVAL; + + return count; +} + +#define sysfs_caps_entry_read(_name) \ +static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ +{ \ + return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \ +} + +struct vas_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct vas_cop_feat_caps *, char *); + ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); +}; + +#define VAS_ATTR_RO(_name) \ + sysfs_caps_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0444, _name##_show, NULL);
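A sketch of how a tool such as drmgr might poke update_total_credits after a QoS change on the HMC; the path follows the sysfs layout described in the comment below, and the written value is only parsed for validity since the driver re-queries the hypervisor for the authoritative total:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits";
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "0", 1) < 0)	/* any valid u16 triggers the re-read */
		perror("write");
	close(fd);
	return 0;
}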
 +/* + * Create sysfs interface: + * /sys/devices/vas/vas0/gzip/default_capabilities + * This directory contains the following VAS GZIP capabilities + * for the default credit type. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits + * Total number of default credits assigned to the LPAR which + * can be changed with a DLPAR operation. + * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits + * Number of credits used by user space. One credit will + * be assigned for each window open. + * + * /sys/devices/vas/vas0/gzip/qos_capabilities + * This directory contains the following VAS GZIP capabilities + * for the Quality of Service (QoS) credit type. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits + * Total number of QoS credits assigned to the LPAR. The user + * has to define this value using the HMC interface. It can be + * changed dynamically by the user. + * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits + * Number of credits used by user space. + * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits + * Update total QoS credits dynamically + */ + +VAS_ATTR_RO(nr_total_credits); +VAS_ATTR_RO(nr_used_credits); + +static struct vas_sysfs_entry update_total_credits_attribute = + __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger); + +static struct attribute *vas_def_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + NULL, +}; + +static struct attribute *vas_qos_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + &update_total_credits_attribute.attr, + NULL, +}; + +static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + + if (!entry->show) + return -EIO; + + return entry->show(caps, buf); +} + +static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + if (!entry->store) + return -EIO; + + return entry->store(caps, buf, count); +} + +static void vas_type_release(struct kobject *kobj) +{ + struct vas_caps_entry *centry = to_caps_entry(kobj); + kfree(centry); +} + +static const struct sysfs_ops vas_sysfs_ops = { + .show = vas_type_show, + .store = vas_type_store, +}; + +static struct kobj_type vas_def_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_def_capab_attrs, +}; + +static struct kobj_type vas_qos_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_attrs = vas_qos_capab_attrs, +}; + +static char *vas_caps_kobj_name(struct vas_caps_entry *centry, + struct kobject **kobj) +{ + struct vas_cop_feat_caps *caps = centry->caps; + + if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) { + kobject_init(&centry->kobj, &vas_qos_attr_type); + *kobj = gzip_caps_kobj; + return "qos_capabilities"; + } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) { + kobject_init(&centry->kobj, &vas_def_attr_type); + *kobj = gzip_caps_kobj; + return "default_capabilities"; + } else + return "Unknown"; +} + +/* + * Add a feature-specific capability dir entry. 
+ * Ex: VDefGzip or VQosGzip + */ +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + struct vas_caps_entry *centry; + struct kobject *kobj = NULL; + int ret = 0; + char *name; + + centry = kzalloc(sizeof(*centry), GFP_KERNEL); + if (!centry) + return -ENOMEM; + + centry->caps = caps; + name = vas_caps_kobj_name(centry, &kobj); + + if (kobj) { + ret = kobject_add(&centry->kobj, kobj, "%s", name); + + if (ret) { + pr_err("VAS: sysfs kobject add / event failed %d\n", + ret); + kobject_put(&centry->kobj); + } + } + + return ret; +} + +static struct miscdevice vas_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vas", +}; + +/* + * Add VAS and VasCaps (overall capabilities) dir entries. + */ +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + int ret; + + ret = misc_register(&vas_miscdev); + if (ret < 0) { + pr_err("%s: register vas misc device failed\n", __func__); + return ret; + } + + /* + * The hypervisor does not expose multiple VAS instances on pseries, + * unlike PowerNV where multiple VAS instances are visible. So create + * a single 'vas0' directory on pseries. + */ + pseries_vas_kobj = kobject_create_and_add("vas0", + &vas_miscdev.this_device->kobj); + if (!pseries_vas_kobj) { + pr_err("Failed to create VAS sysfs entry\n"); + return -ENOMEM; + } + + if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) || + (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) { + gzip_caps_kobj = kobject_create_and_add("gzip", + pseries_vas_kobj); + if (!gzip_caps_kobj) { + pr_err("Failed to create VAS GZIP capability entry\n"); + kobject_put(pseries_vas_kobj); + return -ENOMEM; + } + } + + return 0; +} + +#else +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + return 0; +} + +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + return 0; +} +#endif diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index d243ddc58827..1f59d78c77a1 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -26,9 +26,11 @@ static struct vas_all_caps caps_all; static bool copypaste_feat; +static struct hv_vas_cop_feat_caps hv_cop_caps; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); +static bool migration_in_progress; static long hcall_return_busy_check(long rc) { @@ -107,7 +109,6 @@ static int h_deallocate_vas_window(u64 winid) static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; - u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in the Linux VAS implementation. 
@@ -115,7 +116,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, - win->vas_win.winid, lpid, 0, + win->vas_win.winid, win->pid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); @@ -124,8 +125,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win) if (rc == H_SUCCESS) return 0; - pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", - rc, win->vas_win.winid, lpid); + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", + rc, win->vas_win.winid, win->pid); return -EIO; } @@ -310,8 +311,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, cop_feat_caps = &caps->caps; - if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > - atomic_read(&cop_feat_caps->target_lpar_creds)) { + if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > + atomic_read(&cop_feat_caps->nr_total_credits)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; @@ -338,6 +339,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, } } + txwin->pid = mfspr(SPRN_PID); + /* * Allocate / Deallocate window hcalls and setup / free IRQs * have to be protected with mutex. @@ -354,7 +357,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - rc = allocate_setup_window(txwin, (u64 *)&domain[0], + if (migration_in_progress) + rc = -EBUSY; + else + rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); mutex_unlock(&vas_pseries_mutex); if (rc) @@ -369,13 +375,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, if (rc) goto out_free; - vas_user_win_add_mm_context(&txwin->vas_win.task_ref); txwin->win_type = cop_feat_caps->win_type; mutex_lock(&vas_pseries_mutex); + /* + * It is possible to lose the acquired credit with DLPAR core + * removal after the window is opened. So if there are any + * closed windows (i.e. windows with lost credits), do not give + * a new window to user space. New windows will be opened only + * after the existing windows are reopened when credits are + * available. + */ + if (!caps->nr_close_wins) { + list_add(&txwin->win_list, &caps->list); + caps->nr_open_windows++; + mutex_unlock(&vas_pseries_mutex); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + return &txwin->vas_win; + } mutex_unlock(&vas_pseries_mutex); - return &txwin->vas_win; + put_vas_user_win_ref(&txwin->vas_win.task_ref); + rc = -EBUSY; + pr_err("No credit is available to allocate window\n"); out_free: /* @@ -385,7 +406,7 @@ out_free: free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: - atomic_dec(&cop_feat_caps->used_lpar_creds); + atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); return ERR_PTR(rc); } @@ -438,14 +459,25 @@ static int vas_deallocate_window(struct vas_window *vwin) caps = &vascaps[win->win_type].caps; mutex_lock(&vas_pseries_mutex); + /* + * The VAS window is already closed in the hypervisor when the + * credit was lost or with migration. So just remove the entry + * from the list, remove the task references and free the + * vas_window struct. 
+ */ + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + } else + vascaps[win->win_type].nr_close_wins--; list_del(&win->win_list); - atomic_dec(&caps->used_lpar_creds); + atomic_dec(&caps->nr_used_credits); + vascaps[win->win_type].nr_open_windows--; mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -500,6 +532,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(&vcaps->list); + vcaps->feat = feat; caps = &vcaps->caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, @@ -521,7 +554,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); - atomic_set(&caps->target_lpar_creds, + atomic_set(&caps->nr_total_credits, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); @@ -533,16 +566,409 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } } + rc = sysfs_add_vas_caps(caps); + if (rc) + return rc; + copypaste_feat = true; return 0; } +/* + * VAS windows can be closed due to lost credits when the core is + * removed. So reopen them if credits are available due to a DLPAR + * core add, and set the window active status. When NX sees the page + * fault on the unmapped paste address, the kernel handles the fault + * by remapping to the new paste address if the window is active. + */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds, + bool migrate) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = &vcaps->caps; + struct pseries_vas_window *win = NULL, *tmp; + int rc, mv_ents = 0; + int flag; + + /* + * Nothing to do if there are no closed windows. + */ + if (!vcaps->nr_close_wins) + return 0; + + /* + * For core removal, the hypervisor reduces the credits + * assigned to the LPAR and the kernel closes VAS windows + * in the hypervisor according to the reduced credits. The kernel + * uses LIFO (the last windows opened are closed first) and + * expects to reopen them in the same order when credits are + * available. + * For example, 40 windows are closed when the LPAR loses 2 cores + * (dedicated). If 1 core is added, this LPAR can have 20 more + * credits. It means the kernel can reopen 20 windows. So skip + * the first 20 entries in the VAS window list and reopen the + * next 20 windows. + * For partition migration, reopen all closed windows during + * resume. + */ + if ((vcaps->nr_close_wins > creds) && !migrate) + mv_ents = vcaps->nr_close_wins - creds; + + list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { + if (!mv_ents) + break; + + mv_ents--; + } + + /* + * Reopen windows only if they were closed with migration or + * DLPAR (lost credit) before. + */ + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { + /* + * This window was closed with both DLPAR and migration + * events. So reopen the window with the last event. + * User space is not suspended with the current + * migration notifier. So user space can issue DLPAR + * CPU hotplug while migration is in progress. 
In this case, + * the window will be reopened based on the last event. + */ + if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + win->vas_win.status &= ~flag; + continue; + } + + /* + * Nothing to do on this window if it is not closed + * with this flag + */ + if (!(win->vas_win.status & flag)) + continue; + + rc = allocate_setup_window(win, (u64 *)&domain[0], + caps->win_type); + if (rc) + return rc; + + rc = h_modify_vas_window(win); + if (rc) + goto out; + + mutex_lock(&win->vas_win.task_ref.mmap_mutex); + /* + * Set window status to active + */ + win->vas_win.status &= ~flag; + mutex_unlock(&win->vas_win.task_ref.mmap_mutex); + win->win_type = caps->win_type; + if (!--vcaps->nr_close_wins) + break; + } + + return 0; +out: + /* + * The window modify HCALL failed. So close the window in the + * hypervisor and return. + */ + free_irq_setup(win); + h_deallocate_vas_window(win->vas_win.winid); + return rc; +} + +/* + * The hypervisor reduces the available credits if the LPAR loses a core. + * It means the excess windows should not be active and user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excess windows and unmaps the paste address + * so that user space receives a paste instruction failure. It is then up + * to user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, + bool migrate) +{ + struct pseries_vas_window *win, *tmp; + struct vas_user_win_ref *task_ref; + struct vm_area_struct *vma; + int rc = 0, flag; + + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { + /* + * This window is already closed due to a lost credit + * or for migration before. Go to the next window. + * For migration, nothing to do since this window was + * closed for DLPAR and will be reopened even on + * the destination system with another DLPAR operation. + */ + if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || + (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { + win->vas_win.status |= flag; + continue; + } + + task_ref = &win->vas_win.task_ref; + mutex_lock(&task_ref->mmap_mutex); + vma = task_ref->vma; + /* + * The number of available credits is reduced, so select + * and close windows. + */ + win->vas_win.status |= flag; + + mmap_write_lock(task_ref->mm); + /* + * vma is set in the original mapping. But this mapping + * is done with mmap() after the window is opened with ioctl. + * So we may not see the original mapping if the core removal + * is done before the original mmap() and after the ioctl. + */ + if (vma) + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + + mmap_write_unlock(task_ref->mm); + mutex_unlock(&task_ref->mmap_mutex); + /* + * Close the VAS window in the hypervisor, but do not + * free the vas_window struct since it may be reused + * when the credit is available later (DLPAR with + * adding cores). This struct will be used + * later when the process issues close(FD). + */ + rc = deallocate_free_window(win); + /* + * This failure is from the hypervisor. + * There is no way to stop migration for these failures. + * So ignore the error and continue closing other windows. + */ + if (rc && !migrate) + return rc; + + vcap->nr_close_wins++; + + /* + * For migration, do not depend on lpar_creds in case of a + * mismatch with the hypervisor value (should not happen). 
+ * So close all active windows in the list; windows will be + * reopened based on the new lpar_creds on the + * destination system during resume. + */ + if (!migrate && !--excess_creds) + break; + } + + return 0; +} + +/* + * Get new VAS capabilities when the core add/removal configuration + * changes. Reconfigure the window configurations based on the credit + * availability in these new capabilities. + */ +int vas_reconfig_capabilties(u8 type) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds; + struct vas_caps *vcaps; + int rc = 0, nr_active_wins; + + if (type >= VAS_MAX_FEAT_TYPE) { + pr_err("Invalid credit type %d\n", type); + return -EINVAL; + } + + vcaps = &vascaps[type]; + caps = &vcaps->caps; + + mutex_lock(&vas_pseries_mutex); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (rc) + goto out; + + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + + old_nr_creds = atomic_read(&caps->nr_total_credits); + + atomic_set(&caps->nr_total_credits, new_nr_creds); + /* + * The total number of available credits may be decreased or + * increased with a DLPAR operation. This means some windows have + * to be closed / reopened. Hold the vas_pseries_mutex so that + * user space cannot open new windows. + */ + if (old_nr_creds < new_nr_creds) { + /* + * If the existing target credits are less than the new + * target, reopen windows if they were closed due to + * the previous DLPAR (core removal). + */ + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, + false); + } else { + /* + * The number of active windows is more than the new LPAR's + * available credits. So close the excess windows. + * On pseries, each window will have 1 credit. + */ + nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; + if (nr_active_wins > new_nr_creds) + rc = reconfig_close_windows(vcaps, + nr_active_wins - new_nr_creds, + false); + } + +out: + mutex_unlock(&vas_pseries_mutex); + return rc; +} +/* + * The total number of default credits available (target_credits) + * in the LPAR depends on the number of cores configured. It varies + * based on whether processors are in shared mode or dedicated mode. + * Get the notifier when the CPU configuration is changed with a DLPAR + * operation so that we get the new target_credits (VAS default + * capabilities) and then update the existing window usage if needed. + */ +static int pseries_vas_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *dn = rd->dn; + const __be32 *intserv = NULL; + int len, rc = 0; + + if ((action == OF_RECONFIG_ATTACH_NODE) || + (action == OF_RECONFIG_DETACH_NODE)) + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + /* + * Processor config is not changed + */ + if (!intserv) + return NOTIFY_OK; + + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE); + if (rc) + pr_err("Failed reconfig VAS capabilities with DLPAR\n"); + + return rc; +} + +static struct notifier_block pseries_vas_nb = { + .notifier_call = pseries_vas_notifier, +};
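pseries_vas_notifier() above returns the raw return value of vas_reconfig_capabilties(); of_reconfig notifier callbacks conventionally return NOTIFY_* codes, typically via notifier_from_errno(). A sketch of that conventional pattern, shown for illustration only and not the code of this patch:

#include <linux/notifier.h>
#include <linux/of.h>

static int example_reconfig_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	int err = 0;

	if (action == OF_RECONFIG_ATTACH_NODE ||
	    action == OF_RECONFIG_DETACH_NODE)
		err = 0;	/* placeholder for the real reconfiguration work */

	/* NOTIFY_OK on success, NOTIFY_STOP_MASK-encoded error otherwise */
	return notifier_from_errno(err);
}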
 +/* + * For LPM, all windows have to be closed on the source partition + * before migration and reopened on the destination partition + * after migration. So close windows during suspend and reopen + * them during resume. + */ +int vas_migration_handler(int action) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds = 0; + struct vas_caps *vcaps; + int i, rc = 0; + + /* + * NX-GZIP is not enabled. Nothing to do for migration. + */ + if (!copypaste_feat) + return rc; + + mutex_lock(&vas_pseries_mutex); + + if (action == VAS_SUSPEND) + migration_in_progress = true; + else + migration_in_progress = false; + + for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { + vcaps = &vascaps[i]; + caps = &vcaps->caps; + old_nr_creds = atomic_read(&caps->nr_total_credits); + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + /* + * Should not happen. But in case it does, print + * messages, close all windows in the list during + * suspend, and reopen windows based on the new + * lpar_creds on the destination system. + */ + if (old_nr_creds != new_nr_creds) { + pr_err("Target credits mismatch with the hypervisor\n"); + pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", + action, old_nr_creds, new_nr_creds); + pr_err("Used creds: %d, Active creds: %d\n", + atomic_read(&caps->nr_used_credits), + vcaps->nr_open_windows - vcaps->nr_close_wins); + } + } else { + pr_err("state(%d): Get VAS capabilities failed with %d\n", + action, rc); + /* + * We cannot stop migration with the current LPM + * implementation. So continue closing all windows in + * the list (during suspend) and return without + * opening windows (during resume) if the VAS + * capabilities HCALL failed. + */ + if (action == VAS_RESUME) + goto out; + } + + switch (action) { + case VAS_SUSPEND: + rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, + true); + break; + case VAS_RESUME: + atomic_set(&caps->nr_total_credits, new_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds, true); + break; + default: + /* should not happen */ + pr_err("Invalid migration action %d\n", action); + rc = -EINVAL; + goto out; + } + + /* + * Ignore errors during suspend and return for resume. + */ + if (rc && (action == VAS_RESUME)) + goto out; + } + +out: + mutex_unlock(&vas_pseries_mutex); + return rc; +} + static int __init pseries_vas_init(void) { - struct hv_vas_cop_feat_caps *hv_cop_caps; struct hv_vas_all_caps *hv_caps; - int rc; + int rc = 0; /* * Linux supports user space COPY/PASTE only with Radix @@ -566,35 +992,39 @@ static int __init pseries_vas_init(void) caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); - hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); - if (!hv_cop_caps) { - rc = -ENOMEM; - goto out; - } + sysfs_pseries_vas_init(&caps_all); + /* * QOS capabilities available */ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, - VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); if (rc) - goto out_cop; + goto out; } /* * Default capabilities available */ - if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, - VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); - if (rc) - goto out_cop; - } + VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); + + if (!rc && copypaste_feat) { + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); - pr_info("GZIP feature is available\n"); + pr_info("GZIP feature is available\n"); + } else { + /* + * Should not happen, and only occurs when the get default + * capabilities HCALL failed. So disable the copy/paste + * feature. 
+ */ + copypaste_feat = false; + } -out_cop: - kfree(hv_cop_caps); out: kfree(hv_caps); return rc; diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 4ecb3fcabd10..34177881e998 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -30,6 +30,14 @@ #define VAS_COPY_PASTE_USER_MODE 0x00000001 #define VAS_COP_OP_USER_MODE 0x00000010 +#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 +#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 + +enum vas_migrate_action { + VAS_SUSPEND, + VAS_RESUME, +}; + /* * Co-processor feature - GZIP QoS windows or GZIP default windows */ @@ -72,9 +80,8 @@ struct vas_cop_feat_caps { }; /* Total LPAR available credits. Can be different from max LPAR */ /* credits due to DLPAR operation */ - atomic_t target_lpar_creds; - atomic_t used_lpar_creds; /* Used credits so far */ - u16 avail_lpar_creds; /* Remaining available credits */ + atomic_t nr_total_credits; /* Total credits assigned to LPAR */ + atomic_t nr_used_credits; /* Used credits so far */ }; /* @@ -84,6 +91,9 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ + int nr_open_windows; /* Number of successful open windows */ + u8 feat; /* Feature type */ }; /* @@ -115,6 +125,7 @@ struct pseries_vas_window { u64 domain[6]; /* Associativity domain Ids */ /* this window is allocated */ u64 util; + u32 pid; /* PID associated with this window */ /* List of windows opened which is used for LPM */ struct list_head win_list; @@ -122,4 +133,17 @@ struct pseries_vas_window { char *name; int fault_virq; }; + +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int vas_reconfig_capabilties(u8 type); +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); + +#ifdef CONFIG_PPC_VAS +int vas_migration_handler(int action); +#else +static inline int vas_migration_handler(int action) +{ + return 0; +} +#endif #endif /* _VAS_H */ diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7..39186ad6b3c3 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index d5cb48b61bbd..dbcbaa4c0663 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1404,10 +1404,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, * with device trees generated by older versions of QEMU. * fsl_version will be zero if MPIC_FSL is not set. 
*/ - if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) { - WARN_ON(ppc_md.get_irq != mpic_get_coreint_irq); + if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) ppc_md.get_irq = mpic_get_irq; - } /* Reset */ diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d..bb5bda6b2357 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); @@ -1791,7 +1791,7 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private) if (xive_ops->debug_show) xive_ops->debug_show(m, private); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) xive_debug_show_ipi(m, cpu); return 0; } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 928f95004501..29456c255f9f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -67,6 +67,17 @@ static int __init xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + kfree(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -803,7 +814,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; - int i; + int i, err; if (xive_spapr_disabled()) return false; @@ -828,23 +839,26 @@ bool __init xive_spapr_init(void) } if (!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { @@ -855,10 +869,16 @@ bool __init xive_spapr_init(void) /* Initialize XIVE core with our backend */ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + goto err_mem_free; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); + return false; } machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 014e00e74d2b..63792af00417 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -39,6 +39,7 @@ $objdump -R "$vmlinux" | # R_PPC_NONE grep -F -w -v 'R_PPC64_RELATIVE R_PPC64_NONE 
+R_PPC64_UADDR64 R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA @@ -54,9 +55,3 @@ fi num_bad=$(echo "$bad_relocs" | wc -l) echo "WARNING: $num_bad bad relocations" echo "$bad_relocs" - -# If we see this type of relocation it's an idication that -# we /may/ be using an old version of binutils. -if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then - echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" -fi diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c1fdf2896021..1943a007e2d5 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -756,7 +756,7 @@ MODULE_DEVICE_TABLE (pci, pci_ids); /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver macio_pci_driver = { - .name = (char *) "macio", + .name = "macio", .id_table = pci_ids, .probe = macio_pci_probe, diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index cd267392289c..3d0d0b9d471d 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -21,6 +21,7 @@ #ifdef CONFIG_PPC #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/pmac_feature.h> #else #include <asm/macintosh.h> #include <asm/macints.h> diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index f69b964b9952..e2228b6fc09b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -149,6 +149,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), CRASHTYPE(REFCOUNT_INC_OVERFLOW), CRASHTYPE(REFCOUNT_ADD_OVERFLOW), CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d6137c70ebbe..305fc2ec3f25 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -106,6 +106,7 @@ void __init lkdtm_perms_init(void); void lkdtm_WRITE_RO(void); void lkdtm_WRITE_RO_AFTER_INIT(void); void lkdtm_WRITE_KERN(void); +void lkdtm_WRITE_OPD(void); void lkdtm_EXEC_DATA(void); void lkdtm_EXEC_STACK(void); void lkdtm_EXEC_KMALLOC(void); diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 2dede2ef658f..2c6aba3ff32b 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -10,6 +10,7 @@ #include <linux/mman.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/sections.h> /* Whether or not to fill the target memory area with do_nothing(). */ #define CODE_WRITE true @@ -21,7 +22,7 @@ /* This is non-const, so it will end up in the .data section. */ static u8 data_area[EXEC_SIZE]; -/* This is cost, so it will end up in the .rodata section. */ +/* This is const, so it will end up in the .rodata section. */ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ @@ -31,31 +32,51 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; * This just returns to the caller. It is designed to be copied into * non-executable memory regions. */ -static void do_nothing(void) +static noinline void do_nothing(void) { return; } /* Must immediately follow do_nothing for size calculuations to work out. 
*/ -static void do_overwritten(void) +static noinline void do_overwritten(void) { pr_info("do_overwritten wasn't overwritten!\n"); return; } +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + static noinline void execute_location(void *dst, bool write) { - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing, EXEC_SIZE); + memcpy(dst, do_nothing_text, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -65,16 +86,19 @@ static void execute_user_location(void *dst) int copied; /* Intentionally crossing kernel/user memory boundary. */ - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); - copied = access_process_vm(current, (unsigned long)dst, do_nothing, + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -113,8 +137,9 @@ void lkdtm_WRITE_KERN(void) size_t size; volatile unsigned char *ptr; - size = (unsigned long)do_overwritten - (unsigned long)do_nothing; - ptr = (unsigned char *)do_overwritten; + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy((void *)ptr, (unsigned char *)do_nothing, size); @@ -124,6 +149,23 @@ void lkdtm_WRITE_KERN(void) do_overwritten(); } +void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + void lkdtm_EXEC_DATA(void) { execute_location(data_area, CODE_WRITE); @@ -151,7 +193,8 @@ void lkdtm_EXEC_VMALLOC(void) void lkdtm_EXEC_RODATA(void) { - execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); } void lkdtm_EXEC_USERSPACE(void) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 0a3c80ba66be..e6991ff67526 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ 
b/drivers/pci/hotplug/rpadlpar_core.c @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void) module_init(rpadlpar_io_init); module_exit(rpadlpar_io_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots"); diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764e1..d0f7bdd2fdf2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,11 +59,24 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr); +void *dereference_kernel_function_descriptor(void *ptr); +#else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) + +/* An address is simply the address of the function. */ +typedef struct { + unsigned long addr; +} func_desc_t; #endif +static inline bool have_function_descriptors(void) +{ + return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); +} + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); diff --git a/kernel/extable.c b/kernel/extable.c index b6f330f0fe74..bda5e9761541 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -3,6 +3,7 @@ Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. */ +#include <linux/elf.h> #include <linux/ftrace.h> #include <linux/memory.h> #include <linux/extable.h> @@ -132,12 +133,33 @@ out: } /* - * On some architectures (PPC64, IA64) function pointers + * On some architectures (PPC64, IA64, PARISC) function pointers * are actually only tokens to some data that then holds the * real function address. As a result, to find if a function * pointer is part of the kernel text, we need to do some * special dereferencing first. 
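 *
 * On PPC64 ELFv1, for example, a function symbol points at an entry
 * in the ".opd" section rather than at the first instruction; the
 * entry looks roughly like:
 *
 *	struct func_desc {
 *		unsigned long addr;	// actual entry point in .text
 *		unsigned long toc;	// TOC pointer for the callee
 *		unsigned long env;	// environment pointer, unused by C
 *	};
 *
 * so "dereferencing" the descriptor means loading ->addr from it.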
*/ +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr) +{ + func_desc_t *desc = ptr; + void *p; + + if (!get_kernel_nofault(p, (void *)&desc->addr)) + ptr = p; + return ptr; +} +EXPORT_SYMBOL_GPL(dereference_function_descriptor); + +void *dereference_kernel_function_descriptor(void *ptr) +{ + if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} +#endif + int func_ptr_is_kernel_text(void *ptr) { unsigned long addr; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f..bc475e62279d 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h index a758caf689d2..b6f454130463 100644 --- a/sound/ppc/pmac.h +++ b/sound/ppc/pmac.h @@ -26,6 +26,7 @@ #include <asm/dbdma.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/pmac_feature.h> /* maximum number of fragments */ #define PMAC_MAX_FRAGS 32 diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 6b36b7f5dcf9..243c781f0780 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -44,6 +44,7 @@ ACCESS_NULL WRITE_RO WRITE_RO_AFTER_INIT WRITE_KERN +WRITE_OPD REFCOUNT_INC_OVERFLOW REFCOUNT_ADD_OVERFLOW REFCOUNT_INC_NOT_ZERO_OVERFLOW diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0830e63818c1..6ba95cd19e42 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -30,8 +30,10 @@ SUB_DIRS = alignment \ eeh \ vphn \ math \ + papr_attributes \ ptrace \ - security + security \ + mce endif diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 994b11af765c..7283e8b07b75 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -13,3 +13,4 @@ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 copy_mc_64 +memmove_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 3095b1f1c02b..77594e697f2f 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -13,7 +13,8 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ - copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ + memmove_64 
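For reference, the dereference helper added to kernel/extable.c above is what lets pointer-classification code work unchanged across descriptor and non-descriptor ABIs. The body of func_ptr_is_kernel_text() is cut off by the hunk boundary; the usual pattern, sketched here assuming the long-standing core_kernel_text() and is_module_text_address() helpers, is:

	int func_ptr_is_kernel_text(void *ptr)
	{
		unsigned long addr;

		addr = (unsigned long) dereference_function_descriptor(ptr);
		if (core_kernel_text(addr))
			return 1;
		return is_module_text_address(addr);
	}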
EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ -D COPY_LOOP=test___copy_tofrom_user_base \ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \ -o $@ $^ + +$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \ + memcpy_stubs.S + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D TEST_MEMMOVE=test_memmove \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 58c1cef3e399..003e1b3d9300 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -26,6 +26,7 @@ #define _GLOBAL(A) FUNC_START(test_ ## A) #define _GLOBAL_TOC(A) _GLOBAL(A) #define _GLOBAL_TOC_KASAN(A) _GLOBAL(A) +#define _GLOBAL_KASAN(A) _GLOBAL(A) #define PPC_MTOCRF(A, B) mtocrf A, B diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S new file mode 120000 index 000000000000..db254c9a5f5c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/mem_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S new file mode 100644 index 000000000000..d9baa832fa49 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/ppc_asm.h> + +FUNC_START(memcpy) + b test_memcpy + +FUNC_START(backwards_memcpy) + b test_backwards_memcpy diff --git a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c new file mode 100644 index 000000000000..1a23218b5757 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <malloc.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "utils.h" + +void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n); + +#define BUF_LEN 65536 +#define MAX_OFFSET 512 + +size_t max(size_t a, size_t b) +{ + if (a >= b) + return a; + return b; +} + +static int testcase_run(void) +{ + size_t i, src_off, dst_off, len; + + char *usermap = memalign(BUF_LEN, BUF_LEN); + char *kernelmap = memalign(BUF_LEN, BUF_LEN); + + assert(usermap != NULL); + assert(kernelmap != NULL); + + memset(usermap, 0, BUF_LEN); + memset(kernelmap, 0, BUF_LEN); + + for (i = 0; i < BUF_LEN; i++) { + usermap[i] = i & 0xff; + kernelmap[i] = i & 0xff; + } + + for (src_off = 0; src_off < MAX_OFFSET; src_off++) { + for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { + for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) { + + memmove(usermap + dst_off, usermap + src_off, len); + TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len); + if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) { + printf("memmove failed at %ld %ld %ld\n", + src_off, dst_off, len); + abort(); + } + } + } + } + return 0; +} + +int main(void) +{ + return test_harness(testcase_run, "memmove"); +} diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c0f2742a3a59..c422be8a42b2 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -52,6 +52,9 @@ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_TAR 0x32f /* Target Address Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) +#define SPRN_PVR 0x11F + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ @@ -84,6 +87,7 @@ #define TEXASR_ROT 0x0000000002000000 /* MSR register bits */ +#define MSR_HV (1ul << 60) /* Hypervisor state */ #define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ #define MSR_TS_T_LG 34 /* Trans Mem state: Active */ diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile new file mode 100644 index 000000000000..2424513982d9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -0,0 +1,7 @@ +#SPDX-License-Identifier: GPL-2.0-or-later + +TEST_GEN_PROGS := inject-ra-err + +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c new file mode 100644 index 000000000000..94323c34d9a6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: 
GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vas-api.h"
+#include "utils.h"
+
+static bool faulted;
+
+static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	faulted = true;
+	regs->nip += 4;
+}
+
+static int test_ra_error(void)
+{
+	struct vas_tx_win_open_attr attr;
+	int fd, *paste_addr;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct sigaction act = {
+		.sa_sigaction = sigbus_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	memset(&attr, 0, sizeof(attr));
+	attr.version = 1;
+	attr.vas_id = 0;
+
+	SKIP_IF(access(devname, F_OK));
+
+	fd = open(devname, O_RDWR);
+	FAIL_IF(fd < 0);
+	FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0);
+	FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0);
+
+	paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL);
+	FAIL_IF(paste_addr == MAP_FAILED);
+
+	/* The following assignment triggers an exception */
+	mb();
+	*paste_addr = 1;
+	mb();
+
+	FAIL_IF(!faulted);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_ra_error, "inject-ra-err");
+}
+
diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h
new file mode 120000
index 000000000000..1455c1bcd351
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/vas-api.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore index 824b863ec6bd..d5f42b6d9e99 100644 --- a/arch/powerpc/kernel/vdso32/.gitignore +++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -vdso32.lds -vdso32.so.dbg +attr_test diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile new file mode 100644 index 000000000000..e899712d49db --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := attr_test + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c new file mode 100644 index 000000000000..bab0dc06e90b --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PAPR Energy attributes sniff test + * This checks if the papr folders and contents are populated relating to + * the energy and frequency attributes + * + * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. + */ + +#include <stdio.h> +#include <string.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> + +#include "utils.h" + +enum energy_freq_attrs { + POWER_PERFORMANCE_MODE = 1, + IDLE_POWER_SAVER_STATUS = 2, + MIN_FREQ = 3, + STAT_FREQ = 4, + MAX_FREQ = 6, + PROC_FOLDING_STATUS = 8 +}; + +enum type { + INVALID, + STR_VAL, + NUM_VAL +}; + +int value_type(int id) +{ + int val_type; + + switch (id) { + case POWER_PERFORMANCE_MODE: + case IDLE_POWER_SAVER_STATUS: + val_type = STR_VAL; + break; + case MIN_FREQ: + case STAT_FREQ: + case MAX_FREQ: + case PROC_FOLDING_STATUS: + val_type = NUM_VAL; + break; + default: + val_type = INVALID; + } + + return val_type; +} + +int verify_energy_info(void) +{ + const char *path = "/sys/firmware/papr/energy_scale_info"; + struct dirent *entry; + struct stat s; + DIR *dirp; + + if (stat(path, &s) || !S_ISDIR(s.st_mode)) + return -1; + dirp = opendir(path); + + while ((entry = readdir(dirp)) != NULL) { + char file_name[64]; + int id, attr_type; + FILE *f; + + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + id = atoi(entry->d_name); + attr_type = value_type(id); + if (attr_type == INVALID) + return -1; + + /* Check if the files exist and have data in them */ + sprintf(file_name, "%s/%d/desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + sprintf(file_name, "%s/%d/value", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + if (attr_type == STR_VAL) { + sprintf(file_name, "%s/%d/value_desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + } + } + + return 0; +} + +int main(void) +{ + return test_harness(verify_energy_info, "papr_attributes"); +} diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 904672fb78dd..edbd96d3b2ab 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. 
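The attr_test.c above walks every attribute id it finds under the new directory. When poking at the interface by hand, a standalone reader for a single attribute can be useful; a sketch, where read_papr_attr() is a hypothetical helper (not part of the selftest) built on the same /sys/firmware/papr/energy_scale_info/<id>/value layout the test exercises:

	static int read_papr_attr(int id, char *buf, size_t len)
	{
		char path[128];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/firmware/papr/energy_scale_info/%d/value", id);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (!fgets(buf, len, f)) {
			fclose(f);
			return -1;
		}
		fclose(f);
		return 0;
	}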
include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb +all: $(TEST_GEN_PROGS) ebb sampling_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -26,25 +26,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb +sampling_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c index 48e3a413b15d..0c1c1bdba081 100644 --- a/tools/testing/selftests/powerpc/pmu/event.c +++ b/tools/testing/selftests/powerpc/pmu/event.c @@ -8,6 +8,7 @@ #include <sys/syscall.h> #include <string.h> #include <stdio.h> +#include <stdbool.h> #include <sys/ioctl.h> #include "event.h" @@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, group_fd, flags); } -void event_init_opts(struct event *e, u64 config, int type, char *name) +static void __event_init_opts(struct event *e, u64 config, + int type, char *name, bool sampling) { memset(e, 0, sizeof(*e)); @@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name) /* This has to match the structure layout in the header */ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \ PERF_FORMAT_TOTAL_TIME_RUNNING; + if (sampling) { + e->attr.sample_period = 1000; + e->attr.sample_type = PERF_SAMPLE_REGS_INTR; + e->attr.disabled = 1; + } +} + +void event_init_opts(struct event *e, u64 config, int type, char *name) +{ + __event_init_opts(e, config, type, name, false); } void event_init_named(struct event *e, u64 config, char *name) @@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config) event_init_opts(e, config, PERF_TYPE_RAW, "event"); } +void event_init_sampling(struct event *e, u64 config) +{ + __event_init_opts(e, config, PERF_TYPE_RAW, "event", true); +} + #define PERF_CURRENT_PID 0 #define PERF_NO_PID -1 #define PERF_NO_CPU -1 diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 302eaab51706..51aad0b6d9ad 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -22,11 +22,17 @@ struct event { u64 running; u64 enabled; } result; + /* + * mmap buffer used while 
recording sample. + * Accessed as "struct perf_event_mmap_page" + */ + void *mmap_buffer; }; void event_init(struct event *e, u64 config); void event_init_named(struct event *e, u64 config, char *name); void event_init_opts(struct event *e, u64 config, int type, char *name); +void event_init_sampling(struct event *e, u64 config); int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd); int event_open_with_group(struct event *e, int group_fd); int event_open_with_pid(struct event *e, pid_t pid); diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore new file mode 100644 index 000000000000..0fce5a694684 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -0,0 +1,11 @@ +mmcr0_exceptionbits_test +mmcr0_cc56run_test +mmcr0_pmccext_test +mmcr0_pmcjce_test +mmcr0_fc56_pmc1ce_test +mmcr0_fc56_pmc56_test +mmcr1_comb_test +mmcr2_l2l3_test +mmcr2_fcs_fch_test +mmcr3_src_test +mmcra_thresh_marked_sample_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile new file mode 100644 index 000000000000..a785c6a173b9 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ + mmcr3_src_test mmcra_thresh_marked_sample_test + +top_srcdir = ../../../../../.. +include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c new file mode 100644 index 000000000000..fca054bbc094 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. 
+ */
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "misc.h"
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+/* Storage for platform version */
+int pvr;
+u64 platform_extended_mask;
+
+/* Mask and Shift for Event code fields */
+int ev_mask_pmcxsel, ev_shift_pmcxsel;		//pmcxsel field
+int ev_mask_marked, ev_shift_marked;		//marked field
+int ev_mask_comb, ev_shift_comb;		//combine field
+int ev_mask_unit, ev_shift_unit;		//unit field
+int ev_mask_pmc, ev_shift_pmc;			//pmc field
+int ev_mask_cache, ev_shift_cache;		//Cache sel field
+int ev_mask_sample, ev_shift_sample;		//Random sampling field
+int ev_mask_thd_sel, ev_shift_thd_sel;		//thresh_sel field
+int ev_mask_thd_start, ev_shift_thd_start;	//thresh_start field
+int ev_mask_thd_stop, ev_shift_thd_stop;	//thresh_stop field
+int ev_mask_thd_cmp, ev_shift_thd_cmp;		//thresh cmp field
+int ev_mask_sm, ev_shift_sm;			//SDAR mode field
+int ev_mask_rsq, ev_shift_rsq;			//radix scope qual field
+int ev_mask_l2l3, ev_shift_l2l3;		//l2l3 sel field
+int ev_mask_mmcr3_src, ev_shift_mmcr3_src;	//mmcr3 field
+
+static void init_ev_encodes(void)
+{
+	ev_mask_pmcxsel = 0xff;
+	ev_shift_pmcxsel = 0;
+	ev_mask_marked = 1;
+	ev_shift_marked = 8;
+	ev_mask_unit = 0xf;
+	ev_shift_unit = 12;
+	ev_mask_pmc = 0xf;
+	ev_shift_pmc = 16;
+	ev_mask_sample = 0x1f;
+	ev_shift_sample = 24;
+	ev_mask_thd_sel = 0x7;
+	ev_shift_thd_sel = 29;
+	ev_mask_thd_start = 0xf;
+	ev_shift_thd_start = 36;
+	ev_mask_thd_stop = 0xf;
+	ev_shift_thd_stop = 32;
+
+	switch (pvr) {
+	case POWER10:
+		ev_mask_rsq = 1;
+		ev_shift_rsq = 9;
+		ev_mask_comb = 3;
+		ev_shift_comb = 10;
+		ev_mask_cache = 3;
+		ev_shift_cache = 20;
+		ev_mask_sm = 0x3;
+		ev_shift_sm = 22;
+		ev_mask_l2l3 = 0x1f;
+		ev_shift_l2l3 = 40;
+		ev_mask_mmcr3_src = 0x7fff;
+		ev_shift_mmcr3_src = 45;
+		break;
+	case POWER9:
+		ev_mask_comb = 3;
+		ev_shift_comb = 10;
+		ev_mask_cache = 0xf;
+		ev_shift_cache = 20;
+		ev_mask_thd_cmp = 0x3ff;
+		ev_shift_thd_cmp = 40;
+		ev_mask_sm = 0x3;
+		ev_shift_sm = 50;
+		break;
+	default:
+		FAIL_IF_EXIT(1);
+	}
+}
+
+/* Return the extended regs mask value */
+static u64 perf_get_platform_reg_mask(void)
+{
+	if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+		return PERF_POWER10_MASK;
+	if (have_hwcap2(PPC_FEATURE2_ARCH_3_00))
+		return PERF_POWER9_MASK;
+
+	return -1;
+}
+
+int check_extended_regs_support(void)
+{
+	int fd;
+	struct event event;
+
+	event_init(&event, 0x1001e);
+
+	event.attr.type = 4;
+	event.attr.sample_period = 1;
+	event.attr.disabled = 1;
+	event.attr.sample_type = PERF_SAMPLE_REGS_INTR;
+	event.attr.sample_regs_intr = platform_extended_mask;
+
+	fd = event_open(&event);
+	if (fd != -1)
+		return 0;
+
+	return -1;
+}
+
+int check_pvr_for_sampling_tests(void)
+{
+	pvr = PVR_VER(mfspr(SPRN_PVR));
+
+	platform_extended_mask = perf_get_platform_reg_mask();
+
+	/*
+	 * Check for supported platforms
+	 * for sampling test
+	 */
+	if ((pvr != POWER10) && (pvr != POWER9))
+		goto out;
+
+	/*
+	 * Check PMU driver registered by looking for
+	 * PPC_FEATURE2_EBB bit in AT_HWCAP2
+	 */
+	if (!have_hwcap2(PPC_FEATURE2_EBB))
+		goto out;
+
+	/* check if platform supports extended regs */
+	if (check_extended_regs_support())
+		goto out;
+
+	init_ev_encodes();
+	return 0;
+out:
+	printf("%s: Sampling tests unsupported\n", __func__);
+	return -1;
+}
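The helpers that follow all operate on the standard perf mmap layout: one metadata page followed by the sample data pages. For orientation, a sketch of what event_sample_buf_mmap() maps and what a PERF_SAMPLE_REGS_INTR record carries (layout per the perf ABI; one data page shown, as the tests request):

	buf = event_sample_buf_mmap(fd, 1);	/* maps (1 + 1) * page_size */

	+---------------------------------+ buf
	| struct perf_event_mmap_page     |   data_head/data_tail cursors
	+---------------------------------+ buf + page_size
	| struct perf_event_header        |   PERF_RECORD_SAMPLE
	| u64 abi;                        |   PERF_SAMPLE_REGS_ABI_64
	| u64 regs[];                     |   one u64 per bit set in
	+---------------------------------+   attr.sample_regs_intr

get_intr_regs() below skips the leading abi word, which is why get_reg_value() can then index the returned array directly by register position.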
+
+/*
+ * Allocate mmap buffer of "mmap_pages" number of
+ * pages.
+ */
+void *event_sample_buf_mmap(int fd, int mmap_pages)
+{
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	size_t mmap_size;
+	void *buff;
+
+	if (mmap_pages <= 0)
+		return NULL;
+
+	if (fd <= 0)
+		return NULL;
+
+	mmap_size = page_size * (1 + mmap_pages);
+	buff = mmap(NULL, mmap_size,
+		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (buff == MAP_FAILED) {
+		perror("mmap() failed.");
+		return NULL;
+	}
+	return buff;
+}
+
+/*
+ * Post process the mmap buffer.
+ * - If sample_count != NULL then return the count of total
+ *   samples present in the mmap buffer.
+ * - If sample_count == NULL then return the address
+ *   of the first sample from the mmap buffer.
+ */
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count)
+{
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	struct perf_event_header *header = sample_buff + page_size;
+	struct perf_event_mmap_page *metadata_page = sample_buff;
+	unsigned long data_head, data_tail;
+
+	/*
+	 * PERF_RECORD_SAMPLE:
+	 * struct {
+	 *	struct perf_event_header hdr;
+	 *	u64 data[];
+	 * };
+	 */
+
+	data_head = metadata_page->data_head;
+	/* sync memory before reading sample */
+	mb();
+	data_tail = metadata_page->data_tail;
+
+	/* Check for sample_count */
+	if (sample_count)
+		*sample_count = 0;
+
+	while (1) {
+		/*
+		 * Reads the mmap data buffer by moving
+		 * the data_tail to know the last read data.
+		 * data_head points to head in data buffer.
+		 * refer "struct perf_event_mmap_page" in
+		 * "include/uapi/linux/perf_event.h".
+		 */
+		if (data_head - data_tail < sizeof(*header))
+			return NULL;
+
+		data_tail += sizeof(*header);
+		if (header->type == PERF_RECORD_SAMPLE) {
+			*size = (header->size - sizeof(*header));
+			if (!sample_count)
+				return sample_buff + page_size + data_tail;
+			data_tail += *size;
+			*sample_count += 1;
+		} else {
+			*size = (header->size - sizeof(*header));
+			if ((metadata_page->data_tail + *size) > metadata_page->data_head)
+				data_tail = metadata_page->data_head;
+			else
+				data_tail += *size;
+		}
+		header = (struct perf_event_header *)((void *)header + header->size);
+	}
+	return NULL;
+}
+
+int collect_samples(void *sample_buff)
+{
+	u64 sample_count;
+	size_t size = 0;
+
+	__event_read_samples(sample_buff, &size, &sample_count);
+	return sample_count;
+}
+
+static void *perf_read_first_sample(void *sample_buff, size_t *size)
+{
+	return __event_read_samples(sample_buff, size, NULL);
+}
+
+u64 *get_intr_regs(struct event *event, void *sample_buff)
+{
+	u64 type = event->attr.sample_type;
+	u64 *intr_regs;
+	size_t size = 0;
+
+	if ((type ^ PERF_SAMPLE_REGS_INTR))
+		return NULL;
+
+	intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size);
+	if (!intr_regs)
+		return NULL;
+
+	/*
+	 * The first entry in the sample buffer is used to specify
+	 * PERF_SAMPLE_REGS_ABI_64; skip the perf regs ABI word to
+	 * access the interrupt registers.
+ */ + ++intr_regs; + + return intr_regs; +} + +static const unsigned int __perf_reg_mask(const char *register_name) +{ + if (!strcmp(register_name, "R0")) + return 0; + else if (!strcmp(register_name, "R1")) + return 1; + else if (!strcmp(register_name, "R2")) + return 2; + else if (!strcmp(register_name, "R3")) + return 3; + else if (!strcmp(register_name, "R4")) + return 4; + else if (!strcmp(register_name, "R5")) + return 5; + else if (!strcmp(register_name, "R6")) + return 6; + else if (!strcmp(register_name, "R7")) + return 7; + else if (!strcmp(register_name, "R8")) + return 8; + else if (!strcmp(register_name, "R9")) + return 9; + else if (!strcmp(register_name, "R10")) + return 10; + else if (!strcmp(register_name, "R11")) + return 11; + else if (!strcmp(register_name, "R12")) + return 12; + else if (!strcmp(register_name, "R13")) + return 13; + else if (!strcmp(register_name, "R14")) + return 14; + else if (!strcmp(register_name, "R15")) + return 15; + else if (!strcmp(register_name, "R16")) + return 16; + else if (!strcmp(register_name, "R17")) + return 17; + else if (!strcmp(register_name, "R18")) + return 18; + else if (!strcmp(register_name, "R19")) + return 19; + else if (!strcmp(register_name, "R20")) + return 20; + else if (!strcmp(register_name, "R21")) + return 21; + else if (!strcmp(register_name, "R22")) + return 22; + else if (!strcmp(register_name, "R23")) + return 23; + else if (!strcmp(register_name, "R24")) + return 24; + else if (!strcmp(register_name, "R25")) + return 25; + else if (!strcmp(register_name, "R26")) + return 26; + else if (!strcmp(register_name, "R27")) + return 27; + else if (!strcmp(register_name, "R28")) + return 28; + else if (!strcmp(register_name, "R29")) + return 29; + else if (!strcmp(register_name, "R30")) + return 30; + else if (!strcmp(register_name, "R31")) + return 31; + else if (!strcmp(register_name, "NIP")) + return 32; + else if (!strcmp(register_name, "MSR")) + return 33; + else if (!strcmp(register_name, "ORIG_R3")) + return 34; + else if (!strcmp(register_name, "CTR")) + return 35; + else if (!strcmp(register_name, "LINK")) + return 36; + else if (!strcmp(register_name, "XER")) + return 37; + else if (!strcmp(register_name, "CCR")) + return 38; + else if (!strcmp(register_name, "SOFTE")) + return 39; + else if (!strcmp(register_name, "TRAP")) + return 40; + else if (!strcmp(register_name, "DAR")) + return 41; + else if (!strcmp(register_name, "DSISR")) + return 42; + else if (!strcmp(register_name, "SIER")) + return 43; + else if (!strcmp(register_name, "MMCRA")) + return 44; + else if (!strcmp(register_name, "MMCR0")) + return 45; + else if (!strcmp(register_name, "MMCR1")) + return 46; + else if (!strcmp(register_name, "MMCR2")) + return 47; + else if (!strcmp(register_name, "MMCR3")) + return 48; + else if (!strcmp(register_name, "SIER2")) + return 49; + else if (!strcmp(register_name, "SIER3")) + return 50; + else if (!strcmp(register_name, "PMC1")) + return 51; + else if (!strcmp(register_name, "PMC2")) + return 52; + else if (!strcmp(register_name, "PMC3")) + return 53; + else if (!strcmp(register_name, "PMC4")) + return 54; + else if (!strcmp(register_name, "PMC5")) + return 55; + else if (!strcmp(register_name, "PMC6")) + return 56; + else if (!strcmp(register_name, "SDAR")) + return 57; + else if (!strcmp(register_name, "SIAR")) + return 58; + else + return -1; +} + +u64 get_reg_value(u64 *intr_regs, char *register_name) +{ + int register_bit_position; + + register_bit_position = __perf_reg_mask(register_name); + + if 
(register_bit_position < 0 || (!((platform_extended_mask >> + (register_bit_position - 1)) & 1))) + return -1; + + return *(intr_regs + register_bit_position); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h new file mode 100644 index 000000000000..7675f3177725 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include "../event.h" + +#define POWER10 0x80 +#define POWER9 0x4e +#define PERF_POWER9_MASK 0x7f8ffffffffffff +#define PERF_POWER10_MASK 0x7ffffffffffffff + +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ +#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ +#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ +#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ + +extern int ev_mask_pmcxsel, ev_shift_pmcxsel; +extern int ev_mask_marked, ev_shift_marked; +extern int ev_mask_comb, ev_shift_comb; +extern int ev_mask_unit, ev_shift_unit; +extern int ev_mask_pmc, ev_shift_pmc; +extern int ev_mask_cache, ev_shift_cache; +extern int ev_mask_sample, ev_shift_sample; +extern int ev_mask_thd_sel, ev_shift_thd_sel; +extern int ev_mask_thd_start, ev_shift_thd_start; +extern int ev_mask_thd_stop, ev_shift_thd_stop; +extern int ev_mask_thd_cmp, ev_shift_thd_cmp; +extern int ev_mask_sm, ev_shift_sm; +extern int ev_mask_rsq, ev_shift_rsq; +extern int ev_mask_l2l3, ev_shift_l2l3; +extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; +extern int pvr; +extern u64 platform_extended_mask; +extern int check_pvr_for_sampling_tests(void); + +/* + * Event code field extraction macro. + * Raw event code is combination of multiple + * fields. 
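+ * (For instance, event code 0x46880, used by the mmcr1_comb test
+ * below, decodes with the shifts/masks that init_ev_encodes() sets
+ * up as pmc = (0x46880 >> 16) & 0xf = 4, comb = (0x46880 >> 10) & 0x3 = 2
+ * and pmcxsel = 0x46880 & 0xff = 0x80.)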
Macro to extract individual fields + * + * x - Raw event code value + * y - Field to extract + */ +#define EV_CODE_EXTRACT(x, y) \ + ((x >> ev_shift_##y) & ev_mask_##y) + +void *event_sample_buf_mmap(int fd, int mmap_pages); +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); +int collect_samples(void *sample_buff); +u64 *get_intr_regs(struct event *event, void *sample_buff); +u64 get_reg_value(u64 *intr_regs, char *register_name); + +static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_FC56); +} + +static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_PMCCEXT); +} + +static inline int get_mmcr0_pmao(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 7) & 0x1); +} + +static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 8) & 0x1); +} + +static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 14) & 0x1); +} + +static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 15) & 0x1); +} + +static inline int get_mmcr0_pmae(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 27) & 0x1); +} + +static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff)); +} + +static inline int get_mmcr1_unit(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf); +} + +static inline int get_mmcr1_comb(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3); +} + +static inline int get_mmcr1_cache(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> 46) & 0x3); +} + +static inline int get_mmcr1_rsq(u64 mmcr1, int pmc) +{ + return mmcr1 & MMCR1_RSQ; +} + +static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcp(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcpc(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fch(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcti(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcta(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) +{ + if (pvr == POWER10) + return ((mmcr2 & 0xf8) >> 3); + return 0; +} + +static inline int get_mmcr3_src(u64 mmcr3, int pmc) +{ + if (pvr != POWER10) + return 0; + return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff); +} + +static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return ((mmcra >> 45) & 0x7ff); + return ((mmcra >> 45) & 0x3ff); +} + +static inline int get_mmcra_sm(u64 mmcra, int pmc) +{ + return ((mmcra >> 42) & 0x3); +} + +static inline int 
get_mmcra_bhrb_disable(u64 mmcra, int pmc) +{ + if (pvr == POWER10) + return mmcra & BHRB_DISABLE; + return 0; +} + +static inline int get_mmcra_ifm(u64 mmcra, int pmc) +{ + return ((mmcra >> 30) & 0x3); +} + +static inline int get_mmcra_thd_sel(u64 mmcra, int pmc) +{ + return ((mmcra >> 16) & 0x7); +} + +static inline int get_mmcra_thd_start(u64 mmcra, int pmc) +{ + return ((mmcra >> 12) & 0xf); +} + +static inline int get_mmcra_thd_stop(u64 mmcra, int pmc) +{ + return ((mmcra >> 8) & 0xf); +} + +static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc) +{ + return ((mmcra >> 4) & 0x7); +} + +static inline int get_mmcra_sample_mode(u64 mmcra, int pmc) +{ + return ((mmcra >> 1) & 0x3); +} + +static inline int get_mmcra_marked(u64 mmcra, int pmc) +{ + return mmcra & 0x1; +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c new file mode 100644 index 000000000000..ae4172f83817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: cc56run. + */ +static int mmcr0_cc56run(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that cc56run bit is set in MMCR0 */ + FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_cc56run, "mmcr0_cc56run"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c new file mode 100644 index 000000000000..982aa56d2171 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields : pmae, pmao. 
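+ *
+ * When the sampling exception is taken, hardware clears MMCR0[PMAE]
+ * (alert enable) and sets MMCR0[PMAO] (alert occurred), so the MMCR0
+ * value captured in the sample is expected to show:
+ *
+ *	get_mmcr0_pmae(mmcr0, 5) == 0	(misc.h: (mmcr0 >> 27) & 0x1)
+ *	get_mmcr0_pmao(mmcr0, 5) == 1	(misc.h: (mmcr0 >> 7) & 0x1)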
+ */ +static int mmcr0_exceptionbits(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmae is cleared and pmao is set in MMCR0 */ + FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5)); + FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c new file mode 100644 index 000000000000..1c1813c182c0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56, pmc1ce. + */ +static int mmcr0_fc56_pmc1ce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56, pmc1ce fields are set in MMCR0 */ + FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1)); + FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c new file mode 100644 index 000000000000..332d24b5ab9c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56_pmc56 + */ +static int mmcr0_fc56_pmc56(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56 is not set in MMCR0 when using PMC5 */ + FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c new file mode 100644 index 000000000000..dfd186cd8eec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmccext + */ +static int mmcr0_pmccext(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x4001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmccext field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmccext, "mmcr0_pmccext"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c new file mode 100644 index 000000000000..fdd8ed9bf725 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for the mmcr0
+ * pmcjce field.
+ */
+static int mmcr0_pmcjce(void)
+{
+        struct event event;
+        u64 *intr_regs;
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, 0x500fa);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        thirty_two_instruction_loop(10000);
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /* Verify that pmcjce field is set in MMCR0 */
+        FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5));
+
+        event_close(&event);
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
new file mode 100644
index 000000000000..5aea6499ee9a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define EventCode 0x46880
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/*
+ * A perf sampling test for the mmcr1
+ * comb field.
+ */
+static int mmcr1_comb(void)
+{
+        struct event event;
+        u64 *intr_regs;
+        u64 dummy;
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, EventCode);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        thirty_two_instruction_loop_with_ll_sc(10000000, &dummy);
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /*
+         * Verify that the comb field matches the
+         * corresponding event code field
+         */
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) !=
+                get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4));
+
+        event_close(&event);
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcr1_comb, "mmcr1_comb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
new file mode 100644
index 000000000000..4e242fd61b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
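+ *
+ * The test below opens the sampling event with exclude_kernel set and
+ * expects that to be mirrored in the sampled MMCR2: in the FCH bit
+ * (freeze counters in hypervisor state) when running with MSR[HV]
+ * set, or in the FCS bit (freeze counters in supervisor state)
+ * otherwise. The SIGUSR2 handler reads the MSR from the signal
+ * context to decide which of the two applies.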
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+static bool is_hv;
+
+static void sig_usr2_handler(int signum, siginfo_t *info, void *data)
+{
+        ucontext_t *uctx = data;
+
+        is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV);
+}
+
+/*
+ * A perf sampling test for the mmcr2
+ * fcs and fch fields.
+ */
+static int mmcr2_fcs_fch(void)
+{
+        struct sigaction sigact = {
+                .sa_sigaction = sig_usr2_handler,
+                .sa_flags = SA_SIGINFO
+        };
+        struct event event;
+        u64 *intr_regs;
+
+        FAIL_IF(sigaction(SIGUSR2, &sigact, NULL));
+        FAIL_IF(kill(getpid(), SIGUSR2));
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, 0x1001e);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        event.attr.exclude_kernel = 1;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        thirty_two_instruction_loop(10000);
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /*
+         * Verify that the fcs and fch fields of MMCR2 match
+         * the corresponding modifier fields.
+         */
+        if (is_hv)
+                FAIL_IF(event.attr.exclude_kernel !=
+                        get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1));
+        else
+                FAIL_IF(event.attr.exclude_kernel !=
+                        get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1));
+
+        event_close(&event);
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
new file mode 100644
index 000000000000..ceca597016b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread */
+#define EventCode 0x010000046080
+
+#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */
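+
+/*
+ * The workload below touches one byte every 64 kB of a MALLOC_SIZE
+ * buffer, generating a burst of D-side stores for the event above to
+ * count while the sampling event is enabled.
+ */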
+
+/*
+ * A perf sampling test for the mmcr2
+ * l2l3 field.
+ */
+static int mmcr2_l2l3(void)
+{
+        struct event event;
+        u64 *intr_regs;
+        char *p;
+        int i;
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+        SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, EventCode);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        p = malloc(MALLOC_SIZE);
+        FAIL_IF(!p);
+
+        for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+                p[i] = i;
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /*
+         * Verify that the l2l3 field of MMCR2 matches the
+         * corresponding event code field
+         */
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) !=
+                get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4));
+
+        event_close(&event);
+        free(p);
+
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcr2_l2l3, "mmcr2_l2l3");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
new file mode 100644
index 000000000000..e154e2a4cc3a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define EventCode 0x1340000001c040
+
+/*
+ * A perf sampling test for the mmcr3
+ * src field.
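+ *
+ * EV_CODE_EXTRACT(config, mmcr3_src) shifts and masks the raw event
+ * code (roughly (config >> ev_shift_mmcr3_src) & ev_mask_mmcr3_src),
+ * and the same value is expected back from the sampled MMCR3 via
+ * get_mmcr3_src().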
+ */
+static int mmcr3_src(void)
+{
+        struct event event;
+        u64 *intr_regs;
+        u64 dummy;
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+        SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, EventCode);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /*
+         * Verify that the src field of MMCR3 matches the
+         * corresponding event code field
+         */
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) !=
+                get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1));
+
+        event_close(&event);
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcr3_src, "mmcr3_src");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
new file mode 100644
index 000000000000..022cc1655eb5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * Threshold event selection used is issue to complete for cycles
+ * Sampling criteria is Load only sampling
+ */
+#define EventCode 0x35340401e0
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* A perf sampling test for mmcra fields */
+static int mmcra_thresh_marked_sample(void)
+{
+        struct event event;
+        u64 *intr_regs;
+        u64 dummy;
+
+        /* Check for platform support for the test */
+        SKIP_IF(check_pvr_for_sampling_tests());
+
+        /* Init the event for the sampling test */
+        event_init_sampling(&event, EventCode);
+        event.attr.sample_regs_intr = platform_extended_mask;
+        FAIL_IF(event_open(&event));
+        event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+        FAIL_IF(event_enable(&event));
+
+        /* workload to make the event overflow */
+        thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+        FAIL_IF(event_disable(&event));
+
+        /* Check for sample count */
+        FAIL_IF(!collect_samples(event.mmap_buffer));
+
+        intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+        /* Check for intr_regs */
+        FAIL_IF(!intr_regs);
+
+        /*
+         * Verify that the thresh sel/start/stop, marked, random sample
+         * eligibility, sdar mode and sample mode fields match the
+         * corresponding event code fields. "sample >> 2" and
+         * "sample & 0x3" split the sample field into its random
+         * sampling eligibility and sample mode halves inside
+         * EV_CODE_EXTRACT().
+         */
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) !=
+                get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) !=
+                get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) !=
+                get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) !=
+                get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample >> 2) !=
+                get_mmcra_rand_samp_elig(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample & 0x3) !=
+                get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4));
+        FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) !=
+                get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4));
+
+        event_close(&event);
+        return 0;
+}
+
+int main(void)
+{
+        return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample");
+}
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 83647b8277e7..d42ca8c676c3 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void)
 #define PM_BR_PRED_PCACHE 0x048a0 // P9 only
 #define PM_BR_MPRED_PCACHE 0x048b0 // P9 only
 
-#define SPRN_PVR 287
-
 int spectre_v2_test(void)
 {
         enum spectre_v2_state state;