diff options
Diffstat (limited to 'arch')
1031 files changed, 17739 insertions, 13407 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 48b5e103bdb0..98de654b79b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -393,18 +393,23 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_JUMP_LABEL_RELATIVE bool -config HAVE_RCU_TABLE_FREE +config MMU_GATHER_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE +config MMU_GATHER_RCU_TABLE_FREE bool + select MMU_GATHER_TABLE_FREE -config HAVE_MMU_GATHER_PAGE_SIZE +config MMU_GATHER_PAGE_SIZE bool -config HAVE_MMU_GATHER_NO_GATHER +config MMU_GATHER_NO_RANGE bool +config MMU_GATHER_NO_GATHER + bool + depends on MMU_GATHER_TABLE_FREE + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 1989b946a28d..d1ed5a8133c5 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -283,14 +283,8 @@ static inline void __iomem *ioremap(unsigned long port, unsigned long size) return IO_CONCAT(__IO_PREFIX,ioremap) (port, size); } -static inline void __iomem * ioremap_nocache(unsigned long offset, - unsigned long size) -{ - return ioremap(offset, size); -} - -#define ioremap_wc ioremap_nocache -#define ioremap_uc ioremap_nocache +#define ioremap_wc ioremap +#define ioremap_uc ioremap static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h new file mode 100644 index 000000000000..0a9a366a4d34 --- /dev/null +++ b/arch/alpha/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ALPHA_VMALLOC_H +#define _ASM_ALPHA_VMALLOC_H + +#endif /* _ASM_ALPHA_VMALLOC_H */ diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 5d4c76a77a9f..f19aa577354b 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -655,8 +655,6 @@ setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; #endif #endif diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index 7268222cf4e1..528d2be58182 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -119,13 +119,12 @@ static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, return res; } -static const struct file_operations srm_env_proc_fops = { - .owner = THIS_MODULE, - .open = srm_env_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = srm_env_proc_write, +static const struct proc_ops srm_env_proc_ops = { + .proc_open = srm_env_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = srm_env_proc_write, }; static int __init @@ -182,7 +181,7 @@ srm_env_init(void) entry = srm_named_entries; while (entry->name && entry->id) { if (!proc_create_data(entry->name, 0644, named_dir, - &srm_env_proc_fops, (void *)entry->id)) + &srm_env_proc_ops, (void *)entry->id)) goto cleanup; entry++; } @@ -194,7 +193,7 @@ srm_env_init(void) char name[4]; sprintf(name, "%ld", var_num); if (!proc_create_data(name, 0644, numbered_dir, - &srm_env_proc_fops, (void *)var_num)) + &srm_env_proc_ops, (void *)var_num)) goto cleanup; } diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 8e13b0b2928d..36d42da7466a 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -475,3 +475,5 @@ 543 common fspick sys_fspick 544 common pidfd_open sys_pidfd_open # 545 reserved for clone3 +547 common openat2 sys_openat2 +548 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 26108ea785c2..ff2a393b635c 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,7 +13,7 @@ config ARC select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select DMA_DIRECT_REMAP @@ -28,6 +28,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DEBUG_KMEMLEAK select HAVE_FUTEX_CMPXCHG if FUTEX @@ -350,9 +351,8 @@ config NODES_SHIFT Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory zones. -if ISA_ARCOMPACT - config ARC_COMPACT_IRQ_LEVELS + depends on ISA_ARCOMPACT bool "Setup Timer IRQ as high Priority" # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy depends on !SMP @@ -360,14 +360,10 @@ config ARC_COMPACT_IRQ_LEVELS config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" help - Double Precision Floating Point unit had dedicated regs which - need to be saved/restored across context-switch. - Note that ARC FPU is overly simplistic, unlike say x86, which has - hardware pieces to allow software to conditionally save/restore, - based on actual usage of FPU by a task. Thus our implemn does - this for all tasks in system. - -endif #ISA_ARCOMPACT + ARCompact FPU has internal registers to assist with Double precision + Floating Point operations. There are control and stauts registers + for floating point exceptions and rounding modes. These are + preserved across task context switch when enabled. config ARC_CANT_LLSC def_bool n diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index f9a5c9ddcae7..1d109b06e7d8 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -78,6 +78,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 5134f0baf33c..f7e432448e4b 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -39,6 +39,8 @@ #define ARC_REG_CLUSTER_BCR 0xcf #define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */ #define ARC_REG_LPB_CTRL 0x488 /* ARCv2 Loop Buffer control */ +#define ARC_REG_FPU_CTRL 0x300 +#define ARC_REG_FPU_STATUS 0x301 /* Common for ARCompact and ARCv2 status register */ #define ARC_REG_STATUS32 0x0A diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 41b16f21beec..0b8b63d0bec1 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -162,7 +162,7 @@ #endif #ifdef CONFIG_ARC_HAS_ACCL_REGS - ST2 r58, r59, PT_sp + 12 + ST2 r58, r59, PT_r58 #endif .endm @@ -172,8 +172,8 @@ LD2 gp, fp, PT_r26 ; gp (r26), fp (r27) - ld r12, [sp, PT_sp + 4] - ld r30, [sp, PT_sp + 8] + ld r12, [sp, PT_r12] + ld r30, [sp, PT_r30] ; Restore SP (into AUX_USER_SP) only if returning to U mode ; - for K mode, it will be implicitly restored as stack is unwound @@ -190,7 +190,7 @@ #endif #ifdef CONFIG_ARC_HAS_ACCL_REGS - LD2 r58, r59, PT_sp + 12 + LD2 r58, r59, PT_r58 #endif .endm diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h new file mode 100644 index 000000000000..64347250fdf5 --- /dev/null +++ b/arch/arc/include/asm/fpu.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + */ + +#ifndef _ASM_ARC_FPU_H +#define _ASM_ARC_FPU_H + +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE + +#include <asm/ptrace.h> + +#ifdef CONFIG_ISA_ARCOMPACT + +/* These DPFP regs need to be saved/restored across ctx-sw */ +struct arc_fpu { + struct { + unsigned int l, h; + } aux_dpfp[2]; +}; + +#define fpu_init_task(regs) + +#else + +/* + * ARCv2 FPU Control aux register + * - bits to enable Traps on Exceptions + * - Rounding mode + * + * ARCv2 FPU Status aux register + * - FPU exceptions flags (Inv, Div-by-Zero, overflow, underflow, inexact) + * - Flag Write Enable to clear flags explicitly (vs. by fpu instructions + * only + */ + +struct arc_fpu { + unsigned int ctrl, status; +}; + +extern void fpu_init_task(struct pt_regs *regs); + +#endif /* !CONFIG_ISA_ARCOMPACT */ + +extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); + +#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */ + +#define fpu_save_restore(p, n) +#define fpu_init_task(regs) + +#endif /* CONFIG_ARC_FPU_SAVE_RESTORE */ + +#endif /* _ASM_ARC_FPU_H */ diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 9a74ce71a767..30ac40fed2c5 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -8,7 +8,6 @@ #define _ASM_ARC_HUGEPAGE_H #include <linux/types.h> -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 9019ed9f9c94..12be7e1b7cc0 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -273,6 +273,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) #define pmd_present(x) (pmd_val(x)) +#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 706edeaa5583..ec532d1e0725 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -14,15 +14,7 @@ #ifndef __ASSEMBLY__ #include <asm/ptrace.h> - -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE -/* These DPFP regs need to be saved/restored across ctx-sw */ -struct arc_fpu { - struct { - unsigned int l, h; - } aux_dpfp[2]; -}; -#endif +#include <asm/fpu.h> #ifdef CONFIG_ARC_PLAT_EZNPS struct eznps_dp { diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 77f123385e96..aadf65b2b56c 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -9,19 +9,7 @@ #ifndef __ASSEMBLY__ #include <linux/sched.h> - -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE - -extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); -#define ARC_FPU_PREV(p, n) fpu_save_restore(p, n) -#define ARC_FPU_NEXT(t) - -#else - -#define ARC_FPU_PREV(p, n) -#define ARC_FPU_NEXT(n) - -#endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */ +#include <asm/fpu.h> #ifdef CONFIG_ARC_PLAT_EZNPS extern void dp_save_restore(struct task_struct *p, struct task_struct *n); @@ -36,9 +24,8 @@ struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n); #define switch_to(prev, next, last) \ do { \ ARC_EZNPS_DP_PREV(prev, next); \ - ARC_FPU_PREV(prev, next); \ + fpu_save_restore(prev, next); \ last = __switch_to(prev, next);\ - ARC_FPU_NEXT(next); \ mb(); \ } while (0) diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index 7ddba13e9b59..c3f4714a4f5c 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -11,6 +11,7 @@ #include <linux/types.h> int sys_clone_wrapper(int, int, int, int, int); +int sys_clone3_wrapper(void *, size_t); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h new file mode 100644 index 000000000000..973095aad665 --- /dev/null +++ b/arch/arc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARC_VMALLOC_H +#define _ASM_ARC_VMALLOC_H + +#endif /* _ASM_ARC_VMALLOC_H */ diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 5eafa1115162..fa2713ae6bea 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index e784f5396dda..75539670431a 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -23,7 +23,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o +ifdef CONFIG_ISA_ARCOMPACT CFLAGS_fpu.o += -mdpfp +endif ifdef CONFIG_ARC_DW2_UNWIND CFLAGS_ctx_sw.o += -fno-omit-frame-pointer diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c index 1f621e416521..c783bcd35eb8 100644 --- a/arch/arc/kernel/asm-offsets.c +++ b/arch/arc/kernel/asm-offsets.c @@ -66,7 +66,15 @@ int main(void) DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); - DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25)); + +#ifdef CONFIG_ISA_ARCV2 + OFFSET(PT_r12, pt_regs, r12); + OFFSET(PT_r30, pt_regs, r30); +#endif +#ifdef CONFIG_ARC_HAS_ACCL_REGS + OFFSET(PT_r58, pt_regs, r58); + OFFSET(PT_r59, pt_regs, r59); +#endif return 0; } diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 72be01270e24..60406ec62eb8 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -35,6 +35,18 @@ ENTRY(sys_clone_wrapper) b .Lret_from_system_call END(sys_clone_wrapper) +ENTRY(sys_clone3_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone3 + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone3_wrapper) + ENTRY(ret_from_fork) ; when the forked child comes here from the __switch_to function ; r0 has the last task pointer. @@ -337,11 +349,11 @@ resume_user_mode_begin: resume_kernel_mode: ; Disable Interrupts from this point on - ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() - ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe + ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq() + ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe IRQ_DISABLE r9 -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ; Can't preempt if preemption disabled GET_CURR_THR_INFO_FROM_SP r10 diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index 07e22b563fbb..c67c0f0f5f77 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -6,7 +6,9 @@ */ #include <linux/sched.h> -#include <asm/switch_to.h> +#include <asm/fpu.h> + +#ifdef CONFIG_ISA_ARCOMPACT /* * To save/restore FPU regs, simplest scheme would use LR/SR insns. @@ -50,3 +52,28 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) : "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2)) ); } + +#else + +void fpu_init_task(struct pt_regs *regs) +{ + /* default rounding mode */ + write_aux_reg(ARC_REG_FPU_CTRL, 0x100); + + /* set "Write enable" to allow explicit write to exception flags */ + write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); +} + +void fpu_save_restore(struct task_struct *prev, struct task_struct *next) +{ + struct arc_fpu *save = &prev->thread.fpu; + struct arc_fpu *restore = &next->thread.fpu; + + save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); + save->status = read_aux_reg(ARC_REG_FPU_STATUS); + + write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); + write_aux_reg(ARC_REG_FPU_STATUS, restore->status); +} + +#endif diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e1889ce3faf9..315528f04bc1 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -20,6 +20,8 @@ #include <linux/elf.h> #include <linux/tick.h> +#include <asm/fpu.h> + SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr) { task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr; @@ -171,9 +173,8 @@ asmlinkage void ret_from_fork(void); * | user_r25 | * ------------------ <===== END of PAGE */ -int copy_thread(unsigned long clone_flags, - unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ @@ -231,7 +232,7 @@ int copy_thread(unsigned long clone_flags, * set task's userland tls data ptr from 4th arg * clone C-lib call is difft from clone sys-call */ - task_thread_info(p)->thr_ptr = regs->r3; + task_thread_info(p)->thr_ptr = tls; } else { /* Normal fork case: set parent's TLS ptr in child */ task_thread_info(p)->thr_ptr = @@ -264,7 +265,7 @@ int copy_thread(unsigned long clone_flags, /* * Do necessary setup to start up a new user task */ -void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) +void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) { regs->sp = usp; regs->ret = pc; @@ -280,6 +281,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) regs->eflags = 0; #endif + fpu_init_task(regs); + /* bogus seed values for debugging */ regs->lp_start = 0x10; regs->lp_end = 0x80; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ee89dc61f6e..e1c647490f00 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -572,10 +572,6 @@ void __init setup_arch(char **cmdline_p) */ root_mountflags &= ~MS_RDONLY; -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif - arc_unwind_init(); } diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index fddecc76efb7..1069446bdc58 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -7,6 +7,7 @@ #include <asm/syscalls.h> #define sys_clone sys_clone_wrapper +#define sys_clone3 sys_clone3_wrapper #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index dc05a63516f5..27ea64b1fa33 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -42,10 +42,10 @@ do { \ #define EXTRA_INFO(f) { \ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ - % FIELD_SIZEOF(struct unwind_frame_info, f)) \ + % sizeof_field(struct unwind_frame_info, f)) \ + offsetof(struct unwind_frame_info, f) \ - / FIELD_SIZEOF(struct unwind_frame_info, f), \ - FIELD_SIZEOF(struct unwind_frame_info, f) \ + / sizeof_field(struct unwind_frame_info, f), \ + sizeof_field(struct unwind_frame_info, f) \ } #define PTREGS_INFO(f) EXTRA_INFO(regs.f) diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index a376a50d3fea..a931d0a256d0 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -7,7 +7,7 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba75e3661a41..497e0e4702b7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,7 +36,7 @@ config ARM select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_FLAT_ARGVP_ENVP_ON_STACK - select BUILDTIME_EXTABLE_SORT if MMU + select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS select CPU_PM if SUSPEND || CPU_IDLE select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -72,6 +72,7 @@ config ARM select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU @@ -101,7 +102,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP && ARM_LPAE + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_STACKPROTECTOR diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 7ad079861efd..91f93bc89716 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -131,6 +131,11 @@ }; / { + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 MB */ + }; + clk_mcasp0_fixed: clk_mcasp0_fixed { #clock-cells = <0>; compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts index 8678e6e35493..e5fdb7abb0d5 100644 --- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts +++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts @@ -108,7 +108,7 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; }; &i2c0 { diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index cae4500194fe..811c8cae315b 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -86,7 +86,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 95314121d111..a6fbc088daa8 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -42,7 +42,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; @@ -848,6 +848,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi0_pins_default>; pinctrl-1 = <&spi0_pins_sleep>; + ti,pindir-d0-out-d1-in = <1>; }; &spi1 { @@ -855,6 +856,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi1_pins_default>; pinctrl-1 = <&spi1_pins_sleep>; + ti,pindir-d0-out-d1-in = <1>; }; &usb2_phy1 { diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 820ce3b60bb6..669559c9c95b 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -167,11 +167,7 @@ &pcie1_rc { status = "okay"; - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>; }; &mmc1 { diff --git a/arch/arm/boot/dts/am572x-idk-common.dtsi b/arch/arm/boot/dts/am572x-idk-common.dtsi index a064f13b3880..ddf123620e96 100644 --- a/arch/arm/boot/dts/am572x-idk-common.dtsi +++ b/arch/arm/boot/dts/am572x-idk-common.dtsi @@ -147,10 +147,6 @@ gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; }; -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - &mailbox5 { status = "okay"; mbox_ipu1_ipc3x: mbox_ipu1_ipc3x { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index bc76f1705c0f..a813a0cf3ff3 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -29,6 +29,27 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; + main_12v0: fixedregulator-main_12v0 { + /* main supply */ + compatible = "regulator-fixed"; + regulator-name = "main_12v0"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-always-on; + regulator-boot-on; + }; + + evm_5v0: fixedregulator-evm_5v0 { + /* Output of TPS54531D */ + compatible = "regulator-fixed"; + regulator-name = "evm_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&main_12v0>; + regulator-always-on; + regulator-boot-on; + }; + vdd_3v3: fixedregulator-vdd_3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; @@ -547,10 +568,6 @@ gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; -&pcie1_ep { - gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; -}; - &mcasp3 { #sound-dai-cells = <0>; assigned-clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>; diff --git a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts index c1c9cd30f980..13f7aefe045e 100644 --- a/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts +++ b/arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts @@ -258,9 +258,9 @@ }; }; - pca0: pca9552@60 { + pca0: pca9552@61 { compatible = "nxp,pca9552"; - reg = <0x60>; + reg = <0x61>; #address-cells = <1>; #size-cells = <0>; @@ -521,371 +521,6 @@ &i2c13 { status = "okay"; -}; - -&i2c14 { - status = "okay"; -}; - -&i2c15 { - status = "okay"; -}; - -&i2c0 { - status = "okay"; -}; - -&i2c1 { - status = "okay"; -}; - -&i2c2 { - status = "okay"; -}; - -&i2c3 { - status = "okay"; - - power-supply@68 { - compatible = "ibm,cffps2"; - reg = <0x68>; - }; - - power-supply@69 { - compatible = "ibm,cffps2"; - reg = <0x69>; - }; - - power-supply@6a { - compatible = "ibm,cffps2"; - reg = <0x6a>; - }; - - power-supply@6b { - compatible = "ibm,cffps2"; - reg = <0x6b>; - }; -}; - -&i2c4 { - status = "okay"; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - tmp275@49 { - compatible = "ti,tmp275"; - reg = <0x49>; - }; - - tmp275@4a { - compatible = "ti,tmp275"; - reg = <0x4a>; - }; -}; - -&i2c5 { - status = "okay"; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - tmp275@49 { - compatible = "ti,tmp275"; - reg = <0x49>; - }; -}; - -&i2c6 { - status = "okay"; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - tmp275@4a { - compatible = "ti,tmp275"; - reg = <0x4a>; - }; - - tmp275@4b { - compatible = "ti,tmp275"; - reg = <0x4b>; - }; -}; - -&i2c7 { - status = "okay"; - - si7021-a20@20 { - compatible = "silabs,si7020"; - reg = <0x20>; - }; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - max31785@52 { - compatible = "maxim,max31785a"; - reg = <0x52>; - #address-cells = <1>; - #size-cells = <0>; - - fan@0 { - compatible = "pmbus-fan"; - reg = <0>; - tach-pulses = <2>; - }; - - fan@1 { - compatible = "pmbus-fan"; - reg = <1>; - tach-pulses = <2>; - }; - - fan@2 { - compatible = "pmbus-fan"; - reg = <2>; - tach-pulses = <2>; - }; - - fan@3 { - compatible = "pmbus-fan"; - reg = <3>; - tach-pulses = <2>; - }; - }; - - pca0: pca9552@60 { - compatible = "nxp,pca9552"; - reg = <0x60>; - #address-cells = <1>; - #size-cells = <0>; - - gpio-controller; - #gpio-cells = <2>; - - gpio@0 { - reg = <0>; - }; - - gpio@1 { - reg = <1>; - }; - - gpio@2 { - reg = <2>; - }; - - gpio@3 { - reg = <3>; - }; - - gpio@4 { - reg = <4>; - }; - - gpio@5 { - reg = <5>; - }; - - gpio@6 { - reg = <6>; - }; - - gpio@7 { - reg = <7>; - }; - - gpio@8 { - reg = <8>; - }; - - gpio@9 { - reg = <9>; - }; - - gpio@10 { - reg = <10>; - }; - - gpio@11 { - reg = <11>; - }; - - gpio@12 { - reg = <12>; - }; - - gpio@13 { - reg = <13>; - }; - - gpio@14 { - reg = <14>; - }; - - gpio@15 { - reg = <15>; - }; - }; - - dps: dps310@76 { - compatible = "infineon,dps310"; - reg = <0x76>; - #io-channel-cells = <0>; - }; -}; - -&i2c8 { - status = "okay"; - - ucd90320@b { - compatible = "ti,ucd90160"; - reg = <0x0b>; - }; - - ucd90320@c { - compatible = "ti,ucd90160"; - reg = <0x0c>; - }; - - ucd90320@11 { - compatible = "ti,ucd90160"; - reg = <0x11>; - }; - - rtc@32 { - compatible = "epson,rx8900"; - reg = <0x32>; - }; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - tmp275@4a { - compatible = "ti,tmp275"; - reg = <0x4a>; - }; -}; - -&i2c9 { - status = "okay"; - - ir35221@42 { - compatible = "infineon,ir35221"; - reg = <0x42>; - }; - - ir35221@43 { - compatible = "infineon,ir35221"; - reg = <0x43>; - }; - - ir35221@44 { - compatible = "infineon,ir35221"; - reg = <0x44>; - }; - - tmp423a@4c { - compatible = "ti,tmp423"; - reg = <0x4c>; - }; - - tmp423b@4d { - compatible = "ti,tmp423"; - reg = <0x4d>; - }; - - ir35221@72 { - compatible = "infineon,ir35221"; - reg = <0x72>; - }; - - ir35221@73 { - compatible = "infineon,ir35221"; - reg = <0x73>; - }; - - ir35221@74 { - compatible = "infineon,ir35221"; - reg = <0x74>; - }; -}; - -&i2c10 { - status = "okay"; - - ir35221@42 { - compatible = "infineon,ir35221"; - reg = <0x42>; - }; - - ir35221@43 { - compatible = "infineon,ir35221"; - reg = <0x43>; - }; - - ir35221@44 { - compatible = "infineon,ir35221"; - reg = <0x44>; - }; - - tmp423a@4c { - compatible = "ti,tmp423"; - reg = <0x4c>; - }; - - tmp423b@4d { - compatible = "ti,tmp423"; - reg = <0x4d>; - }; - - ir35221@72 { - compatible = "infineon,ir35221"; - reg = <0x72>; - }; - - ir35221@73 { - compatible = "infineon,ir35221"; - reg = <0x73>; - }; - - ir35221@74 { - compatible = "infineon,ir35221"; - reg = <0x74>; - }; -}; - -&i2c11 { - status = "okay"; - - tmp275@48 { - compatible = "ti,tmp275"; - reg = <0x48>; - }; - - tmp275@49 { - compatible = "ti,tmp275"; - reg = <0x49>; - }; -}; - -&i2c12 { - status = "okay"; -}; - -&i2c13 { - status = "okay"; eeprom@50 { compatible = "atmel,24c64"; diff --git a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts index f02de4ab058c..ff49ec76fa7c 100644 --- a/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed-bmc-opp-tacoma.dts @@ -122,37 +122,6 @@ }; }; -&fmc { - status = "okay"; - flash@0 { - status = "okay"; - m25p,fast-read; - label = "bmc"; - spi-max-frequency = <50000000>; -#include "openbmc-flash-layout-128.dtsi" - }; - - flash@1 { - status = "okay"; - m25p,fast-read; - label = "alt-bmc"; - spi-max-frequency = <50000000>; - }; -}; - -&spi1 { - status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_spi1_default>; - - flash@0 { - status = "okay"; - m25p,fast-read; - label = "pnor"; - spi-max-frequency = <100000000>; - }; -}; - &mac2 { status = "okay"; pinctrl-names = "default"; @@ -165,6 +134,11 @@ &emmc { status = "okay"; +}; + +&fsim0 { + status = "okay"; + #address-cells = <2>; #size-cells = <0>; @@ -820,373 +794,6 @@ status = "okay"; }; -&i2c0 { - status = "okay"; -}; - -&i2c1 { - status = "okay"; -}; - -&i2c2 { - status = "okay"; -}; - -&i2c3 { - status = "okay"; - - bmp: bmp280@77 { - compatible = "bosch,bmp280"; - reg = <0x77>; - #io-channel-cells = <1>; - }; - - max31785@52 { - compatible = "maxim,max31785a"; - reg = <0x52>; - #address-cells = <1>; - #size-cells = <0>; - - fan@0 { - compatible = "pmbus-fan"; - reg = <0>; - tach-pulses = <2>; - maxim,fan-rotor-input = "tach"; - maxim,fan-pwm-freq = <25000>; - maxim,fan-dual-tach; - maxim,fan-no-watchdog; - maxim,fan-no-fault-ramp; - maxim,fan-ramp = <2>; - maxim,fan-fault-pin-mon; - }; - - fan@1 { - compatible = "pmbus-fan"; - reg = <1>; - tach-pulses = <2>; - maxim,fan-rotor-input = "tach"; - maxim,fan-pwm-freq = <25000>; - maxim,fan-dual-tach; - maxim,fan-no-watchdog; - maxim,fan-no-fault-ramp; - maxim,fan-ramp = <2>; - maxim,fan-fault-pin-mon; - }; - - fan@2 { - compatible = "pmbus-fan"; - reg = <2>; - tach-pulses = <2>; - maxim,fan-rotor-input = "tach"; - maxim,fan-pwm-freq = <25000>; - maxim,fan-dual-tach; - maxim,fan-no-watchdog; - maxim,fan-no-fault-ramp; - maxim,fan-ramp = <2>; - maxim,fan-fault-pin-mon; - }; - - fan@3 { - compatible = "pmbus-fan"; - reg = <3>; - tach-pulses = <2>; - maxim,fan-rotor-input = "tach"; - maxim,fan-pwm-freq = <25000>; - maxim,fan-dual-tach; - maxim,fan-no-watchdog; - maxim,fan-no-fault-ramp; - maxim,fan-ramp = <2>; - maxim,fan-fault-pin-mon; - }; - }; - - dps: dps310@76 { - compatible = "infineon,dps310"; - reg = <0x76>; - #io-channel-cells = <0>; - }; - - pca0: pca9552@60 { - compatible = "nxp,pca9552"; - reg = <0x60>; - #address-cells = <1>; - #size-cells = <0>; - - gpio-controller; - #gpio-cells = <2>; - - gpio@0 { - reg = <0>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@1 { - reg = <1>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@2 { - reg = <2>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@3 { - reg = <3>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@4 { - reg = <4>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@5 { - reg = <5>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@6 { - reg = <6>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@7 { - reg = <7>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@8 { - reg = <8>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@9 { - reg = <9>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@10 { - reg = <10>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@11 { - reg = <11>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@12 { - reg = <12>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@13 { - reg = <13>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@14 { - reg = <14>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@15 { - reg = <15>; - type = <PCA955X_TYPE_GPIO>; - }; - }; - - power-supply@68 { - compatible = "ibm,cffps1"; - reg = <0x68>; - }; - - power-supply@69 { - compatible = "ibm,cffps1"; - reg = <0x69>; - }; -}; - -&i2c4 { - status = "okay"; - - tmp423a@4c { - compatible = "ti,tmp423"; - reg = <0x4c>; - }; - - ir35221@70 { - compatible = "infineon,ir35221"; - reg = <0x70>; - }; - - ir35221@71 { - compatible = "infineon,ir35221"; - reg = <0x71>; - }; -}; - -&i2c5 { - status = "okay"; - - tmp423a@4c { - compatible = "ti,tmp423"; - reg = <0x4c>; - }; - - ir35221@70 { - compatible = "infineon,ir35221"; - reg = <0x70>; - }; - - ir35221@71 { - compatible = "infineon,ir35221"; - reg = <0x71>; - }; -}; - -&i2c7 { - status = "okay"; -}; - -&i2c9 { - status = "okay"; - - tmp275@4a { - compatible = "ti,tmp275"; - reg = <0x4a>; - }; -}; - -&i2c10 { - status = "okay"; -}; - -&i2c11 { - status = "okay"; - - pca9552: pca9552@60 { - compatible = "nxp,pca9552"; - reg = <0x60>; - #address-cells = <1>; - #size-cells = <0>; - gpio-controller; - #gpio-cells = <2>; - - gpio-line-names = "PS_SMBUS_RESET_N", "APSS_RESET_N", - "GPU0_TH_OVERT_N_BUFF", "GPU1_TH_OVERT_N_BUFF", - "GPU2_TH_OVERT_N_BUFF", "GPU3_TH_OVERT_N_BUFF", - "GPU4_TH_OVERT_N_BUFF", "GPU5_TH_OVERT_N_BUFF", - "GPU0_PWR_GOOD_BUFF", "GPU1_PWR_GOOD_BUFF", - "GPU2_PWR_GOOD_BUFF", "GPU3_PWR_GOOD_BUFF", - "GPU4_PWR_GOOD_BUFF", "GPU5_PWR_GOOD_BUFF", - "12V_BREAKER_FLT_N", "THROTTLE_UNLATCHED_N"; - - gpio@0 { - reg = <0>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@1 { - reg = <1>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@2 { - reg = <2>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@3 { - reg = <3>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@4 { - reg = <4>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@5 { - reg = <5>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@6 { - reg = <6>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@7 { - reg = <7>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@8 { - reg = <8>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@9 { - reg = <9>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@10 { - reg = <10>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@11 { - reg = <11>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@12 { - reg = <12>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@13 { - reg = <13>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@14 { - reg = <14>; - type = <PCA955X_TYPE_GPIO>; - }; - - gpio@15 { - reg = <15>; - type = <PCA955X_TYPE_GPIO>; - }; - }; - - rtc@32 { - compatible = "epson,rx8900"; - reg = <0x32>; - }; - - eeprom@51 { - compatible = "atmel,24c64"; - reg = <0x51>; - }; - - ucd90160@64 { - compatible = "ti,ucd90160"; - reg = <0x64>; - }; -}; - -&i2c12 { - status = "okay"; -}; - -&i2c13 { - status = "okay"; -}; - &pinctrl { /* Hog these as no driver is probed for the entire LPC block */ pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 5f6142d99eeb..b72afbaadaf8 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -163,26 +163,6 @@ spi-max-frequency = <50000000>; status = "disabled"; }; - - fsim0: fsi@1e79b000 { - compatible = "aspeed,ast2600-fsi-master", "fsi-master"; - reg = <0x1e79b000 0x94>; - interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fsi1_default>; - clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; - status = "disabled"; - }; - - fsim1: fsi@1e79b100 { - compatible = "aspeed,ast2600-fsi-master", "fsi-master"; - reg = <0x1e79b100 0x94>; - interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fsi2_default>; - clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; - status = "disabled"; - }; }; mdio0: mdio@1e650000 { @@ -595,6 +575,25 @@ ranges = <0 0x1e78a000 0x1000>; }; + fsim0: fsi@1e79b000 { + compatible = "aspeed,ast2600-fsi-master", "fsi-master"; + reg = <0x1e79b000 0x94>; + interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fsi1_default>; + clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; + status = "disabled"; + }; + + fsim1: fsi@1e79b100 { + compatible = "aspeed,ast2600-fsi-master", "fsi-master"; + reg = <0x1e79b100 0x94>; + interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fsi2_default>; + clocks = <&syscon ASPEED_CLK_GATE_FSICLK>; + status = "disabled"; + }; }; }; }; diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 2dac3efc7640..1bc45cfd5453 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -174,8 +174,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 961bed832755..8eedd6eb409b 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -43,7 +43,7 @@ <0x7c000000 0x0 0xfc000000 0x02000000>, <0x40000000 0x0 0xff800000 0x00800000>; /* Emulate a contiguous 30-bit address range for DMA */ - dma-ranges = <0xc0000000 0x0 0x00000000 0x3c000000>; + dma-ranges = <0xc0000000 0x0 0x00000000 0x40000000>; /* * This node is the provider for the enable-method for @@ -66,6 +66,17 @@ IRQ_TYPE_LEVEL_HIGH)>; }; + avs_monitor: avs-monitor@7d5d2000 { + compatible = "brcm,bcm2711-avs-monitor", + "syscon", "simple-mfd"; + reg = <0x7d5d2000 0xf00>; + + thermal: thermal { + compatible = "brcm,bcm2711-thermal"; + #thermal-sensor-cells = <0>; + }; + }; + dma: dma@7e007000 { compatible = "brcm,bcm2835-dma"; reg = <0x7e007000 0xb00>; @@ -363,6 +374,7 @@ &cpu_thermal { coefficients = <(-487) 410040>; + thermal-sensors = <&thermal>; }; &dsi0 { diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 3caaa57eb6c8..839491628e87 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -37,7 +37,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 372dc1eb88a0..2d9b4dd05830 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -353,8 +353,8 @@ mdio: mdio@18003000 { compatible = "brcm,iproc-mdio"; reg = <0x18003000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; }; mdio-bus-mux@18003000 { diff --git a/arch/arm/boot/dts/dra7-evm-common.dtsi b/arch/arm/boot/dts/dra7-evm-common.dtsi index 82eeba8faef1..23244b5a9942 100644 --- a/arch/arm/boot/dts/dra7-evm-common.dtsi +++ b/arch/arm/boot/dts/dra7-evm-common.dtsi @@ -4,7 +4,7 @@ */ #include <dt-bindings/gpio/gpio.h> -#include <dt-bindings/clk/ti-dra7-atl.h> +#include <dt-bindings/clock/ti-dra7-atl.h> #include <dt-bindings/input/input.h> / { diff --git a/arch/arm/boot/dts/dra72-evm-common.dtsi b/arch/arm/boot/dts/dra72-evm-common.dtsi index 8641a3d7d8ad..9eabfd1502da 100644 --- a/arch/arm/boot/dts/dra72-evm-common.dtsi +++ b/arch/arm/boot/dts/dra72-evm-common.dtsi @@ -6,7 +6,7 @@ #include "dra72x.dtsi" #include <dt-bindings/gpio/gpio.h> -#include <dt-bindings/clk/ti-dra7-atl.h> +#include <dt-bindings/clock/ti-dra7-atl.h> / { compatible = "ti,dra72-evm", "ti,dra722", "ti,dra72", "ti,dra7"; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 93e1eb83bed9..ccf0fd477cf9 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -1734,6 +1734,20 @@ }; }; + gpu_cm: gpu-cm@1200 { + compatible = "ti,omap4-cm"; + reg = <0x1200 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x1200 0x100>; + + gpu_clkctrl: gpu-clkctrl@20 { + compatible = "ti,clkctrl"; + reg = <0x20 0x4>; + #clock-cells = <2>; + }; + }; + l3init_cm: l3init-cm@1300 { compatible = "ti,omap4-cm"; reg = <0x1300 0x100>; diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi index 6472b056a001..5a2c5320437d 100644 --- a/arch/arm/boot/dts/e60k02.dtsi +++ b/arch/arm/boot/dts/e60k02.dtsi @@ -265,11 +265,6 @@ regulator-name = "LDORTC1"; regulator-boot-on; }; - - ldortc2_reg: LDORTC2 { - regulator-name = "LDORTC2"; - regulator-boot-on; - }; }; }; }; diff --git a/arch/arm/boot/dts/imx6dl-icore-mipi.dts b/arch/arm/boot/dts/imx6dl-icore-mipi.dts index e43bccb78ab2..d8f3821a0ffd 100644 --- a/arch/arm/boot/dts/imx6dl-icore-mipi.dts +++ b/arch/arm/boot/dts/imx6dl-icore-mipi.dts @@ -8,7 +8,7 @@ /dts-v1/; #include "imx6dl.dtsi" -#include "imx6qdl-icore.dtsi" +#include "imx6qdl-icore-1.5.dtsi" / { model = "Engicam i.CoreM6 DualLite/Solo MIPI Starter Kit"; diff --git a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts index 5219553df1e7..bb74fc62d913 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts +++ b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts @@ -63,7 +63,7 @@ #sound-dai-cells = <0>; clocks = <&clk_ext_audio_codec>; VDDA-supply = <®_3p3v>; - VDDIO-supply = <®_3p3v>; + VDDIO-supply = <&sw2_reg>; }; }; diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 845cfad99bf9..87f0aa897086 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -204,7 +204,7 @@ }; rtc@56 { - compatible = "rv3029c2"; + compatible = "microcrystal,rv3029"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc_hw300>; reg = <0x56>; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index 71ca76a5e4a5..fe59dde41b64 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -749,10 +749,6 @@ vin-supply = <&vgen5_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen5_reg>; }; diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index 4829aa682aeb..bc86cfaaa9c2 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -584,10 +584,6 @@ vin-supply = <&sw2_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&sw2_reg>; }; diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts index 3e1d32fdf4b8..5ace9e6acf85 100644 --- a/arch/arm/boot/dts/imx6sll-evk.dts +++ b/arch/arm/boot/dts/imx6sll-evk.dts @@ -265,10 +265,6 @@ status = "okay"; }; -®_3p0 { - vin-supply = <&sw2_reg>; -}; - &snvs_poweroff { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts index f1830ed387a5..91a7548fdb8d 100644 --- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts +++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts @@ -159,10 +159,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index a8ee7087af5a..5a63ca615722 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -141,10 +141,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi index 1506eb12b21e..212144511b66 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi @@ -30,14 +30,26 @@ enable-active-high; }; - reg_sensors: regulator-sensors { + reg_peri_3v3: regulator-peri-3v3 { compatible = "regulator-fixed"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_sensors_reg>; - regulator-name = "sensors-supply"; + pinctrl-0 = <&pinctrl_peri_3v3>; + regulator-name = "VPERI_3V3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; gpio = <&gpio5 2 GPIO_ACTIVE_LOW>; + /* + * If you want to want to make this dynamic please + * check schematics and test all affected peripherals: + * + * - sensors + * - ethernet phy + * - can + * - bluetooth + * - wm8960 audio codec + * - ov5640 camera + */ + regulator-always-on; }; reg_can_3v3: regulator-can-3v3 { @@ -140,6 +152,7 @@ pinctrl-0 = <&pinctrl_enet1>; phy-mode = "rmii"; phy-handle = <ðphy0>; + phy-supply = <®_peri_3v3>; status = "okay"; }; @@ -148,6 +161,7 @@ pinctrl-0 = <&pinctrl_enet2>; phy-mode = "rmii"; phy-handle = <ðphy1>; + phy-supply = <®_peri_3v3>; status = "okay"; mdio { @@ -193,8 +207,8 @@ magnetometer@e { compatible = "fsl,mag3110"; reg = <0x0e>; - vdd-supply = <®_sensors>; - vddio-supply = <®_sensors>; + vdd-supply = <®_peri_3v3>; + vddio-supply = <®_peri_3v3>; }; }; @@ -227,7 +241,7 @@ flash0: n25q256a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "micron,n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; spi-max-frequency = <29000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <4>; @@ -462,7 +476,7 @@ >; }; - pinctrl_sensors_reg: sensorsreggrp { + pinctrl_peri_3v3: peri3v3grp { fsl,pins = < MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02 0x1b0b0 >; diff --git a/arch/arm/boot/dts/imx7s-colibri.dtsi b/arch/arm/boot/dts/imx7s-colibri.dtsi index 1fb1ec5d3d70..6d16e32aed89 100644 --- a/arch/arm/boot/dts/imx7s-colibri.dtsi +++ b/arch/arm/boot/dts/imx7s-colibri.dtsi @@ -49,3 +49,7 @@ reg = <0x80000000 0x10000000>; }; }; + +&gpmi { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index d37a1927c88e..ab91c98f2124 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -37,10 +37,10 @@ #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { + cpu0: cpu@f00 { compatible = "arm,cortex-a7"; device_type = "cpu"; - reg = <0>; + reg = <0xf00>; }; }; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 5a7e3e5caebe..3c534cd50ee3 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -253,7 +253,7 @@ &aobus { pmu: pmu@e0 { compatible = "amlogic,meson8-pmu", "syscon"; - reg = <0xe0 0x8>; + reg = <0xe0 0x18>; }; pinctrl_aobus: pinctrl@84 { diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index d9762de0ed34..6f480827b94d 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -356,7 +356,7 @@ twsi1: i2c@d4011000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4011000 0x1000>; + reg = <0xd4011000 0x70>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_TWSI0>; resets = <&soc_clocks MMP2_CLK_TWSI0>; @@ -368,7 +368,7 @@ twsi2: i2c@d4031000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4031000 0x1000>; + reg = <0xd4031000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <0>; clocks = <&soc_clocks MMP2_CLK_TWSI1>; @@ -380,7 +380,7 @@ twsi3: i2c@d4032000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4032000 0x1000>; + reg = <0xd4032000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <1>; clocks = <&soc_clocks MMP2_CLK_TWSI2>; @@ -392,7 +392,7 @@ twsi4: i2c@d4033000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4033000 0x1000>; + reg = <0xd4033000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <2>; clocks = <&soc_clocks MMP2_CLK_TWSI3>; @@ -405,7 +405,7 @@ twsi5: i2c@d4033800 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4033800 0x1000>; + reg = <0xd4033800 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <3>; clocks = <&soc_clocks MMP2_CLK_TWSI4>; @@ -417,7 +417,7 @@ twsi6: i2c@d4034000 { compatible = "mrvl,mmp-twsi"; - reg = <0xd4034000 0x1000>; + reg = <0xd4034000 0x70>; interrupt-parent = <&twsi_mux>; interrupts = <4>; clocks = <&soc_clocks MMP2_CLK_TWSI5>; diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts index 26160c324802..942e3a2cac35 100644 --- a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts +++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts @@ -143,7 +143,7 @@ compatible = "smsc,usb3503a"; reg = <0x8>; connect-gpios = <&gpioext2 1 GPIO_ACTIVE_HIGH>; - intn-gpios = <&gpioext2 0 GPIO_ACTIVE_LOW>; + intn-gpios = <&gpioext2 0 GPIO_ACTIVE_HIGH>; initial-mode = <1>; }; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index fb928503ad45..d9be511f054f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -101,7 +101,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index e7e4bb5ad8d5..fde84f123fbb 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -350,6 +350,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 26d6dee67aa6..3608e55eaecd 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -462,6 +462,7 @@ CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set CONFIG_PROVE_LOCKING=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 3f1b96dc7faa..f5d19cc1f4f2 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -496,6 +496,7 @@ CONFIG_IMX_THERMAL=y CONFIG_ROCKCHIP_THERMAL=y CONFIG_RCAR_THERMAL=y CONFIG_ARMADA_THERMAL=y +CONFIG_BCM2711_THERMAL=m CONFIG_BCM2835_THERMAL=m CONFIG_BRCMSTB_THERMAL=m CONFIG_ST_THERMAL_MEMMAP=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 8c37cc8ab6f2..c32c338f7704 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -92,6 +92,7 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETFILTER=y CONFIG_PHONET=m +CONFIG_NET_SWITCHDEV=y CONFIG_CAN=m CONFIG_CAN_C_CAN=m CONFIG_CAN_C_CAN_PLATFORM=m @@ -181,6 +182,7 @@ CONFIG_SMSC911X=y # CONFIG_NET_VENDOR_STMICRO is not set CONFIG_TI_DAVINCI_EMAC=y CONFIG_TI_CPSW=y +CONFIG_TI_CPSW_SWITCHDEV=y CONFIG_TI_CPTS=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set @@ -554,6 +556,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_SPLIT=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_TI_CPSW_SWITCHDEV=y diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig index bda57cafa2bc..de3830443613 100644 --- a/arch/arm/configs/shmobile_defconfig +++ b/arch/arm/configs/shmobile_defconfig @@ -212,4 +212,5 @@ CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index cdb1a07e7ad0..b668c97663ec 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -138,14 +138,8 @@ static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - ret = ce_aes_expandkey(ctx, in_key, key_len); - if (!ret) - return 0; - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; + return ce_aes_expandkey(ctx, in_key, key_len); } struct crypto_aes_xts_ctx { @@ -167,11 +161,7 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (!ret) ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], key_len / 2); - if (!ret) - return 0; - - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; + return ret; } static int ecb_encrypt(struct skcipher_request *req) diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 6ebbb2b241d2..6fdb0ac62b3d 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - if (!neon) { + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, nbytes, state, ctx->nrounds); state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); @@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_request *req, bool neon) chacha_init_generic(state, ctx->key, req->iv); - if (!neon) { + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { hchacha_block_arm(state, subctx.key, ctx->nrounds); } else { kernel_neon_begin(); diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 95592499b9bd..2208445808d7 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -54,10 +54,8 @@ static int crc32_setkey(struct crypto_shash *hash, const u8 *key, { u32 *mctx = crypto_shash_ctx(hash); - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(u32)) return -EINVAL; - } *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index f3f42cf3b893..776ae07e0469 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], } EXPORT_SYMBOL(curve25519_arch); +void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + return curve25519_arch(pub, secret, curve25519_base_point); +} +EXPORT_SYMBOL(curve25519_base_arch); + static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, unsigned int len) { diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index c691077679a6..a00fd329255f 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -163,10 +163,8 @@ static int ghash_setkey(struct crypto_shash *tfm, struct ghash_key *key = crypto_shash_ctx(tfm); be128 h; - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; - } /* needed for the fallback */ memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); @@ -296,16 +294,11 @@ static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, { struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_ahash *child = &ctx->cryptd_tfm->base; - int err; crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - err = crypto_ahash_setkey(child, key, keylen); - crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) - & CRYPTO_TFM_RES_MASK); - - return err; + return crypto_ahash_setkey(child, key, keylen); } static int ghash_async_init_tfm(struct crypto_tfm *tfm) diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c index abe3f2d587dc..ceec04ec2f40 100644 --- a/arch/arm/crypto/poly1305-glue.c +++ b/arch/arm/crypto/poly1305-glue.c @@ -20,7 +20,7 @@ void poly1305_init_arm(void *state, const u8 *key); void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); -void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); +void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) { @@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch); void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) { - __le32 digest[4]; - u64 f = 0; - if (unlikely(dctx->buflen)) { dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, @@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_arm(&dctx->h, digest, dctx->s); - - /* mac = (h + s) % (2^128) */ - f = (f >> 32) + le32_to_cpu(digest[0]); - put_unaligned_le32(f, dst); - f = (f >> 32) + le32_to_cpu(digest[1]); - put_unaligned_le32(f, dst + 4); - f = (f >> 32) + le32_to_cpu(digest[2]); - put_unaligned_le32(f, dst + 8); - f = (f >> 32) + le32_to_cpu(digest[3]); - put_unaligned_le32(f, dst + 12); - + poly1305_emit_arm(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index fa50bb04f580..b5752f0e8936 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <asm/barrier.h> #include <asm/cacheflush.h> #include <asm/cp15.h> @@ -327,6 +328,7 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) /* * GITS_VPROPBASER - hi and lo bits may be accessed independently. */ +#define gits_read_vpropbaser(c) __gic_readq_nonatomic(c) #define gits_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c) /* diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 7667826b93f1..5ac46e2860bc 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -50,19 +50,16 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define __efi_call_early(f, ...) f(__VA_ARGS__) -#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) -#define efi_is_64bit() (false) +#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) +#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) +#define efi_is_native() (true) -#define efi_table_attr(table, attr, instance) \ - ((table##_t *)instance)->attr +#define efi_table_attr(inst, attr) (inst->attr) -#define efi_call_proto(protocol, f, instance, ...) \ - ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) +#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) -struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg); -void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si); +struct screen_info *alloc_screen_info(void); +void free_screen_info(struct screen_info *si); static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index aefdabdbeb84..ab2b654084fa 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -356,7 +356,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, * * Function Memory type Cacheability Cache hint * ioremap() Device n/a n/a - * ioremap_nocache() Device n/a n/a * ioremap_cache() Normal Writeback Read allocate * ioremap_wc() Normal Non-cacheable n/a * ioremap_wt() Normal Non-cacheable n/a @@ -368,13 +367,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, * - unaligned accesses are "unpredictable" * - writes may be delayed before they hit the endpoint device * - * ioremap_nocache() is the same as ioremap() as there are too many device - * drivers using this for device registers, and documentation which tells - * people to use it for such for this to be any different. This is not a - * safe fallback for memory-like mappings, or memory regions where the - * compiler may generate unaligned accesses - eg, via inlining its own - * memcpy. - * * All normal memory mappings have the following properties: * - reads can be repeated with no side effects * - repeated reads return the last value written diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9b118516d2db..3944305e81df 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -9,18 +9,29 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_arm.h> #include <asm/cputype.h> /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +52,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -182,6 +198,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; @@ -198,7 +219,7 @@ static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu) } /* Get Access Size from a data abort */ -static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) { switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) { case 0: @@ -209,7 +230,7 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) return 4; default: kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); - return -EFAULT; + return 4; } } diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 556cd818eccf..c3314b286a61 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -14,7 +14,6 @@ #include <asm/cputype.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/fpstate.h> #include <kvm/arm_arch_timer.h> @@ -202,9 +201,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -284,8 +280,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -300,6 +294,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) {} +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -363,9 +365,9 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); static inline bool kvm_arch_requires_vhe(void) { return false; } static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 40e9034db601..3c1b55ecc578 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -10,6 +10,7 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> #include <asm/cp15.h> +#include <asm/kvm_arm.h> #include <asm/vfp.h> #define __hyp_text __section(.hyp.text) notrace diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h deleted file mode 100644 index 7c0eddb0adb2..000000000000 --- a/arch/arm/include/asm/kvm_mmio.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#ifndef __ARM_KVM_MMIO_H__ -#define __ARM_KVM_MMIO_H__ - -#include <linux/kvm_host.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> - -struct kvm_decode { - unsigned long rt; - bool sign_extend; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM_KVM_MMIO_H__ */ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 51beec41d48c..0d3ea35c97fe 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -189,6 +189,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) } #define pmd_large(pmd) (pmd_val(pmd) & 2) +#define pmd_leaf(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) #define pmd_present(pmd) (pmd_val(pmd)) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5b18295021a0..ad55ab068dbf 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -134,6 +134,7 @@ #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) #define pmd_large(pmd) pmd_sect(pmd) +#define pmd_leaf(pmd) pmd_sect(pmd) #define pud_clear(pudp) \ do { \ diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index d3e937dcee4d..007d8fea7157 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,7 @@ * to ensure that the maintenance completes in case we migrate to another * CPU. */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) +#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) #define __complete_pending_tlbi() dsb(ish) #else #define __complete_pending_tlbi() diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 669474add486..4d4e7b6aabff 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -37,10 +37,6 @@ static inline void __tlb_remove_table(void *_table) #include <asm-generic/tlb.h> -#ifndef CONFIG_HAVE_RCU_TABLE_FREE -#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry) -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 0ad2429c324f..fe6e1f65932d 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -52,6 +52,24 @@ static __always_inline long clock_gettime_fallback( return ret; } +static __always_inline long clock_gettime32_fallback( + clockid_t _clkid, + struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_gettime; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline int clock_getres_fallback( clockid_t _clkid, struct __kernel_timespec *_ts) @@ -70,6 +88,24 @@ static __always_inline int clock_getres_fallback( return ret; } +static __always_inline int clock_getres32_fallback( + clockid_t _clkid, + struct old_timespec32 *_ts) +{ + register struct old_timespec32 *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_getres; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + static __always_inline u64 __arch_get_hw_counter(int clock_mode) { #ifdef CONFIG_ARM_ARCH_TIMER diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h index c4166f317071..cff87d8d30da 100644 --- a/arch/arm/include/asm/vdso/vsyscall.h +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -34,9 +34,9 @@ struct vdso_data *__arm_get_k_vdso_data(void) #define __arch_get_k_vdso_data __arm_get_k_vdso_data static __always_inline -int __arm_update_vdso_data(void) +bool __arm_update_vdso_data(void) { - return !cntvct_ok; + return cntvct_ok; } #define __arch_update_vdso_data __arm_update_vdso_data diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h new file mode 100644 index 000000000000..a9b3718b8600 --- /dev/null +++ b/arch/arm/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM_VMALLOC_H +#define _ASM_ARM_VMALLOC_H + +#endif /* _ASM_ARM_VMALLOC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8b679e2ca3c3..89e5d864e923 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -53,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c index 312cb89ec364..4247ebf4b893 100644 --- a/arch/arm/kernel/atags_proc.c +++ b/arch/arm/kernel/atags_proc.c @@ -17,9 +17,9 @@ static ssize_t atags_read(struct file *file, char __user *buf, return simple_read_from_buffer(buf, count, ppos, b->data, b->size); } -static const struct file_operations atags_fops = { - .read = atags_read, - .llseek = default_llseek, +static const struct proc_ops atags_proc_ops = { + .proc_read = atags_read, + .proc_lseek = default_llseek, }; #define BOOT_PARAMS_SIZE 1536 @@ -61,7 +61,7 @@ static int __init init_atags_procfs(void) b->size = size; memcpy(b->data, atags_copy, size); - tags_entry = proc_create_data("atags", 0400, NULL, &atags_fops, b); + tags_entry = proc_create_data("atags", 0400, NULL, &atags_proc_ops, b); if (!tags_entry) goto nomem; diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 858d4e541532..77f54830554c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -211,7 +211,7 @@ __irq_svc: svc_entry irq_handler -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 @@ -226,7 +226,7 @@ ENDPROC(__irq_svc) .ltorg -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION svc_preempt: mov r8, lr 1: bl preempt_schedule_irq @ irq en/disable is done inside diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index bda949fd84e8..2a5ff69c28e6 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <asm/ftrace.h> #include <asm/insn.h> #include <asm/set_memory.h> +#include <asm/patch.h> #ifdef CONFIG_THUMB2_KERNEL #define NOP 0xf85deb04 /* pop.w {lr} */ @@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data) { int *command = data; - set_kernel_text_rw(); ftrace_modify_all_code(*command); - set_kernel_text_ro(); return 0; } @@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) int ftrace_arch_code_modify_prepare(void) { - set_all_modules_text_rw(); return 0; } int ftrace_arch_code_modify_post_process(void) { - set_all_modules_text_ro(); /* Make sure any TLB misses during machine stop are cleared. */ flush_tlb_all(); return 0; @@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, return -EINVAL; } - if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE)) - return -EPERM; - - flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + __patch_text((void *)pc, new); return 0; } diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index ae5020302de4..6607fa817bba 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -146,10 +146,9 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - lsr r7, #16 - and r7, #0xf - cmp r7, #1 - bne 1f + ubfx r7, r7, #16, #4 + teq r7, #0 + beq 1f mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index cea1c27c29cb..46e478fb5ea2 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -226,8 +226,8 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) +copy_thread_tls(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, unsigned long tls) { struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); @@ -261,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_ptrace_hw_breakpoint(p); if (clone_flags & CLONE_SETTLS) - thread->tp_value[0] = childregs->ARM_r3; + thread->tp_value[0] = tls; thread->tp_value[1] = get_tpuser(); thread_notify(THREAD_NOTIFY_COPY, thread); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d0a464e317ea..d8e18cdd96d3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1164,8 +1164,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; #endif #endif diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index c053abd1fb53..abb7dd7e656f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" +#elif defined(CONFIG_PREEMPT_RT) +#define S_PREEMPT " PREEMPT_RT" #else #define S_PREEMPT "" #endif diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 0e6f23504c26..9f7ae0d8690f 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -34,11 +34,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static u64 core_reg_offset_from_id(u64 id) { return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c index dbe296798647..fa0300d8c79d 100644 --- a/arch/arm/mach-bcm/bcm2711.c +++ b/arch/arm/mach-bcm/bcm2711.c @@ -13,6 +13,7 @@ static const char * const bcm2711_compat[] = { #ifdef CONFIG_ARCH_MULTI_V7 "brcm,bcm2711", #endif + NULL }; DT_MACHINE_START(BCM2711, "BCM2711") diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 21400b3fa5fe..c9db2a9006d9 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -105,7 +105,7 @@ static int nsp_write_lut(unsigned int cpu) if (!secondary_boot_addr) return -EINVAL; - sku_rom_lut = ioremap_nocache((phys_addr_t)secondary_boot_addr, + sku_rom_lut = ioremap((phys_addr_t)secondary_boot_addr, sizeof(phys_addr_t)); if (!sku_rom_lut) { pr_warn("unable to ioremap SKU-ROM LUT register for cpu %u\n", cpu); @@ -174,7 +174,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct *idle) if (!secondary_boot_addr) return -EINVAL; - boot_reg = ioremap_nocache((phys_addr_t)secondary_boot_addr, + boot_reg = ioremap((phys_addr_t)secondary_boot_addr, sizeof(phys_addr_t)); if (!boot_reg) { pr_err("unable to map boot register for cpu %u\n", cpu_id); diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index dd427bd2768c..02b180ad7245 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,6 +9,7 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO + select RESET_CONTROLLER select HAVE_IDE select PINCTRL_SINGLE diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 9d87d4e440ea..040c949414fa 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -823,6 +823,17 @@ static int davinci_phy_fixup(struct phy_device *phydev) #define HAS_NAND IS_ENABLED(CONFIG_MTD_NAND_DAVINCI) +#define GPIO_nVBUS_DRV 160 + +static struct gpiod_lookup_table dm644evm_usb_gpio_table = { + .dev_id = "musb-davinci", + .table = { + GPIO_LOOKUP("davinci_gpio", GPIO_nVBUS_DRV, NULL, + GPIO_ACTIVE_HIGH), + { } + }, +}; + static __init void davinci_evm_init(void) { int ret; @@ -875,6 +886,7 @@ static __init void davinci_evm_init(void) dm644x_init_asp(); /* irlml6401 switches over 1A, in under 8 msec */ + gpiod_add_lookup_table(&dm644evm_usb_gpio_table); davinci_setup_usb(1000, 8); if (IS_BUILTIN(CONFIG_PHYLIB)) { diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 3e447d468845..e650131ee88f 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -34,7 +34,7 @@ void __iomem *davinci_sysmod_base; void davinci_map_sysmod(void) { - davinci_sysmod_base = ioremap_nocache(DAVINCI_SYSTEM_MODULE_BASE, + davinci_sysmod_base = ioremap(DAVINCI_SYSTEM_MODULE_BASE, 0x800); /* * Throw a bug since a lot of board initialization code depends diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 4ef56571145b..6e7f10c8098a 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -12,6 +12,7 @@ menuconfig ARCH_EXYNOS select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC + select EXYNOS_IRQ_COMBINER select COMMON_CLK_SAMSUNG select EXYNOS_ASV select EXYNOS_CHIPID diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c index d8118031c51f..871f98342d50 100644 --- a/arch/arm/mach-imx/cpu.c +++ b/arch/arm/mach-imx/cpu.c @@ -84,7 +84,7 @@ struct device * __init imx_soc_device_init(void) const char *ocotp_compat = NULL; struct soc_device *soc_dev; struct device_node *root; - struct regmap *ocotp; + struct regmap *ocotp = NULL; const char *soc_id; u64 soc_uid = 0; u32 val; @@ -148,11 +148,11 @@ struct device * __init imx_soc_device_init(void) soc_id = "i.MX6UL"; break; case MXC_CPU_IMX6ULL: - ocotp_compat = "fsl,imx6ul-ocotp"; + ocotp_compat = "fsl,imx6ull-ocotp"; soc_id = "i.MX6ULL"; break; case MXC_CPU_IMX6ULZ: - ocotp_compat = "fsl,imx6ul-ocotp"; + ocotp_compat = "fsl,imx6ull-ocotp"; soc_id = "i.MX6ULZ"; break; case MXC_CPU_IMX6SLL: @@ -175,7 +175,9 @@ struct device * __init imx_soc_device_init(void) ocotp = syscon_regmap_lookup_by_compatible(ocotp_compat); if (IS_ERR(ocotp)) pr_err("%s: failed to find %s regmap!\n", __func__, ocotp_compat); + } + if (!IS_ERR_OR_NULL(ocotp)) { regmap_read(ocotp, OCOTP_UID_H, &val); soc_uid = val; regmap_read(ocotp, OCOTP_UID_L, &val); diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c index 648932d8d7a8..507ee3878769 100644 --- a/arch/arm/mach-ixp4xx/fsg-setup.c +++ b/arch/arm/mach-ixp4xx/fsg-setup.c @@ -132,6 +132,22 @@ static struct platform_device fsg_leds = { }; /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource fsg_eth_npeb_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource fsg_eth_npec_resources[] = { + { + .start = IXP4XX_EthC_BASE_PHYS, + .end = IXP4XX_EthC_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info fsg_plat_eth[] = { { .phy = 5, @@ -151,12 +167,16 @@ static struct platform_device fsg_eth[] = { .dev = { .platform_data = fsg_plat_eth, }, + .num_resources = ARRAY_SIZE(fsg_eth_npeb_resources), + .resource = fsg_eth_npeb_resources, }, { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEC, .dev = { .platform_data = fsg_plat_eth + 1, }, + .num_resources = ARRAY_SIZE(fsg_eth_npec_resources), + .resource = fsg_eth_npec_resources, } }; diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c index a0e0b6b7dc5c..07b50dfcc489 100644 --- a/arch/arm/mach-ixp4xx/goramo_mlr.c +++ b/arch/arm/mach-ixp4xx/goramo_mlr.c @@ -11,6 +11,7 @@ #include <linux/irq.h> #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/platform_data/wan_ixp4xx_hss.h> #include <linux/serial_8250.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -272,6 +273,22 @@ static struct platform_device device_uarts = { /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource eth_npeb_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource eth_npec_resources[] = { + { + .start = IXP4XX_EthC_BASE_PHYS, + .end = IXP4XX_EthC_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info eth_plat[] = { { .phy = 0, @@ -289,10 +306,14 @@ static struct platform_device device_eth_tab[] = { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEB, .dev.platform_data = eth_plat, + .num_resources = ARRAY_SIZE(eth_npeb_resources), + .resource = eth_npeb_resources, }, { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEC, .dev.platform_data = eth_plat + 1, + .num_resources = ARRAY_SIZE(eth_npec_resources), + .resource = eth_npec_resources, } }; @@ -405,6 +426,9 @@ static void __init gmlr_init(void) if (hw_bits & CFG_HW_HAS_HSS1) device_tab[devices++] = &device_hss_tab[1]; /* max index 5 */ + hss_plat[0].timer_freq = ixp4xx_timer_freq; + hss_plat[1].timer_freq = ixp4xx_timer_freq; + gpio_request(GPIO_SCL, "SCL/clock"); gpio_request(GPIO_SDA, "SDA/data"); gpio_request(GPIO_STR, "strobe"); diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h b/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h deleted file mode 100644 index d792130e27b0..000000000000 --- a/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PTP 1588 clock using the IXP46X - * - * Copyright (C) 2010 OMICRON electronics GmbH - */ - -#ifndef _IXP46X_TS_H_ -#define _IXP46X_TS_H_ - -#define DEFAULT_ADDEND 0xF0000029 -#define TICKS_NS_SHIFT 4 - -struct ixp46x_channel_ctl { - u32 ch_control; /* 0x40 Time Synchronization Channel Control */ - u32 ch_event; /* 0x44 Time Synchronization Channel Event */ - u32 tx_snap_lo; /* 0x48 Transmit Snapshot Low Register */ - u32 tx_snap_hi; /* 0x4C Transmit Snapshot High Register */ - u32 rx_snap_lo; /* 0x50 Receive Snapshot Low Register */ - u32 rx_snap_hi; /* 0x54 Receive Snapshot High Register */ - u32 src_uuid_lo; /* 0x58 Source UUID0 Low Register */ - u32 src_uuid_hi; /* 0x5C Sequence Identifier/Source UUID0 High */ -}; - -struct ixp46x_ts_regs { - u32 control; /* 0x00 Time Sync Control Register */ - u32 event; /* 0x04 Time Sync Event Register */ - u32 addend; /* 0x08 Time Sync Addend Register */ - u32 accum; /* 0x0C Time Sync Accumulator Register */ - u32 test; /* 0x10 Time Sync Test Register */ - u32 unused; /* 0x14 */ - u32 rsystime_lo; /* 0x18 RawSystemTime_Low Register */ - u32 rsystime_hi; /* 0x1C RawSystemTime_High Register */ - u32 systime_lo; /* 0x20 SystemTime_Low Register */ - u32 systime_hi; /* 0x24 SystemTime_High Register */ - u32 trgt_lo; /* 0x28 TargetTime_Low Register */ - u32 trgt_hi; /* 0x2C TargetTime_High Register */ - u32 asms_lo; /* 0x30 Auxiliary Slave Mode Snapshot Low */ - u32 asms_hi; /* 0x34 Auxiliary Slave Mode Snapshot High */ - u32 amms_lo; /* 0x38 Auxiliary Master Mode Snapshot Low */ - u32 amms_hi; /* 0x3C Auxiliary Master Mode Snapshot High */ - - struct ixp46x_channel_ctl channel[3]; -}; - -/* 0x00 Time Sync Control Register Bits */ -#define TSCR_AMM (1<<3) -#define TSCR_ASM (1<<2) -#define TSCR_TTM (1<<1) -#define TSCR_RST (1<<0) - -/* 0x04 Time Sync Event Register Bits */ -#define TSER_SNM (1<<3) -#define TSER_SNS (1<<2) -#define TTIPEND (1<<1) - -/* 0x40 Time Synchronization Channel Control Register Bits */ -#define MASTER_MODE (1<<0) -#define TIMESTAMP_ALL (1<<1) - -/* 0x44 Time Synchronization Channel Event Register Bits */ -#define TX_SNAPSHOT_LOCKED (1<<0) -#define RX_SNAPSHOT_LOCKED (1<<1) - -/* The ptp_ixp46x module will set this variable */ -extern int ixp46x_phc_index; - -#endif diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h index 342acbe20f7c..6d403fe0bf52 100644 --- a/arch/arm/mach-ixp4xx/include/mach/platform.h +++ b/arch/arm/mach-ixp4xx/include/mach/platform.h @@ -15,6 +15,7 @@ #ifndef __ASSEMBLY__ #include <linux/reboot.h> +#include <linux/platform_data/eth_ixp4xx.h> #include <asm/types.h> @@ -92,27 +93,6 @@ struct ixp4xx_pata_data { void __iomem *cs1; }; -#define IXP4XX_ETH_NPEA 0x00 -#define IXP4XX_ETH_NPEB 0x10 -#define IXP4XX_ETH_NPEC 0x20 - -/* Information about built-in Ethernet MAC interfaces */ -struct eth_plat_info { - u8 phy; /* MII PHY ID, 0 - 31 */ - u8 rxq; /* configurable, currently 0 - 31 only */ - u8 txreadyq; - u8 hwaddr[6]; -}; - -/* Information about built-in HSS (synchronous serial) interfaces */ -struct hss_plat_info { - int (*set_clock)(int port, unsigned int clock_type); - int (*open)(int port, void *pdev, - void (*set_carrier_cb)(void *pdev, int carrier)); - void (*close)(int port, void *pdev); - u8 txreadyq; -}; - /* * Frequency of clock used for primary clocksource */ diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index 6f0f7ed18ea8..45d5b720ded6 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -187,6 +187,22 @@ static struct platform_device ixdp425_uart = { }; /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource ixp425_npeb_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource ixp425_npec_resources[] = { + { + .start = IXP4XX_EthC_BASE_PHYS, + .end = IXP4XX_EthC_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info ixdp425_plat_eth[] = { { .phy = 0, @@ -204,10 +220,14 @@ static struct platform_device ixdp425_eth[] = { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEB, .dev.platform_data = ixdp425_plat_eth, + .num_resources = ARRAY_SIZE(ixp425_npeb_resources), + .resource = ixp425_npeb_resources, }, { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEC, .dev.platform_data = ixdp425_plat_eth + 1, + .num_resources = ARRAY_SIZE(ixp425_npec_resources), + .resource = ixp425_npec_resources, } }; diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index c142cfa8c5d6..6959ad2e3aec 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -165,6 +165,14 @@ static struct platform_device nas100d_uart = { }; /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource nas100d_eth_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info nas100d_plat_eth[] = { { .phy = 0, @@ -178,6 +186,8 @@ static struct platform_device nas100d_eth[] = { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEB, .dev.platform_data = nas100d_plat_eth, + .num_resources = ARRAY_SIZE(nas100d_eth_resources), + .resource = nas100d_eth_resources, } }; diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c index ee1877fcfafe..a428bb918703 100644 --- a/arch/arm/mach-ixp4xx/nslu2-setup.c +++ b/arch/arm/mach-ixp4xx/nslu2-setup.c @@ -185,6 +185,14 @@ static struct platform_device nslu2_uart = { }; /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource nslu2_eth_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info nslu2_plat_eth[] = { { .phy = 1, @@ -198,6 +206,8 @@ static struct platform_device nslu2_eth[] = { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEB, .dev.platform_data = nslu2_plat_eth, + .num_resources = ARRAY_SIZE(nslu2_eth_resources), + .resource = nslu2_eth_resources, } }; diff --git a/arch/arm/mach-ixp4xx/omixp-setup.c b/arch/arm/mach-ixp4xx/omixp-setup.c index 6ed5a9aed600..8f2b8c473d7a 100644 --- a/arch/arm/mach-ixp4xx/omixp-setup.c +++ b/arch/arm/mach-ixp4xx/omixp-setup.c @@ -170,6 +170,22 @@ static struct platform_device mic256_leds = { }; /* Built-in 10/100 Ethernet MAC interfaces */ +static struct resource ixp425_npeb_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource ixp425_npec_resources[] = { + { + .start = IXP4XX_EthC_BASE_PHYS, + .end = IXP4XX_EthC_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info ixdp425_plat_eth[] = { { .phy = 0, @@ -187,10 +203,14 @@ static struct platform_device ixdp425_eth[] = { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEB, .dev.platform_data = ixdp425_plat_eth, + .num_resources = ARRAY_SIZE(ixp425_npeb_resources), + .resource = ixp425_npeb_resources, }, { .name = "ixp4xx_eth", .id = IXP4XX_ETH_NPEC, .dev.platform_data = ixdp425_plat_eth + 1, + .num_resources = ARRAY_SIZE(ixp425_npec_resources), + .resource = ixp425_npec_resources, }, }; diff --git a/arch/arm/mach-ixp4xx/vulcan-setup.c b/arch/arm/mach-ixp4xx/vulcan-setup.c index d2ebb7c675a8..e506d2af98ad 100644 --- a/arch/arm/mach-ixp4xx/vulcan-setup.c +++ b/arch/arm/mach-ixp4xx/vulcan-setup.c @@ -124,6 +124,22 @@ static struct platform_device vulcan_uart = { .num_resources = ARRAY_SIZE(vulcan_uart_resources), }; +static struct resource vulcan_npeb_resources[] = { + { + .start = IXP4XX_EthB_BASE_PHYS, + .end = IXP4XX_EthB_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource vulcan_npec_resources[] = { + { + .start = IXP4XX_EthC_BASE_PHYS, + .end = IXP4XX_EthC_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, +}; + static struct eth_plat_info vulcan_plat_eth[] = { [0] = { .phy = 0, @@ -144,6 +160,8 @@ static struct platform_device vulcan_eth[] = { .dev = { .platform_data = &vulcan_plat_eth[0], }, + .num_resources = ARRAY_SIZE(vulcan_npeb_resources), + .resource = vulcan_npeb_resources, }, [1] = { .name = "ixp4xx_eth", @@ -151,6 +169,8 @@ static struct platform_device vulcan_eth[] = { .dev = { .platform_data = &vulcan_plat_eth[1], }, + .num_resources = ARRAY_SIZE(vulcan_npec_resources), + .resource = vulcan_npec_resources, }, }; diff --git a/arch/arm/mach-mmp/pxa168.h b/arch/arm/mach-mmp/pxa168.h index 0331c58b07a2..dff651b9f252 100644 --- a/arch/arm/mach-mmp/pxa168.h +++ b/arch/arm/mach-mmp/pxa168.h @@ -17,9 +17,9 @@ extern void pxa168_clear_keypad_wakeup(void); #include <linux/platform_data/keypad-pxa27x.h> #include <linux/pxa168_eth.h> #include <linux/platform_data/mv_usb.h> +#include <linux/soc/mmp/cputype.h> #include "devices.h" -#include "cputype.h" extern struct pxa_device_desc pxa168_device_uart1; extern struct pxa_device_desc pxa168_device_uart2; diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 110dcb3314d1..c65cfc1ad99b 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -207,7 +207,7 @@ static int __init mmp_dt_init_timer(struct device_node *np) ret = clk_prepare_enable(clk); if (ret) return ret; - rate = clk_get_rate(clk) / 2; + rate = clk_get_rate(clk); } else if (cpu_is_pj4()) { rate = 6500000; } else { diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ad08d470a2ca..dca7d06c0b93 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -95,6 +95,7 @@ config ARCH_OMAP2PLUS bool select ARCH_HAS_BANDGAP select ARCH_HAS_HOLES_MEMORYMODEL + select ARCH_HAS_RESET_CONTROLLER select ARCH_OMAP select CLKSRC_MMIO select GENERIC_IRQ_CHIP @@ -105,11 +106,11 @@ config ARCH_OMAP2PLUS select OMAP_DM_TIMER select OMAP_GPMC select PINCTRL + select RESET_CONTROLLER select SOC_BUS select TI_SYSC select OMAP_IRQCHIP select CLKSRC_TI_32K - select ARCH_HAS_RESET_CONTROLLER help Systems based on OMAP2, OMAP3, OMAP4 or OMAP5 diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index ca52271de5a8..e95c224ffc4d 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -306,10 +306,14 @@ static void __init dra7x_evm_mmc_quirk(void) static struct clockdomain *ti_sysc_find_one_clockdomain(struct clk *clk) { + struct clk_hw *hw = __clk_get_hw(clk); struct clockdomain *clkdm = NULL; struct clk_hw_omap *hwclk; - hwclk = to_clk_hw_omap(__clk_get_hw(clk)); + hwclk = to_clk_hw_omap(hw); + if (!omap2_clk_is_hw_omap(hw)) + return NULL; + if (hwclk && hwclk->clkdm_name) clkdm = clkdm_lookup(hwclk->clkdm_name); diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c index eba917d69c0a..35dd3adb7712 100644 --- a/arch/arm/mach-pxa/colibri-pxa320.c +++ b/arch/arm/mach-pxa/colibri-pxa320.c @@ -11,9 +11,9 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/platform_device.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/interrupt.h> -#include <linux/usb/gpio_vbus.h> #include <asm/mach-types.h> #include <linux/sizes.h> @@ -144,17 +144,18 @@ static inline void __init colibri_pxa320_init_eth(void) {} #endif /* CONFIG_AX88796 */ #if defined(CONFIG_USB_PXA27X)||defined(CONFIG_USB_PXA27X_MODULE) -static struct gpio_vbus_mach_info colibri_pxa320_gpio_vbus_info = { - .gpio_vbus = mfp_to_gpio(MFP_PIN_GPIO96), - .gpio_pullup = -1, +static struct gpiod_lookup_table gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", MFP_PIN_GPIO96, + "vbus", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device colibri_pxa320_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &colibri_pxa320_gpio_vbus_info, - }, }; static void colibri_pxa320_udc_command(int cmd) @@ -173,6 +174,7 @@ static struct pxa2xx_udc_mach_info colibri_pxa320_udc_info __initdata = { static void __init colibri_pxa320_init_udc(void) { pxa_set_udc_info(&colibri_pxa320_udc_info); + gpiod_add_lookup_table(&gpio_vbus_gpiod_table); platform_device_register(&colibri_pxa320_gpio_vbus); } #else diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c index 91f7c3e40065..f37c44b6139d 100644 --- a/arch/arm/mach-pxa/eseries.c +++ b/arch/arm/mach-pxa/eseries.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/clk-provider.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/delay.h> #include <linux/platform_device.h> @@ -22,7 +23,6 @@ #include <linux/mfd/t7l66xb.h> #include <linux/mtd/rawnand.h> #include <linux/mtd/partitions.h> -#include <linux/usb/gpio_vbus.h> #include <linux/memblock.h> #include <video/w100fb.h> @@ -51,18 +51,20 @@ void __init eseries_fixup(struct tag *tags, char **cmdline) memblock_add(0xa0000000, SZ_64M); } -struct gpio_vbus_mach_info e7xx_udc_info = { - .gpio_vbus = GPIO_E7XX_USB_DISC, - .gpio_pullup = GPIO_E7XX_USB_PULLUP, - .gpio_pullup_inverted = 1 +static struct gpiod_lookup_table e7xx_gpio_vbus_gpiod_table __maybe_unused = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_E7XX_USB_DISC, + "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_E7XX_USB_PULLUP, + "pullup", GPIO_ACTIVE_LOW), + { }, + }, }; static struct platform_device e7xx_gpio_vbus __maybe_unused = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &e7xx_udc_info, - }, }; struct pxaficp_platform_data e7xx_ficp_platform_data = { @@ -165,6 +167,7 @@ static void __init e330_init(void) pxa_set_stuart_info(NULL); eseries_register_clks(); eseries_get_tmio_gpios(); + gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e330_devices)); } @@ -216,6 +219,7 @@ static void __init e350_init(void) pxa_set_stuart_info(NULL); eseries_register_clks(); eseries_get_tmio_gpios(); + gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e350_devices)); } @@ -340,6 +344,7 @@ static void __init e400_init(void) eseries_register_clks(); eseries_get_tmio_gpios(); pxa_set_fb_info(NULL, &e400_pxafb_mach_info); + gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e400_devices)); } @@ -534,6 +539,7 @@ static void __init e740_init(void) clk_add_alias("CLK_CK48M", e740_t7l66xb_device.name, "UDCCLK", &pxa25x_device_udc.dev), eseries_get_tmio_gpios(); + gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e740_devices)); pxa_set_ac97_info(NULL); pxa_set_ficp_info(&e7xx_ficp_platform_data); @@ -733,6 +739,7 @@ static void __init e750_init(void) clk_add_alias("CLK_CK3P6MI", e750_tc6393xb_device.name, "GPIO11_CLK", NULL), eseries_get_tmio_gpios(); + gpiod_add_lookup_table(&e7xx_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e750_devices)); pxa_set_ac97_info(NULL); pxa_set_ficp_info(&e7xx_ficp_platform_data); @@ -888,18 +895,20 @@ static struct platform_device e800_fb_device = { /* --------------------------- UDC definitions --------------------------- */ -static struct gpio_vbus_mach_info e800_udc_info = { - .gpio_vbus = GPIO_E800_USB_DISC, - .gpio_pullup = GPIO_E800_USB_PULLUP, - .gpio_pullup_inverted = 1 +static struct gpiod_lookup_table e800_gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_E800_USB_DISC, + "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_E800_USB_PULLUP, + "pullup", GPIO_ACTIVE_LOW), + { }, + }, }; static struct platform_device e800_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &e800_udc_info, - }, }; @@ -949,6 +958,7 @@ static void __init e800_init(void) clk_add_alias("CLK_CK3P6MI", e800_tc6393xb_device.name, "GPIO11_CLK", NULL), eseries_get_tmio_gpios(); + gpiod_add_lookup_table(&e800_gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(e800_devices)); pxa_set_ac97_info(NULL); } diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index 4b4589cf431f..49dd618b10f7 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -20,10 +20,10 @@ #include <linux/delay.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/err.h> #include <linux/clk.h> -#include <linux/usb/gpio_vbus.h> #include <asm/setup.h> #include <asm/memory.h> @@ -101,21 +101,25 @@ static void __init gumstix_mmc_init(void) #endif #ifdef CONFIG_USB_PXA25X -static struct gpio_vbus_mach_info gumstix_udc_info = { - .gpio_vbus = GPIO_GUMSTIX_USB_GPIOn, - .gpio_pullup = GPIO_GUMSTIX_USB_GPIOx, +static struct gpiod_lookup_table gumstix_gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_GUMSTIX_USB_GPIOn, + "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_GUMSTIX_USB_GPIOx, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device gumstix_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &gumstix_udc_info, - }, }; static void __init gumstix_udc_init(void) { + gpiod_add_lookup_table(&gumstix_gpio_vbus_gpiod_table); platform_device_register(&gumstix_gpio_vbus); } #else diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 311268d186ab..238a751a8797 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -34,7 +34,6 @@ #include <linux/spi/ads7846.h> #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> -#include <linux/usb/gpio_vbus.h> #include <linux/platform_data/i2c-pxa.h> #include <mach/hardware.h> @@ -578,18 +577,24 @@ static struct pwm_lookup hx4700_pwm_lookup[] = { * USB "Transceiver" */ -static struct gpio_vbus_mach_info gpio_vbus_info = { - .gpio_pullup = GPIO76_HX4700_USBC_PUEN, - .gpio_vbus = GPIOD14_nUSBC_DETECT, - .gpio_vbus_inverted = 1, +static struct gpiod_lookup_table gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + /* This GPIO is on ASIC3 */ + GPIO_LOOKUP("asic3", + /* Convert to a local offset on the ASIC3 */ + GPIOD14_nUSBC_DETECT - HX4700_ASIC3_GPIO_BASE, + "vbus", GPIO_ACTIVE_LOW), + /* This one is on the primary SOC GPIO */ + GPIO_LOOKUP("gpio-pxa", GPIO76_HX4700_USBC_PUEN, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &gpio_vbus_info, - }, }; static struct pxa2xx_udc_mach_info hx4700_udc_info; @@ -883,6 +888,7 @@ static void __init hx4700_init(void) pxa_set_stuart_info(NULL); gpiod_add_lookup_table(&bq24022_gpiod_table); + gpiod_add_lookup_table(&gpio_vbus_gpiod_table); platform_add_devices(devices, ARRAY_SIZE(devices)); pwm_add_table(hx4700_pwm_lookup, ARRAY_SIZE(hx4700_pwm_lookup)); diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index e1a394ac3eea..5d0591f93f4d 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -27,7 +27,6 @@ #include <linux/regulator/fixed.h> #include <linux/regulator/gpio-regulator.h> #include <linux/regulator/machine.h> -#include <linux/usb/gpio_vbus.h> #include <linux/platform_data/i2c-pxa.h> #include <mach/hardware.h> @@ -506,9 +505,20 @@ static struct resource gpio_vbus_resource = { .end = IRQ_MAGICIAN_VBUS, }; -static struct gpio_vbus_mach_info gpio_vbus_info = { - .gpio_pullup = GPIO27_MAGICIAN_USBC_PUEN, - .gpio_vbus = EGPIO_MAGICIAN_CABLE_VBUS, +static struct gpiod_lookup_table gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + /* + * EGPIO on register 4 index 1, the second EGPIO chip + * starts at register 4 so this will be at index 1 on that + * chip. + */ + GPIO_LOOKUP("htc-egpio-1", 1, + "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO27_MAGICIAN_USBC_PUEN, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device gpio_vbus = { @@ -516,9 +526,6 @@ static struct platform_device gpio_vbus = { .id = -1, .num_resources = 1, .resource = &gpio_vbus_resource, - .dev = { - .platform_data = &gpio_vbus_info, - }, }; /* @@ -1008,7 +1015,7 @@ static void __init magician_init(void) pxa_set_udc_info(&magician_udc_info); /* Check LCD type we have */ - cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000); + cpld = ioremap(PXA_CS3_PHYS, 0x1000); if (cpld) { u8 board_id = __raw_readb(cpld + 0x14); @@ -1032,6 +1039,7 @@ static void __init magician_init(void) ARRAY_SIZE(pwm_backlight_supply), 5000000); gpiod_add_lookup_table(&bq24022_gpiod_table); + gpiod_add_lookup_table(&gpio_vbus_gpiod_table); platform_add_devices(ARRAY_AND_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c index c360023a989c..0b8bae9610f1 100644 --- a/arch/arm/mach-pxa/mioa701.c +++ b/arch/arm/mach-pxa/mioa701.c @@ -24,7 +24,6 @@ #include <linux/power_supply.h> #include <linux/wm97xx.h> #include <linux/mtd/physmap.h> -#include <linux/usb/gpio_vbus.h> #include <linux/reboot.h> #include <linux/regulator/fixed.h> #include <linux/regulator/max1586.h> @@ -368,10 +367,13 @@ static struct pxa2xx_udc_mach_info mioa701_udc_info = { .gpio_pullup = GPIO22_USB_ENABLE, }; -struct gpio_vbus_mach_info gpio_vbus_data = { - .gpio_vbus = GPIO13_nUSB_DETECT, - .gpio_vbus_inverted = 1, - .gpio_pullup = -1, +static struct gpiod_lookup_table gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO13_nUSB_DETECT, + "vbus", GPIO_ACTIVE_LOW), + { }, + }, }; /* @@ -677,7 +679,7 @@ MIO_SIMPLE_DEV(mioa701_led, "leds-gpio", &gpio_led_info) MIO_SIMPLE_DEV(pxa2xx_pcm, "pxa2xx-pcm", NULL) MIO_SIMPLE_DEV(mioa701_sound, "mioa701-wm9713", NULL) MIO_SIMPLE_DEV(mioa701_board, "mioa701-board", NULL) -MIO_SIMPLE_DEV(gpio_vbus, "gpio-vbus", &gpio_vbus_data); +MIO_SIMPLE_DEV(gpio_vbus, "gpio-vbus", NULL); static struct platform_device *devices[] __initdata = { &mioa701_gpio_keys, @@ -750,6 +752,7 @@ static void __init mioa701_machine_init(void) pxa_set_ac97_info(&mioa701_ac97_info); pm_power_off = mioa701_poweroff; pwm_add_table(mioa701_pwm_lookup, ARRAY_SIZE(mioa701_pwm_lookup)); + gpiod_add_lookup_table(&gpio_vbus_gpiod_table); platform_add_devices(devices, ARRAY_SIZE(devices)); gsm_init(); diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c index 3ad0b3915ae1..b600b63af3a6 100644 --- a/arch/arm/mach-pxa/palm27x.c +++ b/arch/arm/mach-pxa/palm27x.c @@ -13,10 +13,10 @@ #include <linux/pda_power.h> #include <linux/pwm.h> #include <linux/pwm_backlight.h> +#include <linux/gpio/machine.h> #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/usb/gpio_vbus.h> #include <linux/regulator/max1586.h> #include <linux/platform_data/i2c-pxa.h> @@ -159,32 +159,32 @@ void __init palm27x_lcd_init(int power, struct pxafb_mode_info *mode) ******************************************************************************/ #if defined(CONFIG_USB_PXA27X) || \ defined(CONFIG_USB_PXA27X_MODULE) -static struct gpio_vbus_mach_info palm27x_udc_info = { - .gpio_vbus_inverted = 1, + +/* The actual GPIO offsets get filled in in the palm27x_udc_init() call */ +static struct gpiod_lookup_table palm27x_udc_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", 0, + "vbus", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", 0, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device palm27x_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &palm27x_udc_info, - }, }; void __init palm27x_udc_init(int vbus, int pullup, int vbus_inverted) { - palm27x_udc_info.gpio_vbus = vbus; - palm27x_udc_info.gpio_pullup = pullup; - - palm27x_udc_info.gpio_vbus_inverted = vbus_inverted; - - if (!gpio_request(pullup, "USB Pullup")) { - gpio_direction_output(pullup, - palm27x_udc_info.gpio_vbus_inverted); - gpio_free(pullup); - } else - return; + palm27x_udc_gpiod_table.table[0].chip_hwnum = vbus; + palm27x_udc_gpiod_table.table[1].chip_hwnum = pullup; + if (vbus_inverted) + palm27x_udc_gpiod_table.table[0].flags = GPIO_ACTIVE_LOW; + gpiod_add_lookup_table(&palm27x_udc_gpiod_table); platform_device_register(&palm27x_gpio_vbus); } #endif diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c index 902403367786..7c7cbb4e677e 100644 --- a/arch/arm/mach-pxa/palmt5.c +++ b/arch/arm/mach-pxa/palmt5.c @@ -23,7 +23,6 @@ #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/usb/gpio_vbus.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c index f52bd155e825..fda9deaaae02 100644 --- a/arch/arm/mach-pxa/palmtc.c +++ b/arch/arm/mach-pxa/palmtc.c @@ -23,7 +23,6 @@ #include <linux/power_supply.h> #include <linux/gpio_keys.h> #include <linux/mtd/physmap.h> -#include <linux/usb/gpio_vbus.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -319,22 +318,25 @@ static inline void palmtc_mkp_init(void) {} * UDC ******************************************************************************/ #if defined(CONFIG_USB_PXA25X)||defined(CONFIG_USB_PXA25X_MODULE) -static struct gpio_vbus_mach_info palmtc_udc_info = { - .gpio_vbus = GPIO_NR_PALMTC_USB_DETECT_N, - .gpio_vbus_inverted = 1, - .gpio_pullup = GPIO_NR_PALMTC_USB_POWER, +static struct gpiod_lookup_table palmtc_udc_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_USB_DETECT_N, + "vbus", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTC_USB_POWER, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device palmtc_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &palmtc_udc_info, - }, }; static void __init palmtc_udc_init(void) { + gpiod_add_lookup_table(&palmtc_udc_gpiod_table); platform_device_register(&palmtc_gpio_vbus); }; #else diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c index a92b9665f425..7171014fd311 100644 --- a/arch/arm/mach-pxa/palmte2.c +++ b/arch/arm/mach-pxa/palmte2.c @@ -23,7 +23,6 @@ #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/usb/gpio_vbus.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -201,18 +200,20 @@ static struct pxaficp_platform_data palmte2_ficp_platform_data = { /****************************************************************************** * UDC ******************************************************************************/ -static struct gpio_vbus_mach_info palmte2_udc_info = { - .gpio_vbus = GPIO_NR_PALMTE2_USB_DETECT_N, - .gpio_vbus_inverted = 1, - .gpio_pullup = GPIO_NR_PALMTE2_USB_PULLUP, +static struct gpiod_lookup_table palmte2_udc_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_USB_DETECT_N, + "vbus", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-pxa", GPIO_NR_PALMTE2_USB_PULLUP, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device palmte2_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &palmte2_udc_info, - }, }; /****************************************************************************** @@ -368,6 +369,7 @@ static void __init palmte2_init(void) pxa_set_ficp_info(&palmte2_ficp_platform_data); pwm_add_table(palmte2_pwm_lookup, ARRAY_SIZE(palmte2_pwm_lookup)); + gpiod_add_lookup_table(&palmte2_udc_gpiod_table); platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index 926593ecf1c9..07332c92c9f7 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -23,7 +23,6 @@ #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/usb/gpio_vbus.h> #include <linux/mtd/platnand.h> #include <linux/mtd/mtd.h> #include <linux/mtd/physmap.h> diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 77fe2e367324..4df443943579 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -25,7 +25,6 @@ #include <linux/gpio.h> #include <linux/wm97xx.h> #include <linux/power_supply.h> -#include <linux/usb/gpio_vbus.h> #include <linux/platform_data/i2c-gpio.h> #include <linux/gpio/machine.h> diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 4e13893edeb9..3d2c108e911e 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -33,7 +33,6 @@ #include <linux/spi/pxa2xx_spi.h> #include <linux/input/matrix_keypad.h> #include <linux/platform_data/i2c-pxa.h> -#include <linux/usb/gpio_vbus.h> #include <linux/reboot.h> #include <linux/memblock.h> @@ -240,18 +239,20 @@ static struct scoop_pcmcia_config tosa_pcmcia_config = { /* * USB Device Controller */ -static struct gpio_vbus_mach_info tosa_udc_info = { - .gpio_pullup = TOSA_GPIO_USB_PULLUP, - .gpio_vbus = TOSA_GPIO_USB_IN, - .gpio_vbus_inverted = 1, +static struct gpiod_lookup_table tosa_udc_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_USB_IN, + "vbus", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_USB_PULLUP, + "pullup", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device tosa_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &tosa_udc_info, - }, }; /* @@ -949,6 +950,7 @@ static void __init tosa_init(void) clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL); + gpiod_add_lookup_table(&tosa_udc_gpiod_table); platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index 26a5ebc00069..14505e83479e 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -14,7 +14,6 @@ #include <linux/leds.h> #include <linux/gpio.h> #include <linux/gpio/machine.h> -#include <linux/usb/gpio_vbus.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> @@ -352,17 +351,18 @@ static inline void vpac270_uhc_init(void) {} * USB Gadget ******************************************************************************/ #if defined(CONFIG_USB_PXA27X)||defined(CONFIG_USB_PXA27X_MODULE) -static struct gpio_vbus_mach_info vpac270_gpio_vbus_info = { - .gpio_vbus = GPIO41_VPAC270_UDC_DETECT, - .gpio_pullup = -1, +static struct gpiod_lookup_table vpac270_gpio_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO41_VPAC270_UDC_DETECT, + "vbus", GPIO_ACTIVE_HIGH), + { }, + }, }; static struct platform_device vpac270_gpio_vbus = { .name = "gpio-vbus", .id = -1, - .dev = { - .platform_data = &vpac270_gpio_vbus_info, - }, }; static void vpac270_udc_command(int cmd) @@ -381,6 +381,7 @@ static struct pxa2xx_udc_mach_info vpac270_udc_info __initdata = { static void __init vpac270_udc_init(void) { pxa_set_udc_info(&vpac270_udc_info); + gpiod_add_lookup_table(&vpac270_gpio_vbus_gpiod_table); platform_device_register(&vpac270_gpio_vbus); } #else diff --git a/arch/arm/mach-s3c64xx/mach-smartq.c b/arch/arm/mach-s3c64xx/mach-smartq.c index 951208f168e7..829d5dbd69ee 100644 --- a/arch/arm/mach-s3c64xx/mach-smartq.c +++ b/arch/arm/mach-s3c64xx/mach-smartq.c @@ -13,7 +13,6 @@ #include <linux/serial_core.h> #include <linux/serial_s3c.h> #include <linux/spi/spi_gpio.h> -#include <linux/usb/gpio_vbus.h> #include <linux/platform_data/s3c-hsotg.h> #include <asm/mach-types.h> @@ -124,15 +123,16 @@ static struct s3c2410_hcd_info smartq_usb_host_info = { .enable_oc = smartq_usb_host_enableoc, }; -static struct gpio_vbus_mach_info smartq_usb_otg_vbus_pdata = { - .gpio_vbus = S3C64XX_GPL(9), - .gpio_pullup = -1, - .gpio_vbus_inverted = true, +static struct gpiod_lookup_table smartq_usb_otg_vbus_gpiod_table = { + .dev_id = "gpio-vbus", + .table = { + GPIO_LOOKUP("GPL", 9, "vbus", GPIO_ACTIVE_LOW), + { }, + }, }; static struct platform_device smartq_usb_otg_vbus_dev = { .name = "gpio-vbus", - .dev.platform_data = &smartq_usb_otg_vbus_pdata, }; static struct pwm_lookup smartq_pwm_lookup[] = { @@ -418,6 +418,7 @@ void __init smartq_machine_init(void) pwm_add_table(smartq_pwm_lookup, ARRAY_SIZE(smartq_pwm_lookup)); gpiod_add_lookup_table(&smartq_lcd_control_gpiod_table); + gpiod_add_lookup_table(&smartq_usb_otg_vbus_gpiod_table); platform_add_devices(smartq_devices, ARRAY_SIZE(smartq_devices)); gpiod_add_lookup_table(&smartq_audio_gpios); diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index 96330ef25641..e771ce70e132 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -189,7 +189,7 @@ static void apmu_init_cpu(struct resource *res, int cpu, int bit) if ((cpu >= ARRAY_SIZE(apmu_cpus)) || apmu_cpus[cpu].iomem) return; - apmu_cpus[cpu].iomem = ioremap_nocache(res->start, resource_size(res)); + apmu_cpus[cpu].iomem = ioremap(res->start, resource_size(res)); apmu_cpus[cpu].bit = bit; pr_debug("apmu ioremap %d %d %pr\n", cpu, bit, res); diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c index e84599dd96f1..672081405a7e 100644 --- a/arch/arm/mach-shmobile/pm-rcar-gen2.c +++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c @@ -103,7 +103,7 @@ map: iounmap(p); /* setup reset vectors */ - p = ioremap_nocache(RST, 0x63); + p = ioremap(RST, 0x63); bar = phys_to_sbar(res.start); if (has_a15) { writel_relaxed(bar, p + CA15BAR); diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c index 787d039b5a07..f760c27c9907 100644 --- a/arch/arm/mach-shmobile/setup-r8a7740.c +++ b/arch/arm/mach-shmobile/setup-r8a7740.c @@ -28,7 +28,7 @@ static void __init r8a7740_meram_workaround(void) { void __iomem *reg; - reg = ioremap_nocache(MEBUFCNTR, 4); + reg = ioremap(MEBUFCNTR, 4); if (reg) { iowrite32(0x01600164, reg); iounmap(reg); @@ -37,9 +37,9 @@ static void __init r8a7740_meram_workaround(void) static void __init r8a7740_init_irq_of(void) { - void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10); - void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10); - void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4); + void __iomem *intc_prio_base = ioremap(0xe6900010, 0x10); + void __iomem *intc_msk_base = ioremap(0xe6900040, 0x10); + void __iomem *pfc_inta_ctrl = ioremap(0xe605807c, 0x4); irqchip_init(); diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c index ce51794f64c7..2bc93f391bcf 100644 --- a/arch/arm/mach-shmobile/setup-r8a7778.c +++ b/arch/arm/mach-shmobile/setup-r8a7778.c @@ -22,7 +22,7 @@ static void __init r8a7778_init_irq_dt(void) { - void __iomem *base = ioremap_nocache(0xfe700000, 0x00100000); + void __iomem *base = ioremap(0xfe700000, 0x00100000); BUG_ON(!base); diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index a79fa3b0c8ed..a1694d977ec9 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -201,7 +201,7 @@ static unsigned long pin_highz_conf[] = { }; /* Pin control settings */ -static struct pinctrl_map __initdata u300_pinmux_map[] = { +static const struct pinctrl_map u300_pinmux_map[] = { /* anonymous maps for chip power and EMIFs */ PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-u300", NULL, "power"), PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-u300", NULL, "emif0"), diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 354e0e7025ae..1da11bdb1dfb 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 788c5cf46de5..84718eddae60 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -162,12 +162,12 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer return count; } -static const struct file_operations alignment_proc_fops = { - .open = alignment_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = alignment_proc_write, +static const struct proc_ops alignment_proc_ops = { + .proc_open = alignment_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = alignment_proc_write, }; #endif /* CONFIG_PROC_FS */ @@ -1016,7 +1016,7 @@ static int __init alignment_init(void) struct proc_dir_entry *res; res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, - &alignment_proc_fops); + &alignment_proc_ops); if (!res) return -ENOMEM; #endif diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 0ee8fc4b4672..dc8f152f3556 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -135,13 +135,13 @@ flush_levels: and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic #endif mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb @ isb to sych the new cssr&csidr mrc p15, 1, r1, c0, c0, 0 @ read the new csidr -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION restore_irqs_notrace r9 #endif and r2, r1, #7 @ extract the length of the cache lines diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index a0035c426ce6..1bc3a0a50753 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -183,13 +183,13 @@ flush_levels: and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic #endif write_csselr r10, r1 @ set current cache level isb @ isb to sych the new cssr&csidr read_ccsidr r1 @ read the new csidr -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION restore_irqs_notrace r9 #endif and r2, r1, #7 @ extract the length of the cache lines diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e822af0d9219..9414d72f664b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. This diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 97dc386e3cb8..cc29869d12a3 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) static void build_prologue(struct jit_ctx *ctx) { - const s8 r0 = bpf2a32[BPF_REG_0][1]; - const s8 r2 = bpf2a32[BPF_REG_1][1]; - const s8 r3 = bpf2a32[BPF_REG_1][0]; - const s8 r4 = bpf2a32[BPF_REG_6][1]; - const s8 fplo = bpf2a32[BPF_REG_FP][1]; - const s8 fphi = bpf2a32[BPF_REG_FP][0]; + const s8 arm_r0 = bpf2a32[BPF_REG_0][1]; + const s8 *bpf_r1 = bpf2a32[BPF_REG_1]; + const s8 *bpf_fp = bpf2a32[BPF_REG_FP]; const s8 *tcc = bpf2a32[TCALL_CNT]; /* Save callee saved registers. */ @@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); #endif - /* Save frame pointer for later */ - emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); + /* mov r3, #0 */ + /* sub r2, sp, #SCRATCH_SIZE */ + emit(ARM_MOV_I(bpf_r1[0], 0), ctx); + emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx); ctx->stack_size = imm8m(STACK_SIZE); @@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); /* Set up BPF prog stack base register */ - emit_a32_mov_r(fplo, ARM_IP, ctx); - emit_a32_mov_i(fphi, 0, ctx); + emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx); - /* mov r4, 0 */ - emit(ARM_MOV_I(r4, 0), ctx); + /* Initialize Tail Count */ + emit(ARM_MOV_I(bpf_r1[1], 0), ctx); + emit_a32_mov_r64(true, tcc, bpf_r1, ctx); /* Move BPF_CTX to BPF_R1 */ - emit(ARM_MOV_R(r3, r4), ctx); - emit(ARM_MOV_R(r2, r0), ctx); - /* Initialize Tail Count */ - emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); - emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); + emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx); + /* end of prologue */ } diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 6da7dc4d79cc..4d1cf74a2caa 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -449,3 +449,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 0fda344beb0b..1babb392e70a 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ccflags-y := -fPIC -fno-common -fno-builtin -fno-stack-protector -ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1b4476ddb83..0b30e884e088 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -34,32 +34,32 @@ config ARM64 select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_INLINE_READ_LOCK if !PREEMPT - select ARCH_INLINE_READ_LOCK_BH if !PREEMPT - select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_READ_UNLOCK if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT - select ARCH_INLINE_WRITE_LOCK if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT - select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT - select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_LOCK if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT - select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT - select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_INLINE_READ_LOCK if !PREEMPTION + select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS @@ -69,6 +69,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) @@ -81,7 +82,7 @@ config ARM64 select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI_FW - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) @@ -103,6 +104,7 @@ config ARM64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP + select GENERIC_PTDUMP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER @@ -138,6 +140,7 @@ config ARM64 select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS @@ -161,7 +164,8 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API - select HAVE_RCU_TABLE_FREE + select HAVE_FUTEX_CMPXCHG if FUTEX + select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS @@ -301,6 +305,9 @@ config ARCH_SUPPORTS_UPROBES config ARCH_PROC_KCORE_TEXT def_bool y +config BROKEN_GAS_INST + def_bool !$(as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n) + config KASAN_SHADOW_OFFSET hex depends on KASAN @@ -514,9 +521,13 @@ config ARM64_ERRATUM_1418040 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_AT_VHE + bool + config ARM64_ERRATUM_1165522 bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y + select ARM64_WORKAROUND_SPECULATIVE_AT_VHE help This option adds a workaround for ARM Cortex-A76 erratum 1165522. @@ -526,6 +537,19 @@ config ARM64_ERRATUM_1165522 If unsure, say Y. +config ARM64_ERRATUM_1530923 + bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + help + This option adds a workaround for ARM Cortex-A55 erratum 1530923. + + Affected Cortex-A55 cores (r0p0, r0p1, r1p0, r2p0) could end-up with + corrupted TLBs by speculating an AT instruction during a guest + context switch. + + If unsure, say Y. + config ARM64_ERRATUM_1286807 bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" default y @@ -542,9 +566,13 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. +config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE + bool + config ARM64_ERRATUM_1319367 bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y + select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE help This option adds work arounds for ARM Cortex-A57 erratum 1319537 and A72 erratum 1319367 @@ -1363,6 +1391,11 @@ config ARM64_PAN instruction if the cpu does not implement the feature. config ARM64_LSE_ATOMICS + bool + default ARM64_USE_LSE_ATOMICS + depends on $(as-instr,.arch_extension lse) + +config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" depends on JUMP_LABEL default y @@ -1484,6 +1517,30 @@ config ARM64_PTR_AUTH endmenu +menu "ARMv8.5 architectural features" + +config ARM64_E0PD + bool "Enable support for E0PD" + default y + help + E0PD (part of the ARMv8.5 extensions) allows us to ensure + that EL0 accesses made via TTBR1 always fault in constant time, + providing similar benefits to KASLR as those provided by KPTI, but + with lower overhead and without disrupting legitimate access to + kernel memory such as SPE. + + This option enables E0PD for TTBR1 where available. + +config ARCH_RANDOM + bool "Enable support for random number generation" + default y + help + Random number generation (part of the ARMv8.5 Extensions) + provides a high bandwidth, cryptographically secure + hardware random number generator. + +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y @@ -1544,7 +1601,7 @@ config ARM64_MODULE_PLTS config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - select CONFIG_ARM_GIC_V3 + select ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of GIC interrupt priority. This support requires version 3 or later of diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cf09010d825f..1c906d932d6b 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -1,22 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -config ARM64_PTDUMP_CORE - def_bool n - -config ARM64_PTDUMP_DEBUGFS - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select ARM64_PTDUMP_CORE - select DEBUG_FS - help - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - - If in doubt, say N. - config PID_IN_CONTEXTIDR bool "Write the current PID to the CONTEXTIDR register" help @@ -42,7 +25,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET config DEBUG_WX bool "Warn on W+X mappings at boot" - select ARM64_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1fbe24d4fdb6..dca1a97751ab 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,11 +30,8 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# Check for binutils support for specific extensions -lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1) - -ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y) - ifeq ($(lseinstr),) +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) endif endif @@ -45,19 +42,15 @@ cc_has_k_constraint := $(call try-run,echo \ return 0; \ }' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1) -ifeq ($(CONFIG_ARM64), y) -brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1) - - ifneq ($(brokengasinst),) +ifeq ($(CONFIG_BROKEN_GAS_INST),y) $(warning Detected assembler with broken .inst; disassembly will be unreliable) - endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \ +KBUILD_CFLAGS += -mgeneral-regs-only \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-disable-warning, psabi) -KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso) +KBUILD_AFLAGS += $(compat_vdso) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..cd3414898d10 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts index 96ab0227e82d..121e6cc4849b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts @@ -15,7 +15,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; vmmc-supply = <®_dcdc1>; - vqmmc-supply = <®_dcdc1>; + vqmmc-supply = <®_eldo1>; bus-width = <8>; non-removable; cap-mmc-hw-reset; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 01a9a52edae4..393c1948a495 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -140,7 +140,7 @@ &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; - vmmc-supply = <®_aldo2>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_dldo4>; mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 144a2c19ac02..d1fc9c2055f4 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -61,10 +61,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 5bd07469766b..a8bb3fa9fec9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -46,25 +46,47 @@ }; gpio-keys { - compatible = "gpio-keys-polled"; - poll-interval = <100>; + compatible = "gpio-keys"; key1 { label = "A"; linux,code = <BTN_0>; gpios = <&gpio GPIOH_6 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <34 IRQ_TYPE_EDGE_BOTH>; }; key2 { label = "B"; linux,code = <BTN_1>; gpios = <&gpio GPIOH_7 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <35 IRQ_TYPE_EDGE_BOTH>; }; key3 { label = "C"; linux,code = <BTN_2>; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <2 IRQ_TYPE_EDGE_BOTH>; + }; + + mic_mute { + label = "MicMute"; + linux,code = <SW_MUTE_DEVICE>; + linux,input-type = <EV_SW>; + gpios = <&gpio_ao GPIOE_2 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <99 IRQ_TYPE_EDGE_BOTH>; + }; + + power_key { + label = "PowerKey"; + linux,code = <KEY_POWER>; + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio_intc>; + interrupts = <3 IRQ_TYPE_EDGE_BOTH>; }; }; @@ -569,6 +591,8 @@ bluetooth { compatible = "brcm,bcm43438-bt"; + interrupt-parent = <&gpio_intc>; + interrupts = <95 IRQ_TYPE_LEVEL_HIGH>; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; max-speed = <2000000>; clocks = <&wifi32k>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 8e8a77eb596a..a6f9b7784e8f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -88,7 +88,7 @@ reboot { compatible ="syscon-reboot"; - regmap = <&dcfg>; + regmap = <&rst>; offset = <0xb0>; mask = <0x02>; }; @@ -175,7 +175,13 @@ dcfg: syscon@1e00000 { compatible = "fsl,ls1028a-dcfg", "syscon"; reg = <0x0 0x1e00000 0x0 0x10000>; - big-endian; + little-endian; + }; + + rst: syscon@1e60000 { + compatible = "syscon"; + reg = <0x0 0x1e60000 0x0 0x10000>; + little-endian; }; scfg: syscon@1fc0000 { @@ -584,7 +590,7 @@ 0x00010004 0x0000003d 0x00010005 0x00000045 0x00010006 0x0000004d - 0x00010007 0x00000045 + 0x00010007 0x00000055 0x00010008 0x0000005e 0x00010009 0x00000066 0x0001000a 0x0000006e diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 6edbdfe2d0d7..3d95b66a2d71 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -740,7 +740,7 @@ reg = <0x30bd0000 0x10000>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, - <&clk IMX8MM_CLK_SDMA1_ROOT>; + <&clk IMX8MM_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts index 2a759dff9f87..596bc65f475c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts @@ -421,7 +421,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_imu>; interrupt-parent = <&gpio3>; - interrupts = <19 IRQ_TYPE_LEVEL_LOW>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®_3v3_p>; vddio-supply = <®_3v3_p>; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 94090c6fb946..d43e1299c8ef 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -60,10 +60,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328-a1.dts b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts index 76b49f573101..16f1656d5203 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-a1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-a1.dts @@ -49,7 +49,8 @@ ir-receiver { compatible = "gpio-ir-receiver"; - gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_HIGH>; + gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_LOW>; + linux,rc-map-name = "rc-beelink-gs1"; }; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6a83ba2aea3e..b2f667307f82 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -442,6 +442,7 @@ CONFIG_ROCKCHIP_THERMAL=m CONFIG_RCAR_THERMAL=y CONFIG_RCAR_GEN3_THERMAL=y CONFIG_ARMADA_THERMAL=y +CONFIG_BCM2711_THERMAL=m CONFIG_BCM2835_THERMAL=m CONFIG_BRCMSTB_THERMAL=m CONFIG_EXYNOS_THERMAL=y diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 9add9bbc48d8..99a028e298ed 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -15,7 +15,7 @@ * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, * u32 *macp, u8 const rk[], u32 rounds); */ -ENTRY(ce_aes_ccm_auth_data) +SYM_FUNC_START(ce_aes_ccm_auth_data) ldr w8, [x3] /* leftover from prev round? */ ld1 {v0.16b}, [x0] /* load mac */ cbz w8, 1f @@ -81,13 +81,13 @@ ENTRY(ce_aes_ccm_auth_data) st1 {v0.16b}, [x0] 10: str w8, [x3] ret -ENDPROC(ce_aes_ccm_auth_data) +SYM_FUNC_END(ce_aes_ccm_auth_data) /* * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], * u32 rounds); */ -ENTRY(ce_aes_ccm_final) +SYM_FUNC_START(ce_aes_ccm_final) ld1 {v3.4s}, [x2], #16 /* load first round key */ ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? */ @@ -121,7 +121,7 @@ ENTRY(ce_aes_ccm_final) eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ st1 {v0.16b}, [x0] /* store result */ ret -ENDPROC(ce_aes_ccm_final) +SYM_FUNC_END(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc ldr x8, [x6, #8] /* load lower ctr */ @@ -212,10 +212,10 @@ CPU_LE( rev x8, x8 ) * u8 const rk[], u32 rounds, u8 mac[], * u8 ctr[]); */ -ENTRY(ce_aes_ccm_encrypt) +SYM_FUNC_START(ce_aes_ccm_encrypt) aes_ccm_do_crypt 1 -ENDPROC(ce_aes_ccm_encrypt) +SYM_FUNC_END(ce_aes_ccm_encrypt) -ENTRY(ce_aes_ccm_decrypt) +SYM_FUNC_START(ce_aes_ccm_decrypt) aes_ccm_do_crypt 0 -ENDPROC(ce_aes_ccm_decrypt) +SYM_FUNC_END(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 541cf9165748..f6d19b0dc893 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -47,14 +47,8 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); - int ret; - ret = ce_aes_expandkey(ctx, in_key, key_len); - if (!ret) - return 0; - - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; + return ce_aes_expandkey(ctx, in_key, key_len); } static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S index 76a30fe4ba8b..e52e13eb8fdb 100644 --- a/arch/arm64/crypto/aes-ce-core.S +++ b/arch/arm64/crypto/aes-ce-core.S @@ -8,7 +8,7 @@ .arch armv8-a+crypto -ENTRY(__aes_ce_encrypt) +SYM_FUNC_START(__aes_ce_encrypt) sub w3, w3, #2 ld1 {v0.16b}, [x2] ld1 {v1.4s}, [x0], #16 @@ -34,9 +34,9 @@ ENTRY(__aes_ce_encrypt) eor v0.16b, v0.16b, v3.16b st1 {v0.16b}, [x1] ret -ENDPROC(__aes_ce_encrypt) +SYM_FUNC_END(__aes_ce_encrypt) -ENTRY(__aes_ce_decrypt) +SYM_FUNC_START(__aes_ce_decrypt) sub w3, w3, #2 ld1 {v0.16b}, [x2] ld1 {v1.4s}, [x0], #16 @@ -62,23 +62,23 @@ ENTRY(__aes_ce_decrypt) eor v0.16b, v0.16b, v3.16b st1 {v0.16b}, [x1] ret -ENDPROC(__aes_ce_decrypt) +SYM_FUNC_END(__aes_ce_decrypt) /* * __aes_ce_sub() - use the aese instruction to perform the AES sbox * substitution on each byte in 'input' */ -ENTRY(__aes_ce_sub) +SYM_FUNC_START(__aes_ce_sub) dup v1.4s, w0 movi v0.16b, #0 aese v0.16b, v1.16b umov w0, v0.s[0] ret -ENDPROC(__aes_ce_sub) +SYM_FUNC_END(__aes_ce_sub) -ENTRY(__aes_ce_invert) +SYM_FUNC_START(__aes_ce_invert) ld1 {v0.4s}, [x1] aesimc v1.16b, v0.16b st1 {v1.4s}, [x0] ret -ENDPROC(__aes_ce_invert) +SYM_FUNC_END(__aes_ce_invert) diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 6d085dc56c51..56a5f6f0b0c1 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -143,14 +143,8 @@ int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); - int ret; - ret = ce_aes_expandkey(ctx, in_key, key_len); - if (!ret) - return 0; - - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; + return ce_aes_expandkey(ctx, in_key, key_len); } EXPORT_SYMBOL(ce_aes_setkey); diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index c132c49c89a8..45062553467f 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -9,8 +9,8 @@ #include <linux/linkage.h> #include <asm/assembler.h> -#define AES_ENTRY(func) ENTRY(ce_ ## func) -#define AES_ENDPROC(func) ENDPROC(ce_ ## func) +#define AES_ENTRY(func) SYM_FUNC_START(ce_ ## func) +#define AES_ENDPROC(func) SYM_FUNC_END(ce_ ## func) .arch armv8-a+crypto diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index 423d0aebc570..c9d6955f8404 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -122,11 +122,11 @@ CPU_BE( rev w7, w7 ) ret .endm -ENTRY(__aes_arm64_encrypt) +SYM_FUNC_START(__aes_arm64_encrypt) do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm64_encrypt) +SYM_FUNC_END(__aes_arm64_encrypt) .align 5 -ENTRY(__aes_arm64_decrypt) +SYM_FUNC_START(__aes_arm64_decrypt) do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -ENDPROC(__aes_arm64_decrypt) +SYM_FUNC_END(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index aa57dc639f77..ed5409c6abf4 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -132,13 +132,8 @@ static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - ret = aes_expandkey(ctx, in_key, key_len); - if (ret) - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return ret; + return aes_expandkey(ctx, in_key, key_len); } static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm, @@ -155,11 +150,7 @@ static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm, if (!ret) ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2], key_len / 2); - if (!ret) - return 0; - - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; + return ret; } static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, @@ -173,19 +164,12 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, ret = aes_expandkey(&ctx->key1, in_key, key_len); if (ret) - goto out; + return ret; desc->tfm = ctx->hash; crypto_shash_digest(desc, in_key, key_len, digest); - ret = aes_expandkey(&ctx->key2, digest, sizeof(digest)); - if (ret) - goto out; - - return 0; -out: - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; + return aes_expandkey(&ctx->key2, digest, sizeof(digest)); } static int __maybe_unused ecb_encrypt(struct skcipher_request *req) @@ -791,13 +775,8 @@ static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, unsigned int key_len) { struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); - int err; - err = aes_expandkey(&ctx->key, in_key, key_len); - if (err) - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - - return err; + return aes_expandkey(&ctx->key, in_key, key_len); } static void cmac_gf128_mul_by_x(be128 *y, const be128 *x) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 131618389f1f..8a2faa42b57e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -22,26 +22,26 @@ #define ST5(x...) x #endif -aes_encrypt_block4x: +SYM_FUNC_START_LOCAL(aes_encrypt_block4x) encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret -ENDPROC(aes_encrypt_block4x) +SYM_FUNC_END(aes_encrypt_block4x) -aes_decrypt_block4x: +SYM_FUNC_START_LOCAL(aes_decrypt_block4x) decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret -ENDPROC(aes_decrypt_block4x) +SYM_FUNC_END(aes_decrypt_block4x) #if MAX_STRIDE == 5 -aes_encrypt_block5x: +SYM_FUNC_START_LOCAL(aes_encrypt_block5x) encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 ret -ENDPROC(aes_encrypt_block5x) +SYM_FUNC_END(aes_encrypt_block5x) -aes_decrypt_block5x: +SYM_FUNC_START_LOCAL(aes_decrypt_block5x) decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 ret -ENDPROC(aes_decrypt_block5x) +SYM_FUNC_END(aes_decrypt_block5x) #endif /* diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 22d9b110cf78..247d34ddaab0 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -8,8 +8,8 @@ #include <linux/linkage.h> #include <asm/assembler.h> -#define AES_ENTRY(func) ENTRY(neon_ ## func) -#define AES_ENDPROC(func) ENDPROC(neon_ ## func) +#define AES_ENTRY(func) SYM_FUNC_START(neon_ ## func) +#define AES_ENDPROC(func) SYM_FUNC_END(neon_ ## func) xtsmask .req v7 cbciv .req v7 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index 65982039fa36..b357164379f6 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -380,7 +380,7 @@ ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f /* * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) */ -ENTRY(aesbs_convert_key) +SYM_FUNC_START(aesbs_convert_key) ld1 {v7.4s}, [x1], #16 // load round 0 key ld1 {v17.4s}, [x1], #16 // load round 1 key @@ -425,10 +425,10 @@ ENTRY(aesbs_convert_key) eor v17.16b, v17.16b, v7.16b str q17, [x0] ret -ENDPROC(aesbs_convert_key) +SYM_FUNC_END(aesbs_convert_key) .align 4 -aesbs_encrypt8: +SYM_FUNC_START_LOCAL(aesbs_encrypt8) ldr q9, [bskey], #16 // round 0 key ldr q8, M0SR ldr q24, SR @@ -488,10 +488,10 @@ aesbs_encrypt8: eor v2.16b, v2.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -ENDPROC(aesbs_encrypt8) +SYM_FUNC_END(aesbs_encrypt8) .align 4 -aesbs_decrypt8: +SYM_FUNC_START_LOCAL(aesbs_decrypt8) lsl x9, rounds, #7 add bskey, bskey, x9 @@ -553,7 +553,7 @@ aesbs_decrypt8: eor v3.16b, v3.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -ENDPROC(aesbs_decrypt8) +SYM_FUNC_END(aesbs_decrypt8) /* * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, @@ -621,21 +621,21 @@ ENDPROC(aesbs_decrypt8) .endm .align 4 -ENTRY(aesbs_ecb_encrypt) +SYM_FUNC_START(aesbs_ecb_encrypt) __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_ecb_encrypt) +SYM_FUNC_END(aesbs_ecb_encrypt) .align 4 -ENTRY(aesbs_ecb_decrypt) +SYM_FUNC_START(aesbs_ecb_decrypt) __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_ecb_decrypt) +SYM_FUNC_END(aesbs_ecb_decrypt) /* * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) */ .align 4 -ENTRY(aesbs_cbc_decrypt) +SYM_FUNC_START(aesbs_cbc_decrypt) frame_push 6 mov x19, x0 @@ -720,7 +720,7 @@ ENTRY(aesbs_cbc_decrypt) 2: frame_pop ret -ENDPROC(aesbs_cbc_decrypt) +SYM_FUNC_END(aesbs_cbc_decrypt) .macro next_tweak, out, in, const, tmp sshr \tmp\().2d, \in\().2d, #63 @@ -736,7 +736,7 @@ ENDPROC(aesbs_cbc_decrypt) * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) */ -__xts_crypt8: +SYM_FUNC_START_LOCAL(__xts_crypt8) mov x6, #1 lsl x6, x6, x23 subs w23, w23, #8 @@ -789,7 +789,7 @@ __xts_crypt8: 0: mov bskey, x21 mov rounds, x22 br x7 -ENDPROC(__xts_crypt8) +SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 frame_push 6, 64 @@ -854,13 +854,13 @@ ENDPROC(__xts_crypt8) ret .endm -ENTRY(aesbs_xts_encrypt) +SYM_FUNC_START(aesbs_xts_encrypt) __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_xts_encrypt) +SYM_FUNC_END(aesbs_xts_encrypt) -ENTRY(aesbs_xts_decrypt) +SYM_FUNC_START(aesbs_xts_decrypt) __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_xts_decrypt) +SYM_FUNC_END(aesbs_xts_decrypt) .macro next_ctr, v mov \v\().d[1], x8 @@ -874,7 +874,7 @@ ENDPROC(aesbs_xts_decrypt) * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], * int rounds, int blocks, u8 iv[], u8 final[]) */ -ENTRY(aesbs_ctr_encrypt) +SYM_FUNC_START(aesbs_ctr_encrypt) frame_push 8 mov x19, x0 @@ -1002,4 +1002,4 @@ CPU_LE( rev x8, x8 ) 7: cbz x25, 8b st1 {v5.16b}, [x25] b 8b -ENDPROC(aesbs_ctr_encrypt) +SYM_FUNC_END(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S index 706c4e10e9e2..e90386a7db8e 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -36,7 +36,7 @@ * * Clobbers: w3, x10, v4, v12 */ -chacha_permute: +SYM_FUNC_START_LOCAL(chacha_permute) adr_l x10, ROT8 ld1 {v12.4s}, [x10] @@ -104,9 +104,9 @@ chacha_permute: b.ne .Ldoubleround ret -ENDPROC(chacha_permute) +SYM_FUNC_END(chacha_permute) -ENTRY(chacha_block_xor_neon) +SYM_FUNC_START(chacha_block_xor_neon) // x0: Input state matrix, s // x1: 1 data block output, o // x2: 1 data block input, i @@ -143,9 +143,9 @@ ENTRY(chacha_block_xor_neon) ldp x29, x30, [sp], #16 ret -ENDPROC(chacha_block_xor_neon) +SYM_FUNC_END(chacha_block_xor_neon) -ENTRY(hchacha_block_neon) +SYM_FUNC_START(hchacha_block_neon) // x0: Input state matrix, s // x1: output (8 32-bit words) // w2: nrounds @@ -163,7 +163,7 @@ ENTRY(hchacha_block_neon) ldp x29, x30, [sp], #16 ret -ENDPROC(hchacha_block_neon) +SYM_FUNC_END(hchacha_block_neon) a0 .req w12 a1 .req w13 @@ -183,7 +183,7 @@ ENDPROC(hchacha_block_neon) a15 .req w28 .align 6 -ENTRY(chacha_4block_xor_neon) +SYM_FUNC_START(chacha_4block_xor_neon) frame_push 10 // x0: Input state matrix, s @@ -845,7 +845,7 @@ CPU_BE( rev a15, a15 ) eor v31.16b, v31.16b, v3.16b st1 {v28.16b-v31.16b}, [x1] b .Lout -ENDPROC(chacha_4block_xor_neon) +SYM_FUNC_END(chacha_4block_xor_neon) .section ".rodata", "a", %progbits .align L1_CACHE_SHIFT diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index e545b42e6a46..5a95c2628fbf 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -131,7 +131,7 @@ tbl bd4.16b, {\bd\().16b}, perm4.16b .endm -__pmull_p8_core: +SYM_FUNC_START_LOCAL(__pmull_p8_core) .L__pmull_p8_core: ext t4.8b, ad.8b, ad.8b, #1 // A1 ext t5.8b, ad.8b, ad.8b, #2 // A2 @@ -194,7 +194,7 @@ __pmull_p8_core: eor t4.16b, t4.16b, t5.16b eor t6.16b, t6.16b, t3.16b ret -ENDPROC(__pmull_p8_core) +SYM_FUNC_END(__pmull_p8_core) .macro __pmull_p8, rq, ad, bd, i .ifnc \bd, fold_consts @@ -488,9 +488,9 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) // // Assumes len >= 16. // -ENTRY(crc_t10dif_pmull_p8) +SYM_FUNC_START(crc_t10dif_pmull_p8) crc_t10dif_pmull p8 -ENDPROC(crc_t10dif_pmull_p8) +SYM_FUNC_END(crc_t10dif_pmull_p8) .align 5 // @@ -498,9 +498,9 @@ ENDPROC(crc_t10dif_pmull_p8) // // Assumes len >= 16. // -ENTRY(crc_t10dif_pmull_p64) +SYM_FUNC_START(crc_t10dif_pmull_p64) crc_t10dif_pmull p64 -ENDPROC(crc_t10dif_pmull_p64) +SYM_FUNC_END(crc_t10dif_pmull_p64) .section ".rodata", "a" .align 4 diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index a791c4adf8e6..084c6a30b03a 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -350,13 +350,13 @@ CPU_LE( rev64 T1.16b, T1.16b ) * void pmull_ghash_update(int blocks, u64 dg[], const char *src, * struct ghash_key const *k, const char *head) */ -ENTRY(pmull_ghash_update_p64) +SYM_FUNC_START(pmull_ghash_update_p64) __pmull_ghash p64 -ENDPROC(pmull_ghash_update_p64) +SYM_FUNC_END(pmull_ghash_update_p64) -ENTRY(pmull_ghash_update_p8) +SYM_FUNC_START(pmull_ghash_update_p8) __pmull_ghash p8 -ENDPROC(pmull_ghash_update_p8) +SYM_FUNC_END(pmull_ghash_update_p8) KS0 .req v8 KS1 .req v9 diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 522cf004ce65..22831d3b7f62 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -248,10 +248,8 @@ static int ghash_setkey(struct crypto_shash *tfm, { struct ghash_key *key = crypto_shash_ctx(tfm); - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; - } return __ghash_setkey(key, inkey, keylen); } @@ -259,7 +257,7 @@ static int ghash_setkey(struct crypto_shash *tfm, static struct shash_alg ghash_alg[] = {{ .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", - .base.cra_priority = 100, + .base.cra_priority = 150, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, @@ -306,10 +304,8 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, int ret; ret = aes_expandkey(&ctx->aes_key, inkey, keylen); - if (ret) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + if (ret) return -EINVAL; - } aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){}); diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S index e05570c38de7..51c0a534ef87 100644 --- a/arch/arm64/crypto/nh-neon-core.S +++ b/arch/arm64/crypto/nh-neon-core.S @@ -62,7 +62,7 @@ * * It's guaranteed that message_len % 16 == 0. */ -ENTRY(nh_neon) +SYM_FUNC_START(nh_neon) ld1 {K0.4s,K1.4s}, [KEY], #32 movi PASS0_SUMS.2d, #0 @@ -100,4 +100,4 @@ ENTRY(nh_neon) addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d st1 {T0.16b,T1.16b}, [HASH] ret -ENDPROC(nh_neon) +SYM_FUNC_END(nh_neon) diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 83a2338a8826..e97b092f56b8 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -21,7 +21,7 @@ asmlinkage void poly1305_init_arm64(void *state, const u8 *key); asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); +asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch); void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) { - __le32 digest[4]; - u64 f = 0; - if (unlikely(dctx->buflen)) { dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, @@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); } - poly1305_emit(&dctx->h, digest, dctx->s); - - /* mac = (h + s) % (2^128) */ - f = (f >> 32) + le32_to_cpu(digest[0]); - put_unaligned_le32(f, dst); - f = (f >> 32) + le32_to_cpu(digest[1]); - put_unaligned_le32(f, dst + 4); - f = (f >> 32) + le32_to_cpu(digest[2]); - put_unaligned_le32(f, dst + 8); - f = (f >> 32) + le32_to_cpu(digest[3]); - put_unaligned_le32(f, dst + 12); - + poly1305_emit(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index c2ce1f820706..92d0d2753e81 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -65,7 +65,7 @@ * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, * int blocks) */ -ENTRY(sha1_ce_transform) +SYM_FUNC_START(sha1_ce_transform) frame_push 3 mov x19, x0 @@ -160,4 +160,4 @@ CPU_LE( rev32 v11.16b, v11.16b ) str dgb, [x19, #16] frame_pop ret -ENDPROC(sha1_ce_transform) +SYM_FUNC_END(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index bdc1b6d7aff7..63c875d3314b 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -28,6 +28,13 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src, + int blocks) +{ + sha1_ce_transform(container_of(sst, struct sha1_ce_state, sst), src, + blocks); +} + const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); @@ -41,8 +48,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, sctx->finalize = 0; kernel_neon_begin(); - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_ce_transform); + sha1_base_do_update(desc, data, len, __sha1_ce_transform); kernel_neon_end(); return 0; @@ -64,10 +70,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, sctx->finalize = finalize; kernel_neon_begin(); - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_ce_transform); + sha1_base_do_update(desc, data, len, __sha1_ce_transform); if (!finalize) - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); + sha1_base_do_finalize(desc, __sha1_ce_transform); kernel_neon_end(); return sha1_base_finish(desc, out); } @@ -81,7 +86,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) sctx->finalize = 0; kernel_neon_begin(); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); + sha1_base_do_finalize(desc, __sha1_ce_transform); kernel_neon_end(); return sha1_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 6f728a419009..3f9d0f326987 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -75,7 +75,7 @@ * int blocks) */ .text -ENTRY(sha2_ce_transform) +SYM_FUNC_START(sha2_ce_transform) frame_push 3 mov x19, x0 @@ -166,4 +166,4 @@ CPU_LE( rev32 v19.16b, v19.16b ) 4: st1 {dgav.4s, dgbv.4s}, [x19] frame_pop ret -ENDPROC(sha2_ce_transform) +SYM_FUNC_END(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 604a01a4ede6..a8e67bafba3d 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -28,6 +28,13 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +static void __sha2_ce_transform(struct sha256_state *sst, u8 const *src, + int blocks) +{ + sha2_ce_transform(container_of(sst, struct sha256_ce_state, sst), src, + blocks); +} + const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, sst.count); const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, @@ -35,6 +42,12 @@ const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks); +static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src, + int blocks) +{ + sha256_block_data_order(sst->state, src, blocks); +} + static int sha256_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -42,12 +55,11 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data, if (!crypto_simd_usable()) return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); sctx->finalize = 0; kernel_neon_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha2_ce_transform); + sha256_base_do_update(desc, data, len, __sha2_ce_transform); kernel_neon_end(); return 0; @@ -62,9 +74,8 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, if (!crypto_simd_usable()) { if (len) sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); + sha256_base_do_finalize(desc, __sha256_block_data_order); return sha256_base_finish(desc, out); } @@ -75,11 +86,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, sctx->finalize = finalize; kernel_neon_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha2_ce_transform); + sha256_base_do_update(desc, data, len, __sha2_ce_transform); if (!finalize) - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha2_ce_transform); + sha256_base_do_finalize(desc, __sha2_ce_transform); kernel_neon_end(); return sha256_base_finish(desc, out); } @@ -89,14 +98,13 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) struct sha256_ce_state *sctx = shash_desc_ctx(desc); if (!crypto_simd_usable()) { - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + sha256_base_do_finalize(desc, __sha256_block_data_order); return sha256_base_finish(desc, out); } sctx->finalize = 0; kernel_neon_begin(); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); + sha256_base_do_finalize(desc, __sha2_ce_transform); kernel_neon_end(); return sha256_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index e273faca924f..ddf4a0d85c1c 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -27,14 +27,26 @@ asmlinkage void sha256_block_data_order(u32 *digest, const void *data, unsigned int num_blks); EXPORT_SYMBOL(sha256_block_data_order); +static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src, + int blocks) +{ + sha256_block_data_order(sst->state, src, blocks); +} + asmlinkage void sha256_block_neon(u32 *digest, const void *data, unsigned int num_blks); +static void __sha256_block_neon(struct sha256_state *sst, u8 const *src, + int blocks) +{ + sha256_block_neon(sst->state, src, blocks); +} + static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); } static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, @@ -42,9 +54,8 @@ static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, { if (len) sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); + sha256_base_do_finalize(desc, __sha256_block_data_order); return sha256_base_finish(desc, out); } @@ -87,7 +98,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data, if (!crypto_simd_usable()) return sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); while (len > 0) { unsigned int chunk = len; @@ -97,14 +108,13 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data, * input when running on a preemptible kernel, but process the * data block by block instead. */ - if (IS_ENABLED(CONFIG_PREEMPT) && + if (IS_ENABLED(CONFIG_PREEMPTION) && chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) chunk = SHA256_BLOCK_SIZE - sctx->count % SHA256_BLOCK_SIZE; kernel_neon_begin(); - sha256_base_do_update(desc, data, chunk, - (sha256_block_fn *)sha256_block_neon); + sha256_base_do_update(desc, data, chunk, __sha256_block_neon); kernel_neon_end(); data += chunk; len -= chunk; @@ -118,15 +128,13 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, if (!crypto_simd_usable()) { if (len) sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_block_data_order); - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_data_order); + __sha256_block_data_order); + sha256_base_do_finalize(desc, __sha256_block_data_order); } else { if (len) sha256_update_neon(desc, data, len); kernel_neon_begin(); - sha256_base_do_finalize(desc, - (sha256_block_fn *)sha256_block_neon); + sha256_base_do_finalize(desc, __sha256_block_neon); kernel_neon_end(); } return sha256_base_finish(desc, out); diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index a7d587fa54f6..1cfb768df350 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -40,7 +40,7 @@ * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) */ .text -ENTRY(sha3_ce_transform) +SYM_FUNC_START(sha3_ce_transform) frame_push 4 mov x19, x0 @@ -218,7 +218,7 @@ ENTRY(sha3_ce_transform) st1 {v24.1d}, [x19] frame_pop ret -ENDPROC(sha3_ce_transform) +SYM_FUNC_END(sha3_ce_transform) .section ".rodata", "a" .align 8 diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index ce65e3abe4f2..cde606c0323e 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -106,7 +106,7 @@ * int blocks) */ .text -ENTRY(sha512_ce_transform) +SYM_FUNC_START(sha512_ce_transform) frame_push 3 mov x19, x0 @@ -216,4 +216,4 @@ CPU_LE( rev64 v19.16b, v19.16b ) 3: st1 {v8.2d-v11.2d}, [x19] frame_pop ret -ENDPROC(sha512_ce_transform) +SYM_FUNC_END(sha512_ce_transform) diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index 2369540040aa..dc890a719f54 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -29,16 +29,21 @@ asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src, asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks); +static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src, + int blocks) +{ + sha512_block_data_order(sst->state, src, blocks); +} + static int sha512_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { if (!crypto_simd_usable()) return sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + __sha512_block_data_order); kernel_neon_begin(); - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_ce_transform); + sha512_base_do_update(desc, data, len, sha512_ce_transform); kernel_neon_end(); return 0; @@ -50,16 +55,14 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, if (!crypto_simd_usable()) { if (len) sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + __sha512_block_data_order); + sha512_base_do_finalize(desc, __sha512_block_data_order); return sha512_base_finish(desc, out); } kernel_neon_begin(); - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_ce_transform); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform); + sha512_base_do_update(desc, data, len, sha512_ce_transform); + sha512_base_do_finalize(desc, sha512_ce_transform); kernel_neon_end(); return sha512_base_finish(desc, out); } @@ -67,13 +70,12 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, static int sha512_ce_final(struct shash_desc *desc, u8 *out) { if (!crypto_simd_usable()) { - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_finalize(desc, __sha512_block_data_order); return sha512_base_finish(desc, out); } kernel_neon_begin(); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform); + sha512_base_do_finalize(desc, sha512_ce_transform); kernel_neon_end(); return sha512_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index d915c656e5fe..78d3083de6b7 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -20,15 +20,21 @@ MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("sha384"); MODULE_ALIAS_CRYPTO("sha512"); -asmlinkage void sha512_block_data_order(u32 *digest, const void *data, +asmlinkage void sha512_block_data_order(u64 *digest, const void *data, unsigned int num_blks); EXPORT_SYMBOL(sha512_block_data_order); +static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src, + int blocks) +{ + sha512_block_data_order(sst->state, src, blocks); +} + static int sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); + __sha512_block_data_order); } static int sha512_finup(struct shash_desc *desc, const u8 *data, @@ -36,9 +42,8 @@ static int sha512_finup(struct shash_desc *desc, const u8 *data, { if (len) sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_block_data_order); - sha512_base_do_finalize(desc, - (sha512_block_fn *)sha512_block_data_order); + __sha512_block_data_order); + sha512_base_do_finalize(desc, __sha512_block_data_order); return sha512_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S index d50d187906cb..ef97d3187cb7 100644 --- a/arch/arm64/crypto/sm3-ce-core.S +++ b/arch/arm64/crypto/sm3-ce-core.S @@ -73,7 +73,7 @@ * int blocks) */ .text -ENTRY(sm3_ce_transform) +SYM_FUNC_START(sm3_ce_transform) /* load state */ ld1 {v8.4s-v9.4s}, [x0] rev64 v8.4s, v8.4s @@ -131,7 +131,7 @@ CPU_LE( rev32 v3.16b, v3.16b ) ext v9.16b, v9.16b, v9.16b, #8 st1 {v8.4s-v9.4s}, [x0] ret -ENDPROC(sm3_ce_transform) +SYM_FUNC_END(sm3_ce_transform) .section ".rodata", "a" .align 3 diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index af3bfbc3f4d4..4ac6cfbc5797 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -15,7 +15,7 @@ * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); */ .text -ENTRY(sm4_ce_do_crypt) +SYM_FUNC_START(sm4_ce_do_crypt) ld1 {v8.4s}, [x2] ld1 {v0.4s-v3.4s}, [x0], #64 CPU_LE( rev32 v8.16b, v8.16b ) @@ -33,4 +33,4 @@ CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v8.16b, v8.16b ) st1 {v8.4s}, [x1] ret -ENDPROC(sm4_ce_do_crypt) +SYM_FUNC_END(sm4_ce_do_crypt) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b9f8d787eea9..324e7d5ab37e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -62,15 +65,14 @@ static inline void apply_alternatives_module(void *start, size_t length) { } * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -78,17 +80,25 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include <asm/assembler.h> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 89e4c8b79349..4750fc8030c3 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -141,6 +141,7 @@ static inline u32 gic_read_rpr(void) #define gicr_read_pendbaser(c) readq_relaxed(c) #define gits_write_vpropbaser(v, c) writeq_relaxed(v, c) +#define gits_read_vpropbaser(c) readq_relaxed(c) #define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) #define gits_read_vpendbaser(c) readq_relaxed(c) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h new file mode 100644 index 000000000000..3fe02da70004 --- /dev/null +++ b/arch/arm64/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARCHRANDOM_H +#define _ASM_ARCHRANDOM_H + +#ifdef CONFIG_ARCH_RANDOM + +#include <linux/random.h> +#include <asm/cpufeature.h> + +static inline bool __arm64_rndr(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDR set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDR_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + +static inline bool __must_check arch_get_random_long(unsigned long *v) +{ + return false; +} + +static inline bool __must_check arch_get_random_int(unsigned int *v) +{ + return false; +} + +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) +{ + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (!cpus_have_const_cap(ARM64_HAS_RNG)) + return false; + + return __arm64_rndr(v); +} + + +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) +{ + unsigned long val; + bool ok = arch_get_random_seed_long(&val); + + *v = val; + return ok; +} + +static inline bool __init __early_cpu_has_rndr(void) +{ + /* Open code as we run prior to the first call to cpufeature. */ + unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1); + return (ftr >> ID_AA64ISAR0_RNDR_SHIFT) & 0xf; +} + +#else + +static inline bool __arm64_rndr(unsigned long *v) { return false; } +static inline bool __init __early_cpu_has_rndr(void) { return false; } + +#endif /* CONFIG_ARCH_RANDOM */ +#endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b8cf7c85ffa2..aca337d79d12 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -40,12 +40,6 @@ msr daif, \flags .endm - /* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */ - .macro inherit_daif, pstate:req, tmp:req - and \tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) - msr daif, \tmp - .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ .macro enable_da_f msr daifclr, #(8 | 4 | 1) @@ -86,13 +80,6 @@ .endm /* - * SMP data memory barrier - */ - .macro smp_dmb, opt - dmb \opt - .endm - -/* * RAS Error Synchronization barrier */ .macro esb @@ -462,17 +449,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define ENDPIPROC(x) \ - .globl __pi_##x; \ - .type __pi_##x, %function; \ - .set __pi_##x, x; \ - .size __pi_##x, . - x; \ - ENDPROC(x) - -/* * Annotate a function as being unsuitable for kprobes. */ #ifdef CONFIG_KPROBES @@ -699,8 +675,8 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * where <label> is optional, and marks the point where execution will resume * after a yield has been performed. If omitted, execution resumes right after * the endif_yield_neon invocation. Note that the entire sequence, including - * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT - * is not defined. + * the provided patchup code, will be omitted from the image if + * CONFIG_PREEMPTION is not defined. * * As a convenience, in the case where no patchup code is required, the above * sequence may be abbreviated to @@ -728,7 +704,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm .macro if_will_cond_yield_neon -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION get_current_task x0 ldr x0, [x0, #TSK_TI_PREEMPT] sub x0, x0, #PREEMPT_DISABLE_OFFSET diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 7b012148bfd6..13869b76b58c 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -12,7 +12,7 @@ #include <linux/stringify.h> -#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE) +#ifdef CONFIG_ARM64_LSE_ATOMICS #define __LL_SC_FALLBACK(asm_ops) \ " b 3f\n" \ " .subsection 1\n" \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 574808b9df4c..da3280f639cd 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -14,6 +14,7 @@ static inline void __lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %w[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -30,6 +31,7 @@ ATOMIC_OP(add, stadd) static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %w[i], %w[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -58,6 +60,7 @@ static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -77,6 +80,7 @@ ATOMIC_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic_and(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -87,6 +91,7 @@ static inline void __lse_atomic_and(int i, atomic_t *v) static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -106,6 +111,7 @@ ATOMIC_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic_sub(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " neg %w[i], %w[i]\n" " stadd %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -118,6 +124,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ @@ -139,6 +146,7 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -159,6 +167,7 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -175,6 +184,7 @@ ATOMIC64_OP(add, stadd) static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %[i], %[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -203,6 +213,7 @@ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -222,6 +233,7 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic64_and(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %[i], %[i]\n" " stclr %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -232,6 +244,7 @@ static inline void __lse_atomic64_and(s64 i, atomic64_t *v) static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -251,6 +264,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " neg %[i], %[i]\n" " stadd %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -263,6 +277,7 @@ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ @@ -284,6 +299,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -305,6 +321,7 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile( + __LSE_PREAMBLE "1: ldr %x[tmp], %[v]\n" " subs %[ret], %x[tmp], #1\n" " b.lt 2f\n" @@ -332,6 +349,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " mov %" #w "[tmp], %" #w "[old]\n" \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ " mov %" #w "[ret], %" #w "[tmp]" \ @@ -379,6 +397,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ + __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index d064a50deb5f..8d2a7de39744 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -35,6 +35,9 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } #define ip_fast_csum ip_fast_csum +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + #include <asm-generic/checksum.h> #endif /* __ASM_CHECKSUM_H */ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index b0d53a265f1d..935d2aa231bf 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -4,6 +4,9 @@ */ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H + +#include <asm-generic/compat.h> + #ifdef CONFIG_COMPAT /* @@ -13,8 +16,6 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <asm-generic/compat.h> - #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ #define COMPAT_UTS_MACHINE "armv8b\0\0" @@ -113,23 +114,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d72d995b7e25..b4a40535a3d8 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -39,6 +39,7 @@ struct cpuinfo_arm64 { u32 reg_id_isar3; u32 reg_id_isar4; u32 reg_id_isar5; + u32 reg_id_isar6; u32 reg_id_mmfr0; u32 reg_id_mmfr1; u32 reg_id_mmfr2; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b92683871119..865e0253fc1e 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,7 @@ #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1418040 35 #define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_1165522 37 +#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37 #define ARM64_HAS_ADDRESS_AUTH_ARCH 38 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 @@ -55,8 +55,10 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 -#define ARM64_WORKAROUND_1319367 48 +#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 +#define ARM64_HAS_E0PD 49 +#define ARM64_HAS_RNG 50 -#define ARM64_NCAPS 49 +#define ARM64_NCAPS 51 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4261d55e8506..92ef9539874a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -613,6 +613,11 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } +static inline bool system_capabilities_finalized(void) +{ + return static_branch_likely(&arm64_const_caps_ready); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index aca07c2f6e6e..a87a93f67671 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -85,6 +85,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -111,6 +113,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 72acd2db167f..ec213b4a1650 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -38,7 +38,7 @@ static inline void local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -50,6 +50,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index b54d3a86c444..44531a69d32b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -93,21 +93,17 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) -#define __efi_call_early(f, ...) f(__VA_ARGS__) -#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) -#define efi_is_64bit() (true) +#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) +#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) +#define efi_is_native() (true) -#define efi_table_attr(table, attr, instance) \ - ((table##_t *)instance)->attr +#define efi_table_attr(inst, attr) (inst->attr) -#define efi_call_proto(protocol, f, instance, ...) \ - ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) +#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) #define alloc_screen_info(x...) &screen_info -static inline void free_screen_info(efi_system_table_t *sys_table_arg, - struct screen_info *si) +static inline void free_screen_info(struct screen_info *si) { } diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 4d5f3b5f50cd..b87c6e276ab1 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -45,8 +45,8 @@ void do_sysinstr(unsigned int esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); -void el0_svc_handler(struct pt_regs *regs); -void el0_svc_compat_handler(struct pt_regs *regs); +void do_el0_svc(struct pt_regs *regs); +void do_el0_svc_compat(struct pt_regs *regs); void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 3d2f2472a36c..0f00265248b5 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -86,6 +86,14 @@ #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) #define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) #define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) +#define KERNEL_HWCAP_SVEI8MM __khwcap2_feature(SVEI8MM) +#define KERNEL_HWCAP_SVEF32MM __khwcap2_feature(SVEF32MM) +#define KERNEL_HWCAP_SVEF64MM __khwcap2_feature(SVEF64MM) +#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16) +#define KERNEL_HWCAP_I8MM __khwcap2_feature(I8MM) +#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) +#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) +#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 12a561a54128..d24b527e8c00 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -96,6 +96,10 @@ static inline void crash_post_resume(void) {} struct kimage_arch { void *dtb; unsigned long dtb_mem; + /* Core ELF header buffer */ + void *elf_headers; + unsigned long elf_headers_mem; + unsigned long elf_headers_sz; }; extern const struct kexec_file_ops kexec_image_ops; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5efe5ca8fecf..688c63412cc2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,7 +17,6 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> -#include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -219,6 +218,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -283,6 +314,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; @@ -304,7 +340,7 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c61260cf63c5..d87aa609d2b6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -53,7 +52,7 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); @@ -325,9 +324,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -446,8 +442,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -491,6 +485,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + int kvm_perf_init(void); int kvm_perf_teardown(void); @@ -547,7 +549,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * wrong, and hyp will crash and burn when it uses any * cpus_have_const_cap() wrapper. */ - BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); + BUG_ON(!system_capabilities_finalized()); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); /* @@ -571,7 +573,7 @@ static inline bool kvm_arch_requires_vhe(void) return true; /* Some implementations have defects that confine them to VHE */ - if (cpus_have_cap(ARM64_WORKAROUND_1165522)) + if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) return true; return false; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 97f21cc66657..a3a6a2ba9a63 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -91,11 +91,11 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); /* - * ARM erratum 1165522 requires the actual execution of the above - * before we can switch to the EL1/EL0 translation regime used by + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL1/EL0 translation regime used by * the guest. */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); } #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h deleted file mode 100644 index 02b5c48fd467..000000000000 --- a/arch/arm64/include/asm/kvm_mmio.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#ifndef __ARM64_KVM_MMIO_H__ -#define __ARM64_KVM_MMIO_H__ - -#include <linux/kvm_host.h> -#include <asm/kvm_arm.h> - -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ -struct kvm_decode { - unsigned long rt; - bool sign_extend; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 1b266292f0be..ebee3113a62f 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,4 +4,20 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define SYM_FUNC_START_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START(x) + +#define SYM_FUNC_START_WEAK_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START_WEAK(x) + +#define SYM_FUNC_END_PI(x) \ + SYM_FUNC_END(x); \ + SYM_FUNC_END_ALIAS(__pi_##x) + #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 80b388278149..d429f7701c36 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,7 +4,9 @@ #include <asm/atomic_ll_sc.h> -#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#ifdef CONFIG_ARM64_LSE_ATOMICS + +#define __LSE_PREAMBLE ".arch armv8-a+lse\n" #include <linux/compiler_types.h> #include <linux/export.h> @@ -14,8 +16,6 @@ #include <asm/atomic_lse.h> #include <asm/cpucaps.h> -__asm__(".arch_extension lse"); - extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -34,9 +34,9 @@ static inline bool system_uses_lse_atomics(void) /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) -#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#else /* CONFIG_ARM64_LSE_ATOMICS */ static inline bool system_uses_lse_atomics(void) { return false; } @@ -44,5 +44,5 @@ static inline bool system_uses_lse_atomics(void) { return false; } #define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc -#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* CONFIG_ARM64_LSE_ATOMICS */ #endif /* __ASM_LSE_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index f217e3292919..e4d862420bb4 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -29,52 +29,11 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -static inline bool arm64_kernel_unmapped_at_el0(void) -{ - return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && - cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); -} +extern bool arm64_use_ng_mappings; -static inline bool arm64_kernel_use_ng_mappings(void) +static inline bool arm64_kernel_unmapped_at_el0(void) { - bool tx1_bug; - - /* What's a kpti? Use global mappings if we don't know. */ - if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - return false; - - /* - * Note: this function is called before the CPU capabilities have - * been configured, so our early mappings will be global. If we - * later determine that kpti is required, then - * kpti_install_ng_mappings() will make them non-global. - */ - if (arm64_kernel_unmapped_at_el0()) - return true; - - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return false; - - /* - * KASLR is enabled so we're going to be enabling kpti on non-broken - * CPUs regardless of their susceptibility to Meltdown. Rather - * than force everybody to go through the G -> nG dance later on, - * just put down non-global mappings from the beginning. - */ - if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - tx1_bug = false; -#ifndef MODULE - } else if (!static_branch_likely(&arm64_const_caps_ready)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - tx1_bug = is_midr_in_range_list(read_cpuid_id(), - cavium_erratum_27456_cpus); -#endif - } else { - tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456); - } - - return !tx1_bug && kaslr_offset() > 0; + return arm64_use_ng_mappings; } typedef void (*bp_hardening_cb_t)(void); @@ -128,6 +87,7 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); +extern bool kaslr_requires_kpti(void); #define INIT_MM_CONTEXT(name) \ .pgd = init_pg_dir, diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d9fbd433cc17..6bf5e650da78 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -110,6 +110,7 @@ #define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) +#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ /* * Level 2 descriptor (PMD). @@ -292,6 +293,8 @@ #define TCR_HD (UL(1) << 40) #define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) +#define TCR_E0PD0 (UL(1) << 55) +#define TCR_E0PD1 (UL(1) << 56) /* * TTBR. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 8dc6c5cdabe6..6f87839f0249 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -26,8 +26,8 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PTE_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PTE_NG : 0) -#define PMD_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0) +#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) @@ -85,13 +85,12 @@ #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -100,7 +99,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5d15b4735a0e..538c85e62f86 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -96,12 +96,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -117,8 +113,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) @@ -445,6 +441,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) +#define pmd_leaf(pmd) pmd_sect(pmd) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -529,6 +526,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_leaf(pud) pud_sect(pud) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index d49951647014..80e946b2abee 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -79,11 +79,11 @@ static inline bool should_resched(int preempt_offset) return pc == preempt_offset; } -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 0b8e7269ec82..38187f74e089 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,7 +5,7 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H -#ifdef CONFIG_ARM64_PTDUMP_CORE +#ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> #include <linux/seq_file.h> @@ -21,15 +21,15 @@ struct ptdump_info { unsigned long base_addr; }; -void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); -#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS +void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +#ifdef CONFIG_PTDUMP_DEBUGFS void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } #endif void ptdump_check_wx(void); -#endif /* CONFIG_ARM64_PTDUMP_CORE */ +#endif /* CONFIG_PTDUMP_CORE */ #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_check_wx() diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 25a73aab438f..3994169985ef 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -8,7 +8,6 @@ #include <asm-generic/sections.h> extern char __alt_instructions[], __alt_instructions_end[]; -extern char __exception_text_start[], __exception_text_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7434844036d3..89cba2622b79 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -26,6 +26,8 @@ DECLARE_PER_CPU(bool, fpsimd_context_busy); static __must_check inline bool may_use_simd(void) { /* + * We must make sure that the SVE has been initialized properly + * before using the SIMD in kernel. * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy @@ -33,8 +35,10 @@ static __must_check inline bool may_use_simd(void) * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ - return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(fpsimd_context_busy); + return !WARN_ON(!system_capabilities_finalized()) && + system_supports_fpsimd() && + !in_irq() && !irqs_disabled() && !in_nmi() && + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index b093b287babf..102404dc1e13 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -11,4 +11,13 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() +/* + * Changing this will break osq_lock() thanks to the call inside + * smp_cond_load_relaxed(). + * + * See: + * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net + */ +#define vcpu_is_preempted(cpu) false + #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6e919fafb43d..b91570ff9db1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -146,6 +146,7 @@ #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) #define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) #define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) @@ -365,6 +366,9 @@ #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) +#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) +#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) + #define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) #define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) @@ -538,7 +542,20 @@ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) +/* MAIR_ELx memory attributes (used by Linux) */ +#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) +#define MAIR_ATTR_DEVICE_nGnRE UL(0x04) +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_NC UL(0x44) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) +#define MAIR_ATTR_NORMAL UL(0xff) +#define MAIR_ATTR_MASK UL(0xff) + +/* Position the attr at the correct index */ +#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) + /* id_aa64isar0 */ +#define ID_AA64ISAR0_RNDR_SHIFT 60 #define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 @@ -553,6 +570,10 @@ #define ID_AA64ISAR0_AES_SHIFT 4 /* id_aa64isar1 */ +#define ID_AA64ISAR1_I8MM_SHIFT 52 +#define ID_AA64ISAR1_DGH_SHIFT 48 +#define ID_AA64ISAR1_BF16_SHIFT 44 +#define ID_AA64ISAR1_SPECRES_SHIFT 40 #define ID_AA64ISAR1_SB_SHIFT 36 #define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 @@ -605,12 +626,20 @@ #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 /* id_aa64zfr0 */ +#define ID_AA64ZFR0_F64MM_SHIFT 56 +#define ID_AA64ZFR0_F32MM_SHIFT 52 +#define ID_AA64ZFR0_I8MM_SHIFT 44 #define ID_AA64ZFR0_SM4_SHIFT 40 #define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BF16_SHIFT 20 #define ID_AA64ZFR0_BITPERM_SHIFT 16 #define ID_AA64ZFR0_AES_SHIFT 4 #define ID_AA64ZFR0_SVEVER_SHIFT 0 +#define ID_AA64ZFR0_F64MM 0x1 +#define ID_AA64ZFR0_F32MM 0x1 +#define ID_AA64ZFR0_I8MM 0x1 +#define ID_AA64ZFR0_BF16 0x1 #define ID_AA64ZFR0_SM4 0x1 #define ID_AA64ZFR0_SHA3 0x1 #define ID_AA64ZFR0_BITPERM 0x1 @@ -655,6 +684,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_FWB_SHIFT 40 #define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 @@ -679,6 +709,14 @@ #define ID_ISAR5_AES_SHIFT 4 #define ID_ISAR5_SEVL_SHIFT 0 +#define ID_ISAR6_I8MM_SHIFT 24 +#define ID_ISAR6_BF16_SHIFT 20 +#define ID_ISAR6_SPECRES_SHIFT 16 +#define ID_ISAR6_SB_SHIFT 12 +#define ID_ISAR6_FHM_SHIFT 8 +#define ID_ISAR6_DP_SHIFT 4 +#define ID_ISAR6_JSCVT_SHIFT 0 + #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 #define MVFR0_FPSQRT_SHIFT 20 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 2629a68b8724..1dd22da1c3a9 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,11 +38,10 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 436 +#define __NR_compat_syscalls 439 #endif #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_SYS_CLONE3 #ifndef __COMPAT_SYSCALL_NR #include <uapi/asm/unistd.h> diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 94ab29cf4f00..c1c61635f89c 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -879,6 +879,10 @@ __SYSCALL(__NR_fspick, sys_fspick) __SYSCALL(__NR_pidfd_open, sys_pidfd_open) #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) +#define __NR_openat2 437 +__SYSCALL(__NR_openat2, sys_openat2) +#define __NR_pidfd_getfd 438 +__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index c50ee1b7d5cd..537b1e695365 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -16,7 +16,7 @@ #define VDSO_HAS_CLOCK_GETRES 1 -#define VDSO_HAS_32BIT_FALLBACK 1 +#define BUILD_VDSO32 1 static __always_inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv, diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h new file mode 100644 index 000000000000..2ca708ab9b20 --- /dev/null +++ b/arch/arm64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM64_VMALLOC_H +#define _ASM_ARM64_VMALLOC_H + +#endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a1e72886b30c..7752d93bb50f 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -65,5 +65,13 @@ #define HWCAP2_SVESM4 (1 << 6) #define HWCAP2_FLAGM2 (1 << 7) #define HWCAP2_FRINT (1 << 8) +#define HWCAP2_SVEI8MM (1 << 9) +#define HWCAP2_SVEF32MM (1 << 10) +#define HWCAP2_SVEF64MM (1 << 11) +#define HWCAP2_SVEBF16 (1 << 12) +#define HWCAP2_I8MM (1 << 13) +#define HWCAP2_BF16 (1 << 14) +#define HWCAP2_DGH (1 << 15) +#define HWCAP2_RNG (1 << 16) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 820e5751ada7..ba85bb23f060 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -220,10 +220,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) #define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) -/* EL0 Virtual Timer Registers */ +/* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) /* KVM-as-firmware specific pseudo-registers */ #define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index 4703d218663a..f83a70e07df8 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -19,5 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYS_CLONE3 #include <asm-generic/unistd.h> diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe..a100483b47c4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); /* * SEA can interrupt SError, mask it and describe this as an NMI so diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index ca158be21f83..7832b3216370 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -618,7 +618,8 @@ static struct insn_emulation_ops setend_ops = { }; /* - * Invoked as late_initcall, since not needed before init spawned. + * Invoked as core_initcall, which guarantees that the instruction + * emulation is ready for userspace. */ static int __init armv8_deprecated_init(void) { diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 6ea337d464c4..32c7bf858dd9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart) mov x0, #HVC_SOFT_RESTART hvc #0 // no return -1: mov x18, x1 // entry +1: mov x8, x1 // entry mov x0, x2 // arg0 mov x1, x3 // arg1 mov x2, x4 // arg2 - br x18 + br x8 ENDPROC(__cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6a09ca7644ea..703ad0a84f99 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -547,6 +547,9 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } }; @@ -756,6 +759,20 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE +static const struct midr_range erratum_speculative_at_vhe_list[] = { +#ifdef CONFIG_ARM64_ERRATUM_1165522 + /* Cortex A76 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), +#endif +#ifdef CONFIG_ARM64_ERRATUM_1530923 + /* Cortex A55 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0), +#endif + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -882,12 +899,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1165522 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE { - /* Cortex-A76 r0p0 to r2p0 */ - .desc = "ARM erratum 1165522", - .capability = ARM64_WORKAROUND_1165522, - ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + .desc = "ARM errata 1165522, 1530923", + .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE, + ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1463225 @@ -924,7 +940,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_ERRATUM_1319367 { .desc = "ARM erratum 1319367", - .capability = ARM64_WORKAROUND_1319367, + .capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE, ERRATA_MIDR_RANGE_LIST(ca57_a72), }, #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 04cf64e9f0c9..0b6715625cf6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -32,9 +32,7 @@ static unsigned long elf_hwcap __read_mostly; #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -47,19 +45,23 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM6 /* Need also bit for ARM64_CB_PATCH */ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +bool arm64_use_ng_mappings = false; +EXPORT_SYMBOL(arm64_use_ng_mappings); + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This * will be used to determine if a new booting CPU should * go through the verification process to make sure that it * supports the system capabilities, without using a hotplug - * notifier. + * notifier. This is also used to decide if we could use + * the fast path for checking constant CPU caps. */ -static bool sys_caps_initialised; - -static inline void set_sys_caps_initialised(void) +DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); +EXPORT_SYMBOL(arm64_const_caps_ready); +static inline void finalize_system_capabilities(void) { - sys_caps_initialised = true; + static_branch_enable(&arm64_const_caps_ready); } static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) @@ -119,6 +121,7 @@ static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), @@ -135,6 +138,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -177,10 +184,18 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), @@ -225,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), @@ -313,6 +329,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_isar6[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_FHM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ @@ -396,6 +423,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), /* Op1 = 0, CRn = 0, CRm = 3 */ ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), @@ -600,6 +628,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); @@ -753,6 +782,8 @@ void update_cpu_features(int cpu, info->reg_id_isar4, boot->reg_id_isar4); taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); + taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, + info->reg_id_isar6, boot->reg_id_isar6); /* * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and @@ -785,7 +816,7 @@ void update_cpu_features(int cpu, /* Probe vector lengths, unless we already gave up on SVE */ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - !sys_caps_initialised) + !system_capabilities_finalized()) sve_update_vq_map(); } @@ -831,6 +862,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_ISAR3_EL1); read_sysreg_case(SYS_ID_ISAR4_EL1); read_sysreg_case(SYS_ID_ISAR5_EL1); + read_sysreg_case(SYS_ID_ISAR6_EL1); read_sysreg_case(SYS_MVFR0_EL1); read_sysreg_case(SYS_MVFR1_EL1); read_sysreg_case(SYS_MVFR2_EL1); @@ -965,6 +997,46 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) return has_cpuid_feature(entry, scope); } +/* + * This check is triggered during the early boot before the cpufeature + * is initialised. Checking the status on the local CPU allows the boot + * CPU to detect the need for non-global mappings and thus avoiding a + * pagetable re-write after all the CPUs are booted. This check will be + * anyway run on individual CPUs, allowing us to get the consistent + * state once the SMP CPUs are up and thus make the switch to non-global + * mappings if required. + */ +bool kaslr_requires_kpti(void) +{ + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return false; + + /* + * E0PD does a similar job to KPTI so can be used instead + * where available. + */ + if (IS_ENABLED(CONFIG_ARM64_E0PD)) { + u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + if (cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_E0PD_SHIFT)) + return false; + } + + /* + * Systems affected by Cavium erratum 24756 are incompatible + * with KPTI. + */ + if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { + extern const struct midr_range cavium_erratum_27456_cpus[]; + + if (is_midr_in_range_list(read_cpuid_id(), + cavium_erratum_27456_cpus)) + return false; + } + + return kaslr_offset() > 0; +} + static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ @@ -975,6 +1047,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, static const struct midr_range kpti_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), @@ -1008,7 +1081,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (kaslr_requires_kpti()) { if (!__kpti_forced) { str = "KASLR"; __kpti_forced = 1; @@ -1043,7 +1116,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; - static bool kpti_applied = false; int cpu = smp_processor_id(); /* @@ -1051,7 +1123,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) * it already or we have KASLR enabled and therefore have not * created any global mappings at all. */ - if (kpti_applied || kaslr_offset() > 0) + if (arm64_use_ng_mappings) return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); @@ -1061,7 +1133,7 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) cpu_uninstall_idmap(); if (!cpu) - kpti_applied = true; + arm64_use_ng_mappings = true; return; } @@ -1251,6 +1323,14 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_E0PD +static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) +{ + if (this_cpu_has_cap(ARM64_HAS_E0PD)) + sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); +} +#endif /* CONFIG_ARM64_E0PD */ + #ifdef CONFIG_ARM64_PSEUDO_NMI static bool enable_pseudo_nmi; @@ -1291,7 +1371,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ -#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, @@ -1302,7 +1382,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 2, }, -#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* CONFIG_ARM64_LSE_ATOMICS */ { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, @@ -1368,7 +1448,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1567,6 +1647,31 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 1, }, #endif +#ifdef CONFIG_ARM64_E0PD + { + .desc = "E0PD", + .capability = ARM64_HAS_E0PD, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_E0PD_SHIFT, + .matches = has_cpuid_feature, + .min_field_value = 1, + .cpu_enable = cpu_enable_e0pd, + }, +#endif +#ifdef CONFIG_ARCH_RANDOM + { + .desc = "Random Number Generator", + .capability = ARM64_HAS_RNG, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, +#endif {}, }; @@ -1596,6 +1701,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .match_list = list, \ } +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -1638,6 +1749,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1651,6 +1763,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), @@ -1658,8 +1773,12 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH @@ -1669,8 +1788,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), @@ -1974,7 +2120,7 @@ void check_local_cpu_capabilities(void) * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ - if (!sys_caps_initialised) + if (!system_capabilities_finalized()) update_cpu_capabilities(SCOPE_LOCAL_CPU); else verify_local_cpu_capabilities(); @@ -1988,14 +2134,6 @@ static void __init setup_boot_cpu_capabilities(void) enable_cpu_capabilities(SCOPE_BOOT_CPU); } -DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); -EXPORT_SYMBOL(arm64_const_caps_ready); - -static void __init mark_const_caps_ready(void) -{ - static_branch_enable(&arm64_const_caps_ready); -} - bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { @@ -2054,7 +2192,6 @@ void __init setup_cpu_features(void) u32 cwg; setup_system_capabilities(); - mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) @@ -2067,7 +2204,7 @@ void __init setup_cpu_features(void) minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ - set_sys_caps_initialised(); + finalize_system_capabilities(); /* * Check for sane CTR_EL0.CWG value. diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 56bba746da1c..86136075ae41 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -84,6 +84,14 @@ static const char *const hwcap_str[] = { "svesm4", "flagm2", "frint", + "svei8mm", + "svef32mm", + "svef64mm", + "svebf16", + "i8mm", + "bf16", + "dgh", + "rng", NULL }; @@ -360,6 +368,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 5dce5e56995a..fde59981445c 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -36,14 +36,14 @@ static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_pc); -static void el1_undef(struct pt_regs *regs) +static void notrace el1_undef(struct pt_regs *regs) { local_daif_inherit(regs); do_undefinstr(regs); } NOKPROBE_SYMBOL(el1_undef); -static void el1_inv(struct pt_regs *regs, unsigned long esr) +static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); bad_mode(regs, 0, esr); @@ -215,7 +215,7 @@ static void notrace el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - el0_svc_handler(regs); + do_el0_svc(regs); } NOKPROBE_SYMBOL(el0_svc); @@ -281,7 +281,7 @@ static void notrace el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - el0_svc_compat_handler(regs); + do_el0_svc_compat(regs); } NOKPROBE_SYMBOL(el0_svc_compat); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7c6a0a41676f..9461d812ae27 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -60,16 +60,16 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 +alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif - .endif alternative_else_nop_endif + .endif #endif sub sp, sp, #S_FRAME_SIZE @@ -167,9 +167,13 @@ alternative_cb_end .if \el == 0 clear_gp_regs mrs x21, sp_el0 - ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug - disable_step_tsk x19, x20 // exceptions when scheduling. + ldr_this_cpu tsk, __entry_task, x20 + msr sp_el0, tsk + + // Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions + // when scheduling. + ldr x19, [tsk, #TSK_TI_FLAGS] + disable_step_tsk x19, x20 apply_ssbd 1, x22, x23 @@ -232,13 +236,6 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif - /* - * Set sp_el0 to current thread_info. - */ - .if \el == 0 - msr sp_el0, tsk - .endif - /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -605,7 +602,7 @@ el1_irq: irq_handler -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count alternative_if ARM64_HAS_IRQ_PRIO_MASKING /* @@ -653,6 +650,7 @@ el0_sync: mov x0, sp bl el0_sync_handler b ret_to_user +ENDPROC(el0_sync) #ifdef CONFIG_COMPAT .align 6 @@ -661,16 +659,18 @@ el0_sync_compat: mov x0, sp bl el0_sync_compat_handler b ret_to_user -ENDPROC(el0_sync) +ENDPROC(el0_sync_compat) .align 6 el0_irq_compat: kernel_entry 0, 32 b el0_irq_naked +ENDPROC(el0_irq_compat) el0_error_compat: kernel_entry 0, 32 b el0_error_naked +ENDPROC(el0_error_compat) #endif .align 6 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3eb338f14386..94289d126993 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -269,6 +269,7 @@ static void sve_free(struct task_struct *task) */ static void task_fpsimd_load(void) { + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -289,6 +290,7 @@ static void fpsimd_save(void) this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -1092,6 +1094,7 @@ void fpsimd_bind_task_to_cpu(void) struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; @@ -1114,6 +1117,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; @@ -1128,8 +1132,19 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } get_cpu_fpsimd_context(); @@ -1148,7 +1163,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; get_cpu_fpsimd_context(); @@ -1179,7 +1194,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; - + /* + * If we don't support fpsimd, bail out after we have + * reset the fpsimd_cpu for this task and clear the + * FPSTATE. + */ + if (!system_supports_fpsimd()) + return; barrier(); set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); @@ -1193,6 +1214,7 @@ void fpsimd_flush_task_state(struct task_struct *t) */ static void fpsimd_flush_cpu_state(void) { + WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -1203,6 +1225,8 @@ static void fpsimd_flush_cpu_state(void) */ void fpsimd_save_and_flush_cpu_state(void) { + if (!system_supports_fpsimd()) + return; WARN_ON(preemptible()); __get_cpu_fpsimd_context(); fpsimd_save(); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index a96b2921d22c..590963c9c609 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -182,78 +182,79 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); -/* - * Copies length bytes, starting at src_start into an new page, - * perform cache maintentance, then maps it at the specified address low - * address as executable. - * - * This is used by hibernate to copy the code it needs to execute when - * overwriting the kernel text. This function generates a new set of page - * tables, which it loads into ttbr0. - * - * Length is provided as we probably only want 4K of data, even on a 64K - * page system. - */ -static int create_safe_exec_page(void *src_start, size_t length, - unsigned long dst_addr, - phys_addr_t *phys_dst_addr, - void *(*allocator)(gfp_t mask), - gfp_t mask) +static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, + unsigned long dst_addr, + pgprot_t pgprot) { - int rc = 0; - pgd_t *trans_pgd; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; - unsigned long dst = (unsigned long)allocator(mask); - - if (!dst) { - rc = -ENOMEM; - goto out; - } - - memcpy((void *)dst, src_start, length); - __flush_icache_range(dst, dst + length); - - trans_pgd = allocator(mask); - if (!trans_pgd) { - rc = -ENOMEM; - goto out; - } pgdp = pgd_offset_raw(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { - pudp = allocator(mask); - if (!pudp) { - rc = -ENOMEM; - goto out; - } + pudp = (void *)get_safe_page(GFP_ATOMIC); + if (!pudp) + return -ENOMEM; pgd_populate(&init_mm, pgdp, pudp); } pudp = pud_offset(pgdp, dst_addr); if (pud_none(READ_ONCE(*pudp))) { - pmdp = allocator(mask); - if (!pmdp) { - rc = -ENOMEM; - goto out; - } + pmdp = (void *)get_safe_page(GFP_ATOMIC); + if (!pmdp) + return -ENOMEM; pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, dst_addr); if (pmd_none(READ_ONCE(*pmdp))) { - ptep = allocator(mask); - if (!ptep) { - rc = -ENOMEM; - goto out; - } + ptep = (void *)get_safe_page(GFP_ATOMIC); + if (!ptep) + return -ENOMEM; pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); + set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); + + return 0; +} + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintenance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr) +{ + void *page = (void *)get_safe_page(GFP_ATOMIC); + pgd_t *trans_pgd; + int rc; + + if (!page) + return -ENOMEM; + + memcpy(page, src_start, length); + __flush_icache_range((unsigned long)page, (unsigned long)page + length); + + trans_pgd = (void *)get_safe_page(GFP_ATOMIC); + if (!trans_pgd) + return -ENOMEM; + + rc = trans_pgd_map_page(trans_pgd, page, dst_addr, + PAGE_KERNEL_EXEC); + if (rc) + return rc; /* * Load our new page tables. A strict BBM approach requires that we @@ -269,13 +270,12 @@ static int create_safe_exec_page(void *src_start, size_t length, */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1); + write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1); isb(); - *phys_dst_addr = virt_to_phys((void *)dst); + *phys_dst_addr = virt_to_phys(page); -out: - return rc; + return 0; } #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) @@ -450,7 +450,7 @@ static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, return -ENOMEM; } else { set_pud(dst_pudp, - __pud(pud_val(pud) & ~PMD_SECT_RDONLY)); + __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); } } while (dst_pudp++, src_pudp++, addr = next, addr != end); @@ -476,6 +476,24 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, return 0; } +static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, + unsigned long end) +{ + int rc; + pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); + + if (!trans_pgd) { + pr_err("Failed to allocate memory for temporary page tables.\n"); + return -ENOMEM; + } + + rc = copy_page_tables(trans_pgd, start, end); + if (!rc) + *dst_pgdp = trans_pgd; + + return rc; +} + /* * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). * @@ -484,7 +502,7 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, */ int swsusp_arch_resume(void) { - int rc = 0; + int rc; void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; @@ -497,15 +515,9 @@ int swsusp_arch_resume(void) * Create a second copy of just the linear map, and use this when * restoring. */ - tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!tmp_pg_dir) { - pr_err("Failed to allocate memory for temporary page tables.\n"); - rc = -ENOMEM; - goto out; - } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END); + rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END); if (rc) - goto out; + return rc; /* * We need a zero page that is zero before & after resume in order to @@ -514,8 +526,7 @@ int swsusp_arch_resume(void) zero_page = (void *)get_safe_page(GFP_ATOMIC); if (!zero_page) { pr_err("Failed to allocate zero page.\n"); - rc = -ENOMEM; - goto out; + return -ENOMEM; } /* @@ -530,11 +541,10 @@ int swsusp_arch_resume(void) */ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, (unsigned long)hibernate_exit, - &phys_hibernate_exit, - (void *)get_safe_page, GFP_ATOMIC); + &phys_hibernate_exit); if (rc) { pr_err("Failed to create safe executable page for hibernate_exit code.\n"); - goto out; + return rc; } /* @@ -561,8 +571,7 @@ int swsusp_arch_resume(void) resume_hdr.reenter_kernel, restore_pblist, resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); -out: - return rc; + return 0; } int hibernate_resume_nonboot_cpu_disable(void) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 2a11a962e571..53b8a4ee64ff 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -120,6 +120,17 @@ u64 __init kaslr_early_init(u64 dt_phys) return 0; } + /* + * Mix in any entropy obtainable architecturally, open coded + * since this runs extremely early. + */ + if (__early_cpu_has_rndr()) { + unsigned long raw; + + if (__arm64_rndr(&raw)) + seed ^= raw; + } + if (!seed) { kaslr_status = KASLR_DISABLED_NO_SEED; return 0; diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 29a9428486a5..af9987c154ca 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -47,10 +47,6 @@ static void *image_load(struct kimage *image, struct kexec_segment *kernel_segment; int ret; - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return ERR_PTR(-EOPNOTSUPP); - /* * We require a kernel with an unambiguous Image header. Per * Documentation/arm64/booting.rst, this is the case when image_size diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 0df8493624e0..8e9c924423b4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -160,18 +160,6 @@ void machine_kexec(struct kimage *kimage) kexec_image_info(kimage); - pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, - kimage->control_code_page); - pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, - &reboot_code_buffer_phys); - pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, - reboot_code_buffer); - pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, - arm64_relocate_new_kernel); - pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", - __func__, __LINE__, arm64_relocate_new_kernel_size, - arm64_relocate_new_kernel_size); - /* * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use * after the kernel is shut down. diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 7b08bf9499b6..dd3ae8081b38 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -17,12 +17,15 @@ #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/random.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> #include <linux/vmalloc.h> #include <asm/byteorder.h> /* relevant device tree properties */ +#define FDT_PROP_KEXEC_ELFHDR "linux,elfcorehdr" +#define FDT_PROP_MEM_RANGE "linux,usable-memory-range" #define FDT_PROP_INITRD_START "linux,initrd-start" #define FDT_PROP_INITRD_END "linux,initrd-end" #define FDT_PROP_BOOTARGS "bootargs" @@ -40,6 +43,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) vfree(image->arch.dtb); image->arch.dtb = NULL; + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } @@ -55,6 +62,31 @@ static int setup_dtb(struct kimage *image, off = ret; + ret = fdt_delprop(dtb, off, FDT_PROP_KEXEC_ELFHDR); + if (ret && ret != -FDT_ERR_NOTFOUND) + goto out; + ret = fdt_delprop(dtb, off, FDT_PROP_MEM_RANGE); + if (ret && ret != -FDT_ERR_NOTFOUND) + goto out; + + if (image->type == KEXEC_TYPE_CRASH) { + /* add linux,elfcorehdr */ + ret = fdt_appendprop_addrrange(dtb, 0, off, + FDT_PROP_KEXEC_ELFHDR, + image->arch.elf_headers_mem, + image->arch.elf_headers_sz); + if (ret) + return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL); + + /* add linux,usable-memory-range */ + ret = fdt_appendprop_addrrange(dtb, 0, off, + FDT_PROP_MEM_RANGE, + crashk_res.start, + crashk_res.end - crashk_res.start + 1); + if (ret) + return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL); + } + /* add bootargs */ if (cmdline) { ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline); @@ -125,8 +157,8 @@ out: } /* - * More space needed so that we can add initrd, bootargs, kaslr-seed, and - * rng-seed. + * More space needed so that we can add initrd, bootargs, kaslr-seed, + * rng-seed, userable-memory-range and elfcorehdr. */ #define DTB_EXTRA_SPACE 0x1000 @@ -174,6 +206,43 @@ static int create_dtb(struct kimage *image, } } +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + u64 i; + phys_addr_t start, end; + + nr_ranges = 1; /* for exclusion of crashkernel region */ + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) + nr_ranges++; + + cmem = kmalloc(sizeof(struct crash_mem) + + sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + cmem->ranges[cmem->nr_ranges].start = start; + cmem->ranges[cmem->nr_ranges].end = end - 1; + cmem->nr_ranges++; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + + kfree(cmem); + return ret; +} + int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -181,14 +250,43 @@ int load_other_segments(struct kimage *image, char *cmdline) { struct kexec_buf kbuf; - void *dtb = NULL; - unsigned long initrd_load_addr = 0, dtb_len; + void *headers, *dtb = NULL; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len; int ret = 0; kbuf.image = image; /* not allocate anything below the kernel */ kbuf.buf_min = kernel_load_addr + kernel_size; + /* load elf core header */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out_err; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = SZ_64K; /* largest supported page size */ + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out_err; + } + image->arch.elf_headers = headers; + image->arch.elf_headers_mem = kbuf.mem; + image->arch.elf_headers_sz = headers_sz; + + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->arch.elf_headers_mem, headers_sz, headers_sz); + } + /* load initrd */ if (initrd) { kbuf.buffer = initrd; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 71f788cd2b18..bbb0f0c145f6 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -360,8 +360,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) asmlinkage void ret_from_fork(void) asm("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -394,11 +394,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } /* - * If a TLS pointer was passed to clone (4th argument), use it - * for the new thread. + * If a TLS pointer was passed to clone, use it for the new + * thread. */ if (clone_flags & CLONE_SETTLS) - p->thread.uw.tp_value = childregs->regs[3]; + p->thread.uw.tp_value = tls; } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; @@ -646,6 +646,6 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void) * Only allow a task to be preempted once cpufeatures have been * enabled. */ - if (static_branch_likely(&arm64_const_caps_ready)) + if (system_capabilities_finalized()) preempt_schedule_irq(); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6771c399d40c..cd6e5fa48b9c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, { int ret; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); if (ret) return ret; @@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; if (target == current) @@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 56f664561754..a34890bf309f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -285,6 +285,13 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + /* + * If know now we are going to need KPTI then use non-global + * mappings from the start, avoiding the cost of rewriting + * everything later. + */ + arm64_use_ng_mappings = kaslr_requires_kpti(); + early_fixmap_init(); early_ioremap_init(); @@ -353,9 +360,6 @@ void __init setup_arch(char **cmdline_p) init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page); #endif -#ifdef CONFIG_VT - conswitchp = &dummy_con; -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index dd2cdc0d5be2..339882db5a91 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -371,6 +371,8 @@ static int parse_user_sigframe(struct user_ctxs *user, goto done; case FPSIMD_MAGIC: + if (!system_supports_fpsimd()) + goto invalid; if (user->fpsimd) goto invalid; @@ -506,7 +508,7 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0) err = parse_user_sigframe(&user, sf); - if (err == 0) { + if (err == 0 && system_supports_fpsimd()) { if (!user.fpsimd) return -EINVAL; @@ -623,7 +625,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) { + if (err == 0 && system_supports_fpsimd()) { struct fpsimd_context __user *fpsimd_ctx = apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 12a585386c2f..82feca6f7052 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -223,7 +223,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs, current); aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; - if (err == 0) + if (err == 0 && system_supports_fpsimd()) err |= compat_restore_vfp_context(&aux->vfp); return err; @@ -419,7 +419,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; - if (err == 0) + if (err == 0 && system_supports_fpsimd()) err |= compat_preserve_vfp_context(&aux->vfp); __put_user_error(0, &aux->end_magic, err); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index 52cfc6148355..b26955f56750 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -37,7 +37,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) /* Unsupported */ if (state == ARM64_SSBD_UNKNOWN) - return -EINVAL; + return -ENODEV; /* Treat the unaffected/mitigated state separately */ if (state == ARM64_SSBD_MITIGATED) { @@ -102,7 +102,7 @@ static int ssbd_prctl_get(struct task_struct *task) { switch (arm64_get_ssbd_state()) { case ARM64_SSBD_UNKNOWN: - return -EINVAL; + return -ENODEV; case ARM64_SSBD_FORCE_ENABLE: return PR_SPEC_DISABLE; case ARM64_SSBD_KERNEL: diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 9a9d98a443fc..a12c0c88d345 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -154,14 +154,14 @@ static inline void sve_user_discard(void) sve_user_disable(); } -void el0_svc_handler(struct pt_regs *regs) +void do_el0_svc(struct pt_regs *regs) { sve_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } #ifdef CONFIG_COMPAT -void el0_svc_compat_handler(struct pt_regs *regs) +void do_el0_svc_compat(struct pt_regs *regs) { el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, compat_sys_call_table); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 73caf35c2262..cf402be5c573 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -144,9 +144,12 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" +#elif defined(CONFIG_PREEMPT_RT) +#define S_PREEMPT " PREEMPT_RT" #else #define S_PREEMPT "" #endif + #define S_SMP " SMP" static int __die(const char *str, int err, struct pt_regs *regs) diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f035385..7a7e425616b5 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2fff06114a8f..2bd92301d32f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -47,11 +47,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static bool core_reg_offset_is_vreg(u64 off) { return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) && diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e5cc8d66bf53..d22d0534dd60 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -22,7 +22,12 @@ .text .pushsection .hyp.text, "ax" +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ .macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -32,6 +37,8 @@ .endm .macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -44,11 +51,11 @@ * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ -ENTRY(__guest_enter) +SYM_FUNC_START(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros - // x18: guest context + // x29: guest context // Store the host regs save_callee_saved_regs x1 @@ -67,38 +74,34 @@ alternative_else_nop_endif ret 1: - add x18, x0, #VCPU_CONTEXT + add x29, x0, #VCPU_CONTEXT // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code // as it may cause Pointer Authentication key signing mismatch errors // when this feature is enabled for kernel code. - ptrauth_switch_to_guest x18, x0, x1, x2 + ptrauth_switch_to_guest x29, x0, x1, x2 // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] - - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] + + // Restore guest regs x18-x29, lr + restore_callee_saved_regs x29 // Do not touch any register after this! eret sb -ENDPROC(__guest_enter) -ENTRY(__guest_exit) +SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs @@ -114,7 +117,7 @@ ENTRY(__guest_exit) // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 - // Store the guest regs x0-x1 and x4-x18 + // Store the guest regs x0-x1 and x4-x17 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] @@ -123,9 +126,8 @@ ENTRY(__guest_exit) stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - // Store the guest regs x19-x29, lr + // Store the guest regs x18-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 @@ -192,4 +194,4 @@ abort_guest_exit_end: msr spsr_el2, x4 orr x0, x0, x5 1: ret -ENDPROC(__guest_exit) +SYM_FUNC_END(__guest_enter) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 72fbbd86eb5e..dfe8dd172512 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -28,7 +28,15 @@ /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); @@ -119,7 +127,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; isb(); @@ -158,11 +166,11 @@ static void deactivate_traps_vhe(void) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); /* - * ARM erratum 1165522 requires the actual execution of the above - * before we can switch to the EL2/EL0 translation regime used by + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by * the host. */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -173,7 +181,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 22b8128d19f6..7672a978926c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -118,7 +118,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (!cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { @@ -149,7 +149,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) && + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index c2bc17ca6430..92f560e3e1aa 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -23,10 +23,10 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, local_irq_save(cxt->flags); - if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* - * For CPUs that are affected by ARM erratum 1165522, we - * cannot trust stage-1 to be in a correct state at that + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that * point. Since we do not want to force a full load of the * vcpu state, we prevent the EL1 page-table walker to * allocate new TLBs. This is done by setting the EPD bits @@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { u64 val; /* @@ -103,7 +103,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); - if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); write_sysreg_el1(cxt->sctlr, SYS_SCTLR); @@ -117,7 +117,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, { write_sysreg(0, vttbr_el2); - if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + if (cpus_have_const_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { /* Ensure write of the host VMID */ isb(); /* Restore the host's TCR_EL1 */ diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index ccdb6a051ab2..6aafc2825c1c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f4a8ae918827..30b7ea680f66 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -204,7 +204,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) return true; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { kfree(vcpu->arch.sve_state); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 46822afc57e0..3e909b117f0c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1424,7 +1424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_ISAR4_EL1), ID_SANITISED(ID_ISAR5_EL1), ID_SANITISED(ID_MMFR4_EL1), - ID_UNALLOCATED(2,7), + ID_SANITISED(ID_ISAR6_EL1), /* CRm=3 */ ID_SANITISED(MVFR0_EL1), @@ -2098,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, WARN_ON(1); } - kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n", - cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - print_sys_reg_instr(params); + print_sys_reg_msg(params, + "Unsupported guest CP%d access at: %08lx [%08lx]\n", + cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); kvm_inject_undefined(vcpu); } @@ -2233,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) NULL, 0); } +static bool is_imp_def_sys_reg(struct sys_reg_params *params) +{ + // See ARM DDI 0487E.a, section D12.3.2 + return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011; +} + static int emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { @@ -2248,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, if (likely(r)) { perform_access(vcpu, params, r); + } else if (is_imp_def_sys_reg(params)) { + kvm_inject_undefined(vcpu); } else { - kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n", - *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); - print_sys_reg_instr(params); + print_sys_reg_msg(params, + "Unsupported guest sys_reg access at: %lx [%08lx]\n", + *vcpu_pc(vcpu), *vcpu_cpsr(vcpu)); kvm_inject_undefined(vcpu); } return 1; @@ -2360,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9bca0312d798..5a6fc30f5989 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -62,11 +62,24 @@ struct sys_reg_desc { #define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ #define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ -static inline void print_sys_reg_instr(const struct sys_reg_params *p) +static __printf(2, 3) +inline void print_sys_reg_msg(const struct sys_reg_params *p, + char *fmt, ...) { + va_list va; + + va_start(va, fmt); /* Look, we even formatted it for you to paste into the table! */ - kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", + &(struct va_format){ fmt, &va }, p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); + va_end(va); +} + +static inline void print_sys_reg_instr(const struct sys_reg_params *p) +{ + /* GCC warns on an empty format string */ + print_sys_reg_msg(p, "%s", ""); } static inline bool ignore_write(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index dab1fea4752a..a4f48c1ac28c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -13,52 +13,46 @@ #include <asm/kvm_mmu.h> /* - * The LSB of the random hyp VA tag or 0 if no randomization is used. + * The LSB of the HYP VA tag */ static u8 tag_lsb; /* - * The random hyp VA tag value with the region bit if hyp randomization is used + * The HYP VA tag value with the region bit */ static u64 tag_val; static u64 va_mask; +/* + * We want to generate a hyp VA with the following format (with V == + * vabits_actual): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + * |--------- tag_val -----------|----- va_mask ---| + * + * which does not conflict with the idmap regions. + */ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; - int kva_msb; /* Where is my RAM region? */ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); hyp_va_msb ^= BIT(vabits_actual - 1); - kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (vabits_actual - 1)) { - /* - * No space in the address, let's compute the mask so - * that it covers (vabits_actual - 1) bits, and the region - * bit. The tag stays set to zero. - */ - va_mask = BIT(vabits_actual - 1) - 1; - va_mask |= hyp_va_msb; - } else { - /* - * We do have some free bits to insert a random tag. - * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == vabits_actual): - * - * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 - * --------------------------------------------------------- - * | 0000000 | hyp_va_msb | random tag | kern linear VA | - */ - tag_lsb = kva_msb; - va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); - tag_val |= hyp_va_msb; - tag_val >>= tag_lsb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = hyp_va_msb; + + if (tag_lsb != (vabits_actual - 1)) { + /* We have some free bits to insert a random tag. */ + tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } + tag_val >>= tag_lsb; } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * VHE doesn't need any address translation, let's NOP * everything. * - * Alternatively, if we don't have any spare bits in - * the address, NOP everything after masking that - * kernel VA. + * Alternatively, if the tag is zero (because the layout + * dictates it and we don't have any spare bits in the + * address), NOP everything after masking the kernel VA. */ - if (has_vhe() || (!tag_lsb && i > 0)) { + if (has_vhe() || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index c21b936dc01d..2fc253466dbf 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 lib-y := clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ - clear_page.o memchr.o memcpy.o memmove.o memset.o \ - memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ - strchr.o strrchr.o tishift.o + clear_page.o csum.o memchr.o memcpy.o memmove.o \ + memset.o memcmp.o strcmp.o strncmp.o strlen.o \ + strnlen.o strchr.o strrchr.o tishift.o ifeq ($(CONFIG_KERNEL_MODE_NEON), y) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 78a9ef66288a..073acbf02a7c 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -ENTRY(clear_page) +SYM_FUNC_START(clear_page) mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 @@ -25,5 +25,5 @@ ENTRY(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret -ENDPROC(clear_page) +SYM_FUNC_END(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index aeafc03e961a..48a3a26eff66 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -19,7 +19,7 @@ * * Alignment fixed up by hardware. */ -ENTRY(__arch_clear_user) +SYM_FUNC_START(__arch_clear_user) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +40,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 ret -ENDPROC(__arch_clear_user) +SYM_FUNC_END(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index ebb3c06cbb5d..8e25e89ad01f 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -53,12 +53,12 @@ .endm end .req x5 -ENTRY(__arch_copy_from_user) +SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 // Nothing to copy ret -ENDPROC(__arch_copy_from_user) +SYM_FUNC_END(__arch_copy_from_user) EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 3d8153a1ebce..667139013ed1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -55,12 +55,12 @@ end .req x5 -ENTRY(__arch_copy_in_user) +SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_in_user) +SYM_FUNC_END(__arch_copy_in_user) EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index bbb8562396af..e7a793961408 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -ENTRY(copy_page) +SYM_FUNC_START(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -34,46 +34,46 @@ alternative_else_nop_endif ldp x14, x15, [x1, #96] ldp x16, x17, [x1, #112] - mov x18, #(PAGE_SIZE - 128) + add x0, x0, #256 add x1, x1, #128 1: - subs x18, x18, #128 + tst x0, #(PAGE_SIZE - 1) alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] alternative_else_nop_endif - stnp x2, x3, [x0] + stnp x2, x3, [x0, #-256] ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] + stnp x4, x5, [x0, #16 - 256] ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] + stnp x6, x7, [x0, #32 - 256] ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] + stnp x8, x9, [x0, #48 - 256] ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] + stnp x10, x11, [x0, #64 - 256] ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] + stnp x12, x13, [x0, #80 - 256] ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] + stnp x14, x15, [x0, #96 - 256] ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] + stnp x16, x17, [x0, #112 - 256] ldp x16, x17, [x1, #112] add x0, x0, #128 add x1, x1, #128 - b.gt 1b + b.ne 1b - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] + stnp x2, x3, [x0, #-256] + stnp x4, x5, [x0, #16 - 256] + stnp x6, x7, [x0, #32 - 256] + stnp x8, x9, [x0, #48 - 256] + stnp x10, x11, [x0, #64 - 256] + stnp x12, x13, [x0, #80 - 256] + stnp x14, x15, [x0, #96 - 256] + stnp x16, x17, [x0, #112 - 256] ret -ENDPROC(copy_page) +SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 357eae2c18eb..1a104d0089f3 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -52,12 +52,12 @@ .endm end .req x5 -ENTRY(__arch_copy_to_user) +SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_to_user) +SYM_FUNC_END(__arch_copy_to_user) EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index e6135f16649b..243e107e9896 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -85,17 +85,17 @@ CPU_BE( rev16 w3, w3 ) .endm .align 5 -ENTRY(crc32_le) +SYM_FUNC_START(crc32_le) alternative_if_not ARM64_HAS_CRC32 b crc32_le_base alternative_else_nop_endif __crc32 -ENDPROC(crc32_le) +SYM_FUNC_END(crc32_le) .align 5 -ENTRY(__crc32c_le) +SYM_FUNC_START(__crc32c_le) alternative_if_not ARM64_HAS_CRC32 b __crc32c_le_base alternative_else_nop_endif __crc32 c -ENDPROC(__crc32c_le) +SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c new file mode 100644 index 000000000000..1f82c66b32ea --- /dev/null +++ b/arch/arm64/lib/csum.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2019-2020 Arm Ltd. + +#include <linux/compiler.h> +#include <linux/kasan-checks.h> +#include <linux/kernel.h> + +#include <net/checksum.h> + +/* Looks dumb, but generates nice-ish code */ +static u64 accumulate(u64 sum, u64 data) +{ + __uint128_t tmp = (__uint128_t)sum + data; + return tmp + (tmp >> 64); +} + +unsigned int do_csum(const unsigned char *buff, int len) +{ + unsigned int offset, shift, sum; + const u64 *ptr; + u64 data, sum64 = 0; + + if (unlikely(len == 0)) + return 0; + + offset = (unsigned long)buff & 7; + /* + * This is to all intents and purposes safe, since rounding down cannot + * result in a different page or cache line being accessed, and @buff + * should absolutely not be pointing to anything read-sensitive. We do, + * however, have to be careful not to piss off KASAN, which means using + * unchecked reads to accommodate the head and tail, for which we'll + * compensate with an explicit check up-front. + */ + kasan_check_read(buff, len); + ptr = (u64 *)(buff - offset); + len = len + offset - 8; + + /* + * Head: zero out any excess leading bytes. Shifting back by the same + * amount should be at least as fast as any other way of handling the + * odd/even alignment, and means we can ignore it until the very end. + */ + shift = offset * 8; + data = READ_ONCE_NOCHECK(*ptr++); +#ifdef __LITTLE_ENDIAN + data = (data >> shift) << shift; +#else + data = (data << shift) >> shift; +#endif + + /* + * Body: straightforward aligned loads from here on (the paired loads + * underlying the quadword type still only need dword alignment). The + * main loop strictly excludes the tail, so the second loop will always + * run at least once. + */ + while (unlikely(len > 64)) { + __uint128_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); + tmp2 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 2)); + tmp3 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 4)); + tmp4 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 6)); + + len -= 64; + ptr += 8; + + /* This is the "don't dump the carry flag into a GPR" idiom */ + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp2 += (tmp2 >> 64) | (tmp2 << 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp4 += (tmp4 >> 64) | (tmp4 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp1 = ((tmp1 >> 64) << 64) | sum64; + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + sum64 = tmp1 >> 64; + } + while (len > 8) { + __uint128_t tmp; + + sum64 = accumulate(sum64, data); + tmp = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); + + len -= 16; + ptr += 2; + +#ifdef __LITTLE_ENDIAN + data = tmp >> 64; + sum64 = accumulate(sum64, tmp); +#else + data = tmp; + sum64 = accumulate(sum64, tmp >> 64); +#endif + } + if (len > 0) { + sum64 = accumulate(sum64, data); + data = READ_ONCE_NOCHECK(*ptr); + len -= 8; + } + /* + * Tail: zero any over-read bytes similarly to the head, again + * preserving odd/even alignment. + */ + shift = len * -8; +#ifdef __LITTLE_ENDIAN + data = (data << shift) >> shift; +#else + data = (data >> shift) << shift; +#endif + sum64 = accumulate(sum64, data); + + /* Finally, folding */ + sum64 += (sum64 >> 32) | (sum64 << 32); + sum = sum64 >> 32; + sum += (sum >> 16) | (sum << 16); + if (offset & 1) + return (u16)swab32(sum); + + return sum >> 16; +} diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 48a3ab636e4f..edf6b970a277 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -19,7 +19,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(memchr) +SYM_FUNC_START_WEAK_PI(memchr) and w1, w1, #0xff 1: subs x2, x2, #1 b.mi 2f @@ -30,5 +30,5 @@ WEAK(memchr) ret 2: mov x0, #0 ret -ENDPIPROC(memchr) +SYM_FUNC_END_PI(memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index b297bdaaf549..c0671e793ea9 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -46,7 +46,7 @@ pos .req x11 limit_wd .req x12 mask .req x13 -WEAK(memcmp) +SYM_FUNC_START_WEAK_PI(memcmp) cbz limit, .Lret0 eor tmp1, src1, src2 tst tmp1, #7 @@ -243,5 +243,5 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPIPROC(memcmp) +SYM_FUNC_END_PI(memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index d79f48994dbb..9f382adfa88a 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,11 +57,11 @@ .endm .weak memcpy -ENTRY(__memcpy) -ENTRY(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) +SYM_FUNC_START_PI(memcpy) #include "copy_template.S" ret -ENDPIPROC(memcpy) +SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) -ENDPROC(__memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 784775136480..02cda2e33bde 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -46,8 +46,8 @@ D_l .req x13 D_h .req x14 .weak memmove -ENTRY(__memmove) -ENTRY(memmove) +SYM_FUNC_START_ALIAS(__memmove) +SYM_FUNC_START_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count @@ -184,7 +184,7 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPIPROC(memmove) +SYM_FUNC_END_PI(memmove) EXPORT_SYMBOL(memmove) -ENDPROC(__memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 9fb97e6bc560..77c3c7ba0084 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -43,8 +43,8 @@ tmp3w .req w9 tmp3 .req x9 .weak memset -ENTRY(__memset) -ENTRY(memset) +SYM_FUNC_START_ALIAS(__memset) +SYM_FUNC_START_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -203,7 +203,7 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPIPROC(memset) +SYM_FUNC_END_PI(memset) EXPORT_SYMBOL(memset) -ENDPROC(__memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index ca3ec18171a4..1f47eae3b0d6 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(strchr) +SYM_FUNC_START_WEAK(strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,5 @@ WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -ENDPROC(strchr) +SYM_FUNC_END(strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index e9aefbe0b740..4767540d1b94 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -48,7 +48,7 @@ tmp3 .req x9 zeroones .req x10 pos .req x11 -WEAK(strcmp) +SYM_FUNC_START_WEAK_PI(strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -219,5 +219,5 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPIPROC(strcmp) +SYM_FUNC_END_PI(strcmp) EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 87b0cb066915..ee3ed882dd79 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -44,7 +44,7 @@ pos .req x12 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strlen) +SYM_FUNC_START_WEAK_PI(strlen) mov zeroones, #REP8_01 bic src, srcin, #15 ands tmp1, srcin, #15 @@ -111,5 +111,5 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPIPROC(strlen) +SYM_FUNC_END_PI(strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index f571581888fa..2a7ee949ed47 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -52,7 +52,7 @@ limit_wd .req x13 mask .req x14 endloop .req x15 -WEAK(strncmp) +SYM_FUNC_START_WEAK_PI(strncmp) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -295,5 +295,5 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPIPROC(strncmp) +SYM_FUNC_END_PI(strncmp) EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index c0bac9493c68..b72913a99038 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strnlen) +SYM_FUNC_START_WEAK_PI(strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,5 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -ENDPIPROC(strnlen) +SYM_FUNC_END_PI(strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 794ac49ea433..13132d1ed6d1 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -WEAK(strrchr) +SYM_FUNC_START_WEAK_PI(strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,5 @@ WEAK(strrchr) b 1b 2: mov x0, x3 ret -ENDPIPROC(strrchr) +SYM_FUNC_END_PI(strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index 047622536535..a88613834fb0 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -7,7 +7,7 @@ #include <asm/assembler.h> -ENTRY(__ashlti3) +SYM_FUNC_START(__ashlti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -26,10 +26,10 @@ ENTRY(__ashlti3) lsl x1, x0, x1 mov x0, x2 ret -ENDPROC(__ashlti3) +SYM_FUNC_END(__ashlti3) EXPORT_SYMBOL(__ashlti3) -ENTRY(__ashrti3) +SYM_FUNC_START(__ashrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -48,10 +48,10 @@ ENTRY(__ashrti3) asr x0, x1, x0 mov x1, x2 ret -ENDPROC(__ashrti3) +SYM_FUNC_END(__ashrti3) EXPORT_SYMBOL(__ashrti3) -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -70,5 +70,5 @@ ENTRY(__lshrti3) lsr x0, x1, x0 mov x1, x2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__lshrti3) EXPORT_SYMBOL(__lshrti3) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 849c1df3d214..d91030f0ffee 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,8 +4,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o -obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index db767b072601..2d881f34dd9d 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,7 +24,7 @@ * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_icache_range) +SYM_FUNC_START(__flush_icache_range) /* FALLTHROUGH */ /* @@ -37,7 +37,7 @@ ENTRY(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_cache_user_range) +SYM_FUNC_START(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 alternative_if ARM64_HAS_CACHE_IDC dsb ishst @@ -66,8 +66,8 @@ alternative_else_nop_endif 9: mov x0, #-EFAULT b 1b -ENDPROC(__flush_icache_range) -ENDPROC(__flush_cache_user_range) +SYM_FUNC_END(__flush_icache_range) +SYM_FUNC_END(__flush_cache_user_range) /* * invalidate_icache_range(start,end) @@ -77,7 +77,7 @@ ENDPROC(__flush_cache_user_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(invalidate_icache_range) +SYM_FUNC_START(invalidate_icache_range) alternative_if ARM64_HAS_CACHE_DIC mov x0, xzr isb @@ -94,7 +94,7 @@ alternative_else_nop_endif 2: mov x0, #-EFAULT b 1b -ENDPROC(invalidate_icache_range) +SYM_FUNC_END(invalidate_icache_range) /* * __flush_dcache_area(kaddr, size) @@ -105,10 +105,10 @@ ENDPROC(invalidate_icache_range) * - kaddr - kernel address * - size - size in question */ -ENTRY(__flush_dcache_area) +SYM_FUNC_START_PI(__flush_dcache_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__flush_dcache_area) +SYM_FUNC_END_PI(__flush_dcache_area) /* * __clean_dcache_area_pou(kaddr, size) @@ -119,14 +119,14 @@ ENDPIPROC(__flush_dcache_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pou) +SYM_FUNC_START(__clean_dcache_area_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -ENDPROC(__clean_dcache_area_pou) +SYM_FUNC_END(__clean_dcache_area_pou) /* * __inval_dcache_area(kaddr, size) @@ -138,7 +138,8 @@ ENDPROC(__clean_dcache_area_pou) * - kaddr - kernel address * - size - size in question */ -ENTRY(__inval_dcache_area) +SYM_FUNC_START_LOCAL(__dma_inv_area) +SYM_FUNC_START_PI(__inval_dcache_area) /* FALLTHROUGH */ /* @@ -146,7 +147,6 @@ ENTRY(__inval_dcache_area) * - start - virtual start address of region * - size - size in question */ -__dma_inv_area: add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 @@ -165,8 +165,8 @@ __dma_inv_area: b.lo 2b dsb sy ret -ENDPIPROC(__inval_dcache_area) -ENDPROC(__dma_inv_area) +SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END(__dma_inv_area) /* * __clean_dcache_area_poc(kaddr, size) @@ -177,7 +177,8 @@ ENDPROC(__dma_inv_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_poc) +SYM_FUNC_START_LOCAL(__dma_clean_area) +SYM_FUNC_START_PI(__clean_dcache_area_poc) /* FALLTHROUGH */ /* @@ -185,11 +186,10 @@ ENTRY(__clean_dcache_area_poc) * - start - virtual start address of region * - size - size in question */ -__dma_clean_area: dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_poc) -ENDPROC(__dma_clean_area) +SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END(__dma_clean_area) /* * __clean_dcache_area_pop(kaddr, size) @@ -200,13 +200,13 @@ ENDPROC(__dma_clean_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pop) +SYM_FUNC_START_PI(__clean_dcache_area_pop) alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_pop) +SYM_FUNC_END_PI(__clean_dcache_area_pop) /* * __dma_flush_area(start, size) @@ -216,10 +216,10 @@ ENDPIPROC(__clean_dcache_area_pop) * - start - virtual start address of region * - size - size in question */ -ENTRY(__dma_flush_area) +SYM_FUNC_START_PI(__dma_flush_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_area) +SYM_FUNC_END_PI(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -227,11 +227,11 @@ ENDPIPROC(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_map_area) +SYM_FUNC_START_PI(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area b __dma_clean_area -ENDPIPROC(__dma_map_area) +SYM_FUNC_END_PI(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -239,8 +239,8 @@ ENDPIPROC(__dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_unmap_area) +SYM_FUNC_START_PI(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret -ENDPIPROC(__dma_unmap_area) +SYM_FUNC_END_PI(__dma_unmap_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b5e329fde2dd..8ef73e89d514 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -29,15 +29,9 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) -#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) -#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) -#else -#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define NUM_USER_ASIDS ASID_FIRST_VERSION #define asid2idx(asid) ((asid) & ~ASID_MASK) #define idx2asid(idx) asid2idx(idx) -#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -77,13 +71,33 @@ void verify_cpu_asid_bits(void) } } +static void set_kpti_asid_bits(void) +{ + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + /* + * In case of KPTI kernel/user ASIDs are allocated in + * pairs, the bottom bit distinguishes the two: if it + * is set, then the ASID will map only userspace. Thus + * mark even as reserved for kernel. + */ + memset(asid_map, 0xaa, len); +} + +static void set_reserved_asid_bits(void) +{ + if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(); + else + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); +} + static void flush_context(void) { int i; u64 asid; /* Update the list of reserved ASIDs and the ASID bitmap. */ - bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + set_reserved_asid_bits(); for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); @@ -261,6 +275,14 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + /* + * We cannot call set_reserved_asid_bits() here because CPU + * caps are not finalized yet, so it is safer to assume KPTI + * and reserve kernel ASID's from beginning. + */ + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + set_kpti_asid_bits(); + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0a920b538a89..860c00ec8bd3 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ptdump.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -75,10 +76,11 @@ static struct addr_marker address_markers[] = { * dumps out a description of the range. */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; - unsigned level; + int level; u64 current_prot; bool check_wx; unsigned long wx_pages; @@ -174,11 +176,14 @@ struct pg_level { }; static struct pg_level pg_level[] = { - { - }, { /* pgd */ + { /* pgd */ .name = "PGD", .bits = pte_bits, .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), }, { /* pud */ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", .bits = pte_bits, @@ -241,13 +246,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page(struct pg_state *st, unsigned long addr, unsigned level, - u64 val) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); static const char units[] = "KMGTPE"; - u64 prot = val & pg_level[level].mask; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; - if (!st->level) { + if (st->level == -1) { st->level = level; st->current_prot = prot; st->start_address = addr; @@ -260,21 +269,22 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, if (st->current_prot) { note_prot_uxn(st, addr); note_prot_wx(st, addr); - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); if (addr >= st->marker[1].start_address) { st->marker++; @@ -293,85 +303,27 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, - unsigned long end) -{ - unsigned long addr = start; - pte_t *ptep = pte_offset_kernel(pmdp, start); - - do { - note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } while (ptep++, addr += PAGE_SIZE, addr != end); -} - -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, - unsigned long end) -{ - unsigned long next, addr = start; - pmd_t *pmdp = pmd_offset(pudp, start); - - do { - pmd_t pmd = READ_ONCE(*pmdp); - next = pmd_addr_end(addr, end); - - if (pmd_none(pmd) || pmd_sect(pmd)) { - note_page(st, addr, 3, pmd_val(pmd)); - } else { - BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr, next); - } - } while (pmdp++, addr = next, addr != end); -} - -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, - unsigned long end) +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { - unsigned long next, addr = start; - pud_t *pudp = pud_offset(pgdp, start); - - do { - pud_t pud = READ_ONCE(*pudp); - next = pud_addr_end(addr, end); - - if (pud_none(pud) || pud_sect(pud)) { - note_page(st, addr, 2, pud_val(pud)); - } else { - BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr, next); - } - } while (pudp++, addr = next, addr != end); -} + unsigned long end = ~0UL; + struct pg_state st; -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, - unsigned long start) -{ - unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; - unsigned long next, addr = start; - pgd_t *pgdp = pgd_offset(mm, start); - - do { - pgd_t pgd = READ_ONCE(*pgdp); - next = pgd_addr_end(addr, end); - - if (pgd_none(pgd)) { - note_page(st, addr, 1, pgd_val(pgd)); - } else { - BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr, next); - } - } while (pgdp++, addr = next, addr != end); -} + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; -void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) -{ - struct pg_state st = { - .seq = m, + st = (struct pg_state){ + .seq = s, .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } }; - walk_pgd(&st, info->mm, info->base_addr); - - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } static void ptdump_initialize(void) @@ -398,11 +350,19 @@ void ptdump_check_wx(void) { 0, NULL}, { -1, NULL}, }, + .level = -1, .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } }; - walk_pgd(&st, &init_mm, PAGE_OFFSET); - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", st.wx_pages, st.uxn_pages); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 077b02a2d4d3..85566d32958f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (kprobe_page_fault(regs, esr)) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5a3b15a14a7f..128f70852bf3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -943,13 +943,13 @@ int __init arch_ioremap_pud_supported(void) * SW table walks can't handle removal of intermediate entries. */ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int __init arch_ioremap_pmd_supported(void) { /* See arch_ioremap_pud_supported() */ - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) @@ -1070,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; /* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1079,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. */ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 9ce7bd9d4d9c..250c49008d73 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -54,7 +54,7 @@ static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; - unsigned long size = PAGE_SIZE*numpages; + unsigned long size = PAGE_SIZE * numpages; unsigned long end = start + size; struct vm_struct *area; int i; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a1e0592d1fbc..aafed6902411 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -42,7 +42,14 @@ #define TCR_KASAN_FLAGS 0 #endif -#define MAIR(attr, mt) ((attr) << ((mt) * 8)) +/* Default MAIR_EL1 */ +#define MAIR_EL1_SET \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #ifdef CONFIG_CPU_PM /** @@ -50,7 +57,7 @@ * * x0: virtual address of context pointer */ -ENTRY(cpu_do_suspend) +SYM_FUNC_START(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 @@ -74,7 +81,7 @@ alternative_endif stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] ret -ENDPROC(cpu_do_suspend) +SYM_FUNC_END(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context @@ -82,7 +89,7 @@ ENDPROC(cpu_do_suspend) * x0: Address of context pointer */ .pushsection ".idmap.text", "awx" -ENTRY(cpu_do_resume) +SYM_FUNC_START(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] ldp x6, x8, [x0, #32] @@ -131,7 +138,7 @@ alternative_else_nop_endif isb ret -ENDPROC(cpu_do_resume) +SYM_FUNC_END(cpu_do_resume) .popsection #endif @@ -142,7 +149,7 @@ ENDPROC(cpu_do_resume) * * - pgd_phys - physical address of new TTB */ -ENTRY(cpu_do_switch_mm) +SYM_FUNC_START(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 @@ -161,7 +168,7 @@ alternative_else_nop_endif msr ttbr0_el1, x3 // now update TTBR0 isb b post_ttbr_update_workaround // Back to C code... -ENDPROC(cpu_do_switch_mm) +SYM_FUNC_END(cpu_do_switch_mm) .pushsection ".idmap.text", "awx" @@ -182,7 +189,7 @@ ENDPROC(cpu_do_switch_mm) * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. */ -ENTRY(idmap_cpu_replace_ttbr1) +SYM_FUNC_START(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 __idmap_cpu_set_reserved_ttbr1 x1, x3 @@ -194,7 +201,7 @@ ENTRY(idmap_cpu_replace_ttbr1) restore_daif x2 ret -ENDPROC(idmap_cpu_replace_ttbr1) +SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -222,7 +229,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) */ __idmap_kpti_flag: .long 1 -ENTRY(idmap_kpti_install_ng_mappings) +SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 num_cpus .req w1 swapper_pa .req x2 @@ -250,15 +257,15 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We're the boot CPU. Wait for the others to catch up */ sevl 1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b + ldaxr w17, [flag_ptr] + eor w17, w17, num_cpus + cbnz w17, 1b /* We need to walk swapper, so turn off the MMU. */ pre_disable_mmu_workaround - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + bic x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* Everybody is enjoying the idmap, so we can rewrite swapper. */ @@ -281,9 +288,9 @@ skip_pgd: isb /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + orr x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* @@ -353,47 +360,48 @@ skip_pte: b.ne do_pte b next_pmd + .unreq cpu + .unreq num_cpus + .unreq swapper_pa + .unreq cur_pgdp + .unreq end_pgdp + .unreq pgd + .unreq cur_pudp + .unreq end_pudp + .unreq pud + .unreq cur_pmdp + .unreq end_pmdp + .unreq pmd + .unreq cur_ptep + .unreq end_ptep + .unreq pte + /* Secondary CPUs end up here */ __idmap_kpti_secondary: /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 + __idmap_cpu_set_reserved_ttbr1 x16, x17 /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] +1: ldxr w16, [flag_ptr] + add w16, w16, #1 + stxr w17, w16, [flag_ptr] cbnz w17, 1b /* Wait for the boot CPU to finish messing around with swapper */ sevl 1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b + ldxr w16, [flag_ptr] + cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x18 + offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret - .unreq cpu - .unreq num_cpus - .unreq swapper_pa .unreq swapper_ttb .unreq flag_ptr - .unreq cur_pgdp - .unreq end_pgdp - .unreq pgd - .unreq cur_pudp - .unreq end_pudp - .unreq pud - .unreq cur_pmdp - .unreq end_pmdp - .unreq pmd - .unreq cur_ptep - .unreq end_ptep - .unreq pte -ENDPROC(idmap_kpti_install_ng_mappings) +SYM_FUNC_END(idmap_kpti_install_ng_mappings) .popsection #endif @@ -404,7 +412,7 @@ ENDPROC(idmap_kpti_install_ng_mappings) * value of the SCTLR_EL1 register. */ .pushsection ".idmap.text", "awx" -ENTRY(__cpu_setup) +SYM_FUNC_START(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -416,23 +424,9 @@ ENTRY(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* - * Memory region attributes for LPAE: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * DEVICE_nGnRE 001 00000100 - * DEVICE_GRE 010 00001100 - * NORMAL_NC 011 01000100 - * NORMAL 100 11111111 - * NORMAL_WT 101 10111011 + * Memory region attributes */ - ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ - MAIR(0x04, MT_DEVICE_nGnRE) | \ - MAIR(0x0c, MT_DEVICE_GRE) | \ - MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) | \ - MAIR(0xbb, MT_NORMAL_WT) + mov_q x5, MAIR_EL1_SET msr mair_el1, x5 /* * Prepare SCTLR @@ -475,4 +469,4 @@ ENTRY(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 ret // return to head.S -ENDPROC(__cpu_setup) +SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 064163f25592..1f2eae3e988b 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { struct ptdump_info *info = m->private; - ptdump_walk_pgd(m, info); + ptdump_walk(m, info); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index c5f05c4a4d00..5b09aca55108 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -56,11 +56,11 @@ #define XEN_IMM 0xEA1 #define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ +SYM_FUNC_START(HYPERVISOR_##hypercall) \ mov x16, #__HYPERVISOR_##hypercall; \ hvc XEN_IMM; \ ret; \ -ENDPROC(HYPERVISOR_##hypercall) +SYM_FUNC_END(HYPERVISOR_##hypercall) #define HYPERCALL0 HYPERCALL_SIMPLE #define HYPERCALL1 HYPERCALL_SIMPLE @@ -86,7 +86,7 @@ HYPERCALL2(multicall); HYPERCALL2(vm_assist); HYPERCALL3(dm_op); -ENTRY(privcmd_call) +SYM_FUNC_START(privcmd_call) mov x16, x0 mov x0, x1 mov x1, x2 @@ -109,4 +109,4 @@ ENTRY(privcmd_call) */ uaccess_ttbr0_disable x6, x7 ret -ENDPROC(privcmd_call); +SYM_FUNC_END(privcmd_call); diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h new file mode 100644 index 000000000000..26c6c6696bbd --- /dev/null +++ b/arch/c6x/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_C6X_VMALLOC_H +#define _ASM_C6X_VMALLOC_H + +#endif /* _ASM_C6X_VMALLOC_H */ diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S index 4332a10aec6c..fb154d19625b 100644 --- a/arch/c6x/kernel/entry.S +++ b/arch/c6x/kernel/entry.S @@ -18,7 +18,7 @@ #define DP B14 #define SP B15 -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION #define resume_kernel restore_all #endif @@ -287,7 +287,7 @@ work_notifysig: ;; is a little bit different ;; ENTRY(ret_from_exception) -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION MASK_INT B2 #endif @@ -557,7 +557,7 @@ ENDPROC(_nmi_handler) ;; ;; Jump to schedule() then return to ret_from_isr ;; -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: GET_THREAD_INFO A12 LDW .D1T1 *+A12(THREAD_INFO_PREEMPT_COUNT),A1 @@ -582,7 +582,7 @@ preempt_schedule: B .S2 preempt_schedule_irq #endif ADDKPC .S2 preempt_schedule,B3,4 -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ ENTRY(enable_exception) DINT diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h new file mode 100644 index 000000000000..43dca6336b4c --- /dev/null +++ b/arch/csky/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_CSKY_VMALLOC_H +#define _ASM_CSKY_VMALLOC_H + +#endif /* _ASM_CSKY_VMALLOC_H */ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7a5b67df898..007706328000 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -277,7 +277,7 @@ ENTRY(csky_irq) zero_fp psrset ee -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION mov r9, sp /* Get current stack pointer */ bmaski r10, THREAD_SHIFT andn r9, r10 /* Get thread_info */ @@ -294,7 +294,7 @@ ENTRY(csky_irq) mov a0, sp jbsr csky_do_IRQ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION subi r12, 1 stw r12, (r9, TINFO_PREEMPT) cmpnei r12, 0 diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 23ee604aafdb..52eaf31ba27f 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -136,10 +136,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_HIGHMEM kmap_init(); #endif - -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif } unsigned long va_pa_offset; diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h new file mode 100644 index 000000000000..08a55c1dfa23 --- /dev/null +++ b/arch/h8300/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_H8300_VMALLOC_H +#define _ASM_H8300_VMALLOC_H + +#endif /* _ASM_H8300_VMALLOC_H */ diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S index 4ade5f8299ba..c6e289b5f1f2 100644 --- a/arch/h8300/kernel/entry.S +++ b/arch/h8300/kernel/entry.S @@ -284,12 +284,12 @@ badsys: mov.l er0,@(LER0:16,sp) bra resume_userspace -#if !defined(CONFIG_PREEMPT) +#if !defined(CONFIG_PREEMPTION) #define resume_kernel restore_all #endif ret_from_exception: -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) orc #0xc0,ccr #endif ret_from_interrupt: @@ -319,7 +319,7 @@ work_resched: restore_all: RESTORE_ALL /* Does RTE */ -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) resume_kernel: mov.l @(TI_PRE_COUNT:16,er4),er0 bne restore_all:8 diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 12cd9231c4b8..0231d69c8bf2 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 47384b094b94..71429f756af0 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -223,7 +223,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 6091322c3af9..92b8a02e588a 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index cb635216a732..0191f7c7193e 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index 539e3efcf39c..bda2a9c2df78 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -172,7 +172,7 @@ static inline void writel(u32 data, volatile void __iomem *addr) #define writel_relaxed __raw_writel void __iomem *ioremap(unsigned long phys_addr, unsigned long size); -#define ioremap_nocache ioremap +#define ioremap_uc(X, Y) ioremap((X), (Y)) #define __raw_writel writel diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index bfe07d842ff3..ef103b73bec8 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h new file mode 100644 index 000000000000..7b04609e525c --- /dev/null +++ b/arch/hexagon/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_HEXAGON_VMALLOC_H +#define _ASM_HEXAGON_VMALLOC_H + +#endif /* _ASM_HEXAGON_VMALLOC_H */ diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 35f29423fda8..5ed02f699479 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -11,8 +11,6 @@ #include <linux/thread_info.h> #include <linux/module.h> -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 12242c27e2df..554371d92bed 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -265,12 +265,12 @@ event_dispatch: * should be in the designated register (usually R19) * * If we were in kernel mode, we don't need to check scheduler - * or signals if CONFIG_PREEMPT is not set. If set, then it has + * or signals if CONFIG_PREEMPTION is not set. If set, then it has * to jump to a need_resched kind of block. - * BTW, CONFIG_PREEMPT is not supported yet. + * BTW, CONFIG_PREEMPTION is not supported yet. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION R0 = #VM_INT_DISABLE trap1(#HVM_TRAP1_VMSETIE) #endif @@ -369,7 +369,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index f886d4dc9d55..b66ba907019c 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -38,7 +38,10 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); /* Low-level suspend routine. */ extern int acpi_suspend_lowlevel(void); -extern unsigned long acpi_wakeup_address; +static inline unsigned long acpi_get_wakeup_address(void) +{ + return 0; +} /* * Record the cpei override flag and current logical cpu. This is diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h index 30cb373f3de8..64ce0b971a0a 100644 --- a/arch/ia64/include/asm/vga.h +++ b/arch/ia64/include/asm/vga.h @@ -18,7 +18,7 @@ extern unsigned long vga_console_iobase; extern unsigned long vga_console_membase; -#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap_nocache(vga_console_membase + (x), s)) +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(vga_console_membase + (x), s)) #define vga_readb(x) (*(x)) #define vga_writeb(x,y) (*(y) = (x)) diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h new file mode 100644 index 000000000000..a2b51141ad28 --- /dev/null +++ b/arch/ia64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_IA64_VMALLOC_H +#define _ASM_IA64_VMALLOC_H + +#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 70d1587ddcd4..a5636524af76 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -42,8 +42,6 @@ int acpi_lapic; unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; -unsigned long acpi_wakeup_address = 0; - #define ACPI_MAX_PLATFORM_INTERRUPTS 256 /* Array to record platform interrupt vectors for generic interrupt routing. */ diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index f80eb7fb544d..258d7b70c0f3 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -50,7 +50,7 @@ int __init init_cyclone_clock(void) /* find base address */ offset = (CYCLONE_CBAR_ADDR); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid CBAR" " register.\n"); @@ -68,7 +68,7 @@ int __init init_cyclone_clock(void) /* setup PMCC */ offset = (base + CYCLONE_PMCC_OFFSET); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid PMCC" " register.\n"); @@ -80,7 +80,7 @@ int __init init_cyclone_clock(void) /* setup MPCS */ offset = (base + CYCLONE_MPCS_OFFSET); - reg = ioremap_nocache(offset, sizeof(u64)); + reg = ioremap(offset, sizeof(u64)); if(!reg){ printk(KERN_ERR "Summit chipset: Could not find valid MPCS" " register.\n"); @@ -92,7 +92,7 @@ int __init init_cyclone_clock(void) /* map in cyclone_timer */ offset = (base + CYCLONE_MPMC_OFFSET); - cyclone_timer = ioremap_nocache(offset, sizeof(u32)); + cyclone_timer = ioremap(offset, sizeof(u32)); if(!cyclone_timer){ printk(KERN_ERR "Summit chipset: Could not find valid MPMC" " register.\n"); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index a9992be5718b..2ac926331500 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -670,12 +670,12 @@ GLOBAL_ENTRY(ia64_leave_syscall) * * p6 controls whether current_thread_info()->flags needs to be check for * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION RSM_PSR_I(p0, r2, r18) // disable interrupts cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 @@ -685,7 +685,7 @@ GLOBAL_ENTRY(ia64_leave_syscall) (pUStk) mov r21=0 // r21 <- 0 ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ +#else /* !CONFIG_PREEMPTION */ RSM_PSR_I(pUStk, r2, r18) cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk @@ -814,12 +814,12 @@ GLOBAL_ENTRY(ia64_leave_kernel) * * p6 controls whether current_thread_info()->flags needs to be check for * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION RSM_PSR_I(p0, r17, r31) // disable interrupts cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 @@ -1120,7 +1120,7 @@ skip_rbs_switch: /* * On entry: - * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPT) + * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPTION) * r31 = current->thread_info->flags * On exit: * p6 = TRUE if work-pending-check needs to be redone diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b8356edbde65..a6d6a0556f08 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -841,7 +841,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; } -#if !defined(CONFIG_PREEMPT) +#if !defined(CONFIG_PREEMPTION) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index b392c0a50346..a25ab9b37953 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -331,10 +331,10 @@ retry: return size; } -static const struct file_operations salinfo_event_fops = { - .open = salinfo_event_open, - .read = salinfo_event_read, - .llseek = noop_llseek, +static const struct proc_ops salinfo_event_proc_ops = { + .proc_open = salinfo_event_open, + .proc_read = salinfo_event_read, + .proc_lseek = noop_llseek, }; static int @@ -534,12 +534,12 @@ salinfo_log_write(struct file *file, const char __user *buffer, size_t count, lo return count; } -static const struct file_operations salinfo_data_fops = { - .open = salinfo_log_open, - .release = salinfo_log_release, - .read = salinfo_log_read, - .write = salinfo_log_write, - .llseek = default_llseek, +static const struct proc_ops salinfo_data_proc_ops = { + .proc_open = salinfo_log_open, + .proc_release = salinfo_log_release, + .proc_read = salinfo_log_read, + .proc_write = salinfo_log_write, + .proc_lseek = default_llseek, }; static int salinfo_cpu_online(unsigned int cpu) @@ -617,13 +617,13 @@ salinfo_init(void) continue; entry = proc_create_data("event", S_IRUSR, dir, - &salinfo_event_fops, data); + &salinfo_event_proc_ops, data); if (!entry) continue; *sdir++ = entry; entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, - &salinfo_data_fops, data); + &salinfo_data_proc_ops, data); if (!entry) continue; *sdir++ = entry; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c49fcef754de..4009383453f7 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -608,9 +608,6 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_VT if (!conswitchp) { -# if defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -# endif # if defined(CONFIG_VGA_CONSOLE) /* * Non-legacy systems may route legacy VGA MMIO range to system diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 36d5faf4c86c..042911e670b8 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -356,3 +356,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open # 435 reserved for clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 58fd67068bac..b01d68a2d5d9 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 6663f1741798..6ad6cdac74b3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -14,6 +14,7 @@ config M68K select HAVE_AOUT if MMU select HAVE_ASM_MODVERSIONS select HAVE_DEBUG_BUGVERBOSE + select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 select HAVE_UID16 diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 619d30d663a2..e1134c3e0b69 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -562,6 +562,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -574,7 +575,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -612,6 +613,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -620,6 +624,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -651,4 +656,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index caa0558abcdb..484cb1643df1 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -518,6 +518,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -530,7 +531,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -568,6 +569,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -576,6 +580,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -607,4 +612,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 2551c7e9ac54..eb6a46b6d135 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -540,6 +540,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -552,7 +553,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -590,6 +591,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -598,6 +602,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -629,4 +634,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 4ffc1e5646d5..bee9263a409c 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -511,6 +511,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -523,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -561,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -569,6 +573,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -600,4 +605,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 806da3d97ca4..c8847a8bcbd6 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -520,6 +520,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -532,7 +533,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -570,6 +571,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -578,6 +582,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -609,4 +614,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 250da20e291c..303ffafd9cad 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -542,6 +542,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -554,7 +555,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -592,6 +593,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -600,6 +604,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -631,4 +636,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b764a0368a56..89a704226cd9 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -628,6 +628,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -640,7 +641,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -678,6 +679,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -686,6 +690,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -717,4 +722,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7800d3a8d46e..f62c1f4d03a0 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -510,6 +510,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -522,7 +523,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -560,6 +561,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -568,6 +572,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -599,4 +604,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index c32dc2d2058d..58dcad26a751 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -511,6 +511,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -523,7 +524,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -561,6 +562,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -569,6 +573,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -600,4 +605,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index bf0a65ce57e0..5d3c28d1d545 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -529,6 +529,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -541,7 +542,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -579,6 +580,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -587,6 +591,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -618,4 +623,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5f3cfa2926d2..5ef9e17dcd51 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -513,6 +513,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -525,7 +526,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -563,6 +564,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 58354d2018d5..22e1accc60a3 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -512,6 +512,7 @@ CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CFB=m @@ -524,7 +525,7 @@ CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -562,6 +563,9 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m @@ -570,6 +574,7 @@ CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m @@ -601,4 +606,3 @@ CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m CONFIG_TEST_STACKINIT=m CONFIG_TEST_MEMINIT=m -CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index a4ebd2445eda..d2875e32abfc 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -167,7 +167,7 @@ static int nfeth_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static void nfeth_tx_timeout(struct net_device *dev) +static void nfeth_tx_timeout(struct net_device *dev, unsigned int txqueue) { dev->stats.tx_errors++; netif_wake_queue(dev); diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h index 559cb91bede1..dec05743d426 100644 --- a/arch/m68k/include/asm/kmap.h +++ b/arch/m68k/include/asm/kmap.h @@ -27,7 +27,6 @@ static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); } -#define ioremap_nocache ioremap #define ioremap_uc ioremap #define ioremap_wt ioremap_wt static inline void __iomem *ioremap_wt(unsigned long physaddr, diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 2e0047cf86f8..4ae52414cd9d 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -30,5 +30,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h new file mode 100644 index 000000000000..bc1dca6cf134 --- /dev/null +++ b/arch/m68k/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M68K_VMALLOC_H +#define _ASM_M68K_VMALLOC_H + +#endif /* _ASM_M68K_VMALLOC_H */ diff --git a/arch/m68k/kernel/bootinfo_proc.c b/arch/m68k/kernel/bootinfo_proc.c index 3b9cab84917d..857fa2aaa748 100644 --- a/arch/m68k/kernel/bootinfo_proc.c +++ b/arch/m68k/kernel/bootinfo_proc.c @@ -26,9 +26,9 @@ static ssize_t bootinfo_read(struct file *file, char __user *buf, bootinfo_size); } -static const struct file_operations bootinfo_fops = { - .read = bootinfo_read, - .llseek = default_llseek, +static const struct proc_ops bootinfo_proc_ops = { + .proc_read = bootinfo_read, + .proc_lseek = default_llseek, }; void __init save_bootinfo(const struct bi_record *bi) @@ -67,7 +67,7 @@ static int __init init_bootinfo_procfs(void) if (!bootinfo_copy) return -ENOMEM; - pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_fops, NULL); + pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_proc_ops, NULL); if (!pde) { kfree(bootinfo_copy); return -ENOMEM; diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 97cd3ea5f10b..9dd76fbb7c6b 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -69,6 +69,13 @@ ENTRY(__sys_vfork) lea %sp@(24),%sp rts +ENTRY(__sys_clone3) + SAVE_SWITCH_STACK + pea %sp@(SWITCH_STACK_SIZE) + jbsr m68k_clone3 + lea %sp@(28),%sp + rts + ENTRY(sys_sigreturn) SAVE_SWITCH_STACK movel %sp,%sp@- | switch_stack pointer diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 4e77a06735c1..8f0d9140700f 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -30,8 +30,9 @@ #include <linux/init_task.h> #include <linux/mqueue.h> #include <linux/rcupdate.h> - +#include <linux/syscalls.h> #include <linux/uaccess.h> + #include <asm/traps.h> #include <asm/machdep.h> #include <asm/setup.h> @@ -107,20 +108,43 @@ void flush_thread(void) * on top of pt_regs, which means that sys_clone() arguments would be * buried. We could, of course, copy them, but it's too costly for no * good reason - generic clone() would have to copy them *again* for - * do_fork() anyway. So in this case it's actually better to pass pt_regs * - * and extract arguments for do_fork() from there. Eventually we might - * go for calling do_fork() directly from the wrapper, but only after we - * are finished with do_fork() prototype conversion. + * _do_fork() anyway. So in this case it's actually better to pass pt_regs * + * and extract arguments for _do_fork() from there. Eventually we might + * go for calling _do_fork() directly from the wrapper, but only after we + * are finished with _do_fork() prototype conversion. */ asmlinkage int m68k_clone(struct pt_regs *regs) { /* regs will be equal to current_pt_regs() */ - return do_fork(regs->d1, regs->d2, 0, - (int __user *)regs->d3, (int __user *)regs->d4); + struct kernel_clone_args args = { + .flags = regs->d1 & ~CSIGNAL, + .pidfd = (int __user *)regs->d3, + .child_tid = (int __user *)regs->d4, + .parent_tid = (int __user *)regs->d3, + .exit_signal = regs->d1 & CSIGNAL, + .stack = regs->d2, + .tls = regs->d5, + }; + + if (!legacy_clone_args_valid(&args)) + return -EINVAL; + + return _do_fork(&args); +} + +/* + * Because extra registers are saved on the stack after the sys_clone3() + * arguments, this C wrapper extracts them from pt_regs * and then calls the + * generic sys_clone3() implementation. + */ +asmlinkage int m68k_clone3(struct pt_regs *regs) +{ + return sys_clone3((struct clone_args __user *)regs->d1, regs->d2); } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, + unsigned long tls) { struct fork_frame { struct switch_stack sw; @@ -155,7 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.usp = usp ?: rdusp(); if (clone_flags & CLONE_SETTLS) - task_thread_info(p)->tp_value = frame->regs.d5; + task_thread_info(p)->tp_value = tls; #ifdef CONFIG_FPU if (!FPU_IS_EMU) { diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 528484feff80..ab8aa7be260f 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -274,10 +274,6 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - switch (m68k_machtype) { #ifdef CONFIG_AMIGA case MACH_AMIGA: diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 3c5def10d486..a63483de7a42 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -146,10 +146,6 @@ void __init setup_arch(char **cmdline_p) memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); boot_command_line[COMMAND_LINE_SIZE-1] = 0; -#if defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif - /* * Give all the memory to the bootmap allocator, tell it to put the * boot mem_map at the start of memory. diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index a88a285a0e5f..f4f49fcb76d0 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -434,4 +434,6 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 common clone3 __sys_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/m68k/sun3x/config.c b/arch/m68k/sun3x/config.c index 03ce7f9facfe..d806dee71a9c 100644 --- a/arch/m68k/sun3x/config.c +++ b/arch/m68k/sun3x/config.c @@ -70,7 +70,6 @@ void __init config_sun3x(void) break; default: serial_console = 0; - conswitchp = &dummy_con; break; } #endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index e6289294e7fc..6a331bd57ea8 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -11,7 +11,7 @@ config MICROBLAZE select ARCH_HAS_UNCACHED_SEGMENT if !MMU select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h new file mode 100644 index 000000000000..04013a42b0fe --- /dev/null +++ b/arch/microblaze/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MICROBLAZE_VMALLOC_H +#define _ASM_MICROBLAZE_VMALLOC_H + +#endif /* _ASM_MICROBLAZE_VMALLOC_H */ diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index de7083bd1d24..f6ded356394a 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -728,7 +728,7 @@ no_intr_resched: bri 6f; /* MS: Return to kernel state. */ 2: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lwi r11, CURRENT_TASK, TS_THREAD_INFO; /* MS: get preempt_count from thread info */ lwi r5, r11, TI_PREEMPT_COUNT; diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 522a0c5d9c59..511c1ab7f57f 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -65,10 +65,6 @@ void __init setup_arch(char **cmdline_p) microblaze_cache_init(); xilinx_pci_init(); - -#if defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif } #ifdef CONFIG_MTD_UCLINUX diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 09b0cd7dab0a..4c67b11f9c9e 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -441,3 +441,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index add388236f4e..797d7f1ad5fe 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -5,9 +5,11 @@ config MIPS select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KCOV + select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_FORTIFY_SOURCE select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT @@ -15,7 +17,7 @@ config MIPS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select CPU_NO_EFFICIENT_FFS if (TARGET_ISA_REV < 1) select CPU_PM if CPU_IDLE @@ -47,7 +49,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if (!CPU_MICROMIPS) + select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT @@ -55,11 +57,14 @@ config MIPS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS + select HAVE_GENERIC_VDSO select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK @@ -78,18 +83,14 @@ config MIPS select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP - select HAVE_GENERIC_VDSO select IRQ_FORCED_THREADING select ISA if EISA - select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_REL if MODULES + select MODULES_USE_ELF_RELA if MODULES && 64BIT select PERF_USE_VMALLOC select RTC_LIB select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS - select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) - select ARCH_HAS_KCOV - select HAVE_GCC_PLUGINS menu "Machine selection" @@ -104,20 +105,18 @@ config MIPS_GENERIC select CEVT_R4K select CLKSRC_MIPS_GIC select COMMON_CLK - select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI + select CPU_MIPSR2_IRQ_VI select CSRC_R4K select DMA_PERDEV_COHERENT select HAVE_PCI select IRQ_MIPS_CPU - select LIBFDT select MIPS_AUTO_PFN_OFFSET select MIPS_CPU_SCACHE select MIPS_GIC select MIPS_L1_CACHE_SHIFT_7 select NO_EXCEPT_FILL select PCI_DRIVERS_GENERIC - select PINCTRL select SMP_UP if SMP select SWAP_IO_SPACE select SYS_HAS_CPU_MIPS32_R1 @@ -132,11 +131,12 @@ config MIPS_GENERIC select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_MICROMIPS - select SYS_SUPPORTS_MIPS_CPS select SYS_SUPPORTS_MIPS16 + select SYS_SUPPORTS_MIPS_CPS select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_RELOCATABLE select SYS_SUPPORTS_SMARTMIPS + select UHI_BOOT select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN @@ -144,7 +144,6 @@ config MIPS_GENERIC select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF - select UHI_BOOT help Select this to build a kernel which aims to support multiple boards, generally using a flattened device tree passed from the bootloader @@ -403,7 +402,6 @@ config MACH_INGENIC select GENERIC_IRQ_CHIP select BUILTIN_DTB if MIPS_NO_APPENDED_DTB select USE_OF - select LIBFDT config LANTIQ bool "Lantiq based platforms" @@ -510,7 +508,6 @@ config MACH_PISTACHIO select DMA_NONCOHERENT select GPIOLIB select IRQ_MIPS_CPU - select LIBFDT select MFD_SYSCON select MIPS_CPU_SCACHE select MIPS_GIC @@ -548,7 +545,6 @@ config MIPS_MALTA select I8253 select I8259 select IRQ_MIPS_CPU - select LIBFDT select MIPS_BONITO64 select MIPS_CPU_SCACHE select MIPS_GIC @@ -980,7 +976,6 @@ config CAVIUM_OCTEON_SOC select ZONE_DMA32 select HOLES_IN_ZONE select GPIOLIB - select LIBFDT select USE_OF select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SMP @@ -1223,8 +1218,7 @@ config NO_IOPORT_MAP def_bool n config GENERIC_CSUM - bool - default y if !CPU_HAS_LOAD_STORE_LR + def_bool CPU_NO_LOAD_STORE_LR config GENERIC_ISA_DMA bool @@ -1442,11 +1436,14 @@ config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA + select CPU_MIPSR2 + select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select CPU_SUPPORTS_MSA - select CPU_HAS_LOAD_STORE_LR + select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT + select CPU_MIPSR2_IRQ_VI select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE @@ -1464,8 +1461,6 @@ config CPU_LOONGSON64 config LOONGSON3_ENHANCEMENT bool "New Loongson-3 CPU Enhancements" default n - select CPU_MIPSR2 - select CPU_HAS_PREFETCH depends on CPU_LOONGSON64 help New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A @@ -1542,7 +1537,6 @@ config CPU_MIPS32_R1 bool "MIPS32 Release 1" depends on SYS_HAS_CPU_MIPS32_R1 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM help @@ -1560,7 +1554,6 @@ config CPU_MIPS32_R2 bool "MIPS32 Release 2" depends on SYS_HAS_CPU_MIPS32_R2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA @@ -1576,6 +1569,7 @@ config CPU_MIPS32_R6 bool "MIPS32 Release 6" depends on SYS_HAS_CPU_MIPS32_R6 select CPU_HAS_PREFETCH + select CPU_NO_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA @@ -1591,7 +1585,6 @@ config CPU_MIPS64_R1 bool "MIPS64 Release 1" depends on SYS_HAS_CPU_MIPS64_R1 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1611,7 +1604,6 @@ config CPU_MIPS64_R2 bool "MIPS64 Release 2" depends on SYS_HAS_CPU_MIPS64_R2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1629,6 +1621,7 @@ config CPU_MIPS64_R6 bool "MIPS64 Release 6" depends on SYS_HAS_CPU_MIPS64_R6 select CPU_HAS_PREFETCH + select CPU_NO_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1646,7 +1639,6 @@ config CPU_R3000 bool "R3000" depends on SYS_HAS_CPU_R3000 select CPU_HAS_WB - select CPU_HAS_LOAD_STORE_LR select CPU_R3K_TLB select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1662,7 +1654,6 @@ config CPU_TX39XX bool "R39XX" depends on SYS_HAS_CPU_TX39XX select CPU_SUPPORTS_32BIT_KERNEL - select CPU_HAS_LOAD_STORE_LR select CPU_R3K_TLB config CPU_VR41XX @@ -1670,7 +1661,6 @@ config CPU_VR41XX depends on SYS_HAS_CPU_VR41XX select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL - select CPU_HAS_LOAD_STORE_LR help The options selects support for the NEC VR4100 series of processors. Only choose this option if you have one of these processors as a @@ -1683,7 +1673,6 @@ config CPU_R4X00 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R4000-series processors other than 4300, including the R4000, R4400, R4600, and 4700. @@ -1692,7 +1681,6 @@ config CPU_TX49XX bool "R49XX" depends on SYS_HAS_CPU_TX49XX select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES @@ -1703,7 +1691,6 @@ config CPU_R5000 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R5000-series processors other than the Nevada. @@ -1713,7 +1700,6 @@ config CPU_R5500 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV instruction set. @@ -1724,7 +1710,6 @@ config CPU_NEVADA select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help QED / PMC-Sierra RM52xx-series ("Nevada") processors. @@ -1732,7 +1717,6 @@ config CPU_R10000 bool "R10000" depends on SYS_HAS_CPU_R10000 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1744,7 +1728,6 @@ config CPU_RM7000 bool "RM7000" depends on SYS_HAS_CPU_RM7000 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1753,7 +1736,6 @@ config CPU_RM7000 config CPU_SB1 bool "SB1" depends on SYS_HAS_CPU_SB1 - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1764,7 +1746,6 @@ config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_64BIT_KERNEL select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM @@ -1794,7 +1775,6 @@ config CPU_BMIPS select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_CPUFREQ select MIPS_EXTERNAL_TIMER help @@ -1803,7 +1783,6 @@ config CPU_BMIPS config CPU_XLR bool "Netlogic XLR SoC" depends on SYS_HAS_CPU_XLR - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1822,7 +1801,6 @@ config CPU_XLP select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_MIPSR2 select CPU_SUPPORTS_HUGEPAGES select MIPS_ASID_BITS_VARIABLE @@ -1928,14 +1906,12 @@ config CPU_LOONGSON2EF select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select ARCH_HAS_PHYS_TO_DMA - select CPU_HAS_LOAD_STORE_LR config CPU_LOONGSON32 bool select CPU_MIPS32 select CPU_MIPSR2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_CPUFREQ @@ -2110,12 +2086,14 @@ config CPU_MIPSR2 bool default y if CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_CAVIUM_OCTEON select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select MIPS_SPRAM config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE select MIPS_CRC_SUPPORT @@ -2575,15 +2553,23 @@ config CPU_HAS_WB config XKS01 bool +config CPU_HAS_DIEI + depends on !CPU_DIEI_BROKEN + bool + +config CPU_DIEI_BROKEN + bool + config CPU_HAS_RIXI bool -config CPU_HAS_LOAD_STORE_LR +config CPU_NO_LOAD_STORE_LR bool help - CPU has support for unaligned load and store instructions: + CPU lacks support for unaligned load and store instructions: LWL, LWR, SWL, SWR (Load/store word left/right). - LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems). + LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit + systems). # # Vectored interrupt mode is an R2 feature @@ -2696,6 +2682,14 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + config RELOCATABLE bool "Relocatable kernel" depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC) diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index f03fdc95143e..4b1d3ba3a8a2 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -17,7 +17,7 @@ quiet_cmd_ls3_llsc = LLSCCHK $@ cmd_ls3_llsc = $(CMD_LS3_LLSC) $@ CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c index 7de162432d7f..95def949c971 100644 --- a/arch/mips/ar7/clock.c +++ b/arch/mips/ar7/clock.c @@ -236,9 +236,9 @@ static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, static void __init tnetd7300_init_clocks(void) { - u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4); + u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); struct tnetd7300_clocks *clocks = - ioremap_nocache(UR8_REGS_CLOCKS, + ioremap(UR8_REGS_CLOCKS, sizeof(struct tnetd7300_clocks)); bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, @@ -320,9 +320,9 @@ static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) static void __init tnetd7200_init_clocks(void) { - u32 *bootcr = (u32 *)ioremap_nocache(AR7_REGS_DCL, 4); + u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); struct tnetd7200_clocks *clocks = - ioremap_nocache(AR7_REGS_CLOCKS, + ioremap(AR7_REGS_CLOCKS, sizeof(struct tnetd7200_clocks)); int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 2292e55c12e2..8b006addd6ba 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -308,7 +308,7 @@ int __init ar7_gpio_init(void) size = 0x1f; } - gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); + gpch->regs = ioremap(AR7_REGS_GPIO, size); if (!gpch->regs) { printk(KERN_ERR "%s: failed to ioremap regs\n", gpch->chip.label); diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 1f2028266493..215149a85d83 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -702,7 +702,7 @@ static int __init ar7_register_devices(void) pr_warn("unable to register usb slave: %d\n", res); /* Register watchdog only if enabled in hardware */ - bootcr = ioremap_nocache(AR7_REGS_DCL, 4); + bootcr = ioremap(AR7_REGS_DCL, 4); val = readl(bootcr); iounmap(bootcr); if (val & AR7_WDT_HW_ENA) { diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 8da996142d6a..24f619199ee7 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -262,7 +262,7 @@ void __init ar2315_plat_mem_setup(void) u32 config; /* Detect memory size */ - sdram_base = ioremap_nocache(AR2315_SDRAMCTL_BASE, + sdram_base = ioremap(AR2315_SDRAMCTL_BASE, AR2315_SDRAMCTL_SIZE); memcfg = __raw_readl(sdram_base + AR2315_MEM_CFG); memsize = 1 + ATH25_REG_MS(memcfg, AR2315_MEM_CFG_DATA_WIDTH); @@ -272,7 +272,7 @@ void __init ar2315_plat_mem_setup(void) add_memory_region(0, memsize, BOOT_MEM_RAM); iounmap(sdram_base); - ar2315_rst_base = ioremap_nocache(AR2315_RST_BASE, AR2315_RST_SIZE); + ar2315_rst_base = ioremap(AR2315_RST_BASE, AR2315_RST_SIZE); /* Detect the hardware based on the device ID */ devid = ar2315_rst_reg_read(AR2315_SREV) & AR2315_REV_CHIP; diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index acd55a9cffe3..47f3e98974fc 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -185,7 +185,7 @@ static void __init ar5312_flash_init(void) void __iomem *flashctl_base; u32 ctl; - flashctl_base = ioremap_nocache(AR5312_FLASHCTL_BASE, + flashctl_base = ioremap(AR5312_FLASHCTL_BASE, AR5312_FLASHCTL_SIZE); ctl = __raw_readl(flashctl_base + AR5312_FLASHCTL0); @@ -358,7 +358,7 @@ void __init ar5312_plat_mem_setup(void) u32 devid; /* Detect memory size */ - sdram_base = ioremap_nocache(AR5312_SDRAMCTL_BASE, + sdram_base = ioremap(AR5312_SDRAMCTL_BASE, AR5312_SDRAMCTL_SIZE); memcfg = __raw_readl(sdram_base + AR5312_MEM_CFG1); bank0_ac = ATH25_REG_MS(memcfg, AR5312_MEM_CFG1_AC0); @@ -369,7 +369,7 @@ void __init ar5312_plat_mem_setup(void) add_memory_region(0, memsize, BOOT_MEM_RAM); iounmap(sdram_base); - ar5312_rst_base = ioremap_nocache(AR5312_RST_BASE, AR5312_RST_SIZE); + ar5312_rst_base = ioremap(AR5312_RST_BASE, AR5312_RST_SIZE); devid = ar5312_rst_reg_read(AR5312_REV); devid >>= AR5312_REV_WMAC_MIN_S; diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index 989e71015ee6..cb99f9739910 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -111,7 +111,7 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size) u8 *mac_addr; u32 offset; - flash_base = ioremap_nocache(base, size); + flash_base = ioremap(base, size); flash_limit = flash_base + size; ath25_board.config = NULL; diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 63eacb8b0eb5..137abbc65c60 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -41,7 +41,7 @@ static void __iomem *ath79_ddr_pci_win_base; void ath79_ddr_ctrl_init(void) { - ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE, + ath79_ddr_base = ioremap(AR71XX_DDR_CTRL_BASE, AR71XX_DDR_CTRL_SIZE); if (soc_is_ar913x() || soc_is_ar724x() || soc_is_ar933x()) { ath79_ddr_wb_flush_base = ath79_ddr_base + 0x7c; diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index ea385a865781..484ee28922a9 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -226,9 +226,9 @@ void __init plat_mem_setup(void) else if (fw_passed_dtb) __dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb)); - ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE, + ath79_reset_base = ioremap(AR71XX_RESET_BASE, AR71XX_RESET_SIZE); - ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE, + ath79_pll_base = ioremap(AR71XX_PLL_BASE, AR71XX_PLL_SIZE); ath79_detect_sys_type(); ath79_ddr_ctrl_init(); diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 528bd73d530a..4ed45ade32a1 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 172801ed35b8..d859f079b771 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile index 9cc48441eb71..e1654291a7b0 100644 --- a/arch/mips/boot/dts/ingenic/Makefile +++ b/arch/mips/boot/dts/ingenic/Makefile @@ -2,5 +2,6 @@ dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb dtb-$(CONFIG_JZ4770_GCW0) += gcw0.dtb dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb +dtb-$(CONFIG_X1000_CU1000_NEO) += cu1000-neo.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/mips/boot/dts/ingenic/cu1000-neo.dts b/arch/mips/boot/dts/ingenic/cu1000-neo.dts new file mode 100644 index 000000000000..03abd94acd84 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/cu1000-neo.dts @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "x1000.dtsi" +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/clock/ingenic,tcu.h> +#include <dt-bindings/interrupt-controller/irq.h> + +/ { + compatible = "yna,cu1000-neo", "ingenic,x1000"; + model = "YSH & ATIL General Board CU Neo"; + + aliases { + serial2 = &uart2; + }; + + chosen { + stdout-path = "serial2:115200n8"; + }; + + memory { + device_type = "memory"; + reg = <0x0 0x04000000>; + }; + + wlan_pwrseq: msc1-pwrseq { + compatible = "mmc-pwrseq-simple"; + + clocks = <&lpoclk>; + clock-names = "ext_clock"; + + reset-gpios = <&gpc 17 GPIO_ACTIVE_LOW>; + post-power-on-delay-ms = <200>; + + lpoclk: ap6212a { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + }; +}; + +&exclk { + clock-frequency = <24000000>; +}; + +&tcu { + /* 1500 kHz for the system timer and clocksource */ + assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; + assigned-clock-rates = <1500000>, <1500000>; + + /* Use channel #0 for the system timer channel #2 for the clocksource */ + ingenic,pwm-channels-mask = <0xfa>; +}; + +&i2c0 { + status = "okay"; + + clock-frequency = <400000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0>; + + ads7830@48 { + compatible = "ti,ads7830"; + reg = <0x48>; + }; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + + status = "okay"; +}; + +&mac { + phy-mode = "rmii"; + phy-handle = <&lan8720a>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mac>; + + snps,reset-gpio = <&gpc 23 GPIO_ACTIVE_LOW>; /* PC23 */ + snps,reset-active-low; + snps,reset-delays-us = <0 10000 30000>; + + status = "okay"; +}; + +&mdio { + status = "okay"; + + lan8720a: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.c0f0", "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; +}; + +&msc0 { + bus-width = <8>; + max-frequency = <50000000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_msc0>; + + non-removable; + + status = "okay"; +}; + +&msc1 { + bus-width = <4>; + max-frequency = <50000000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_msc1>; + + #address-cells = <1>; + #size-cells = <0>; + + non-removable; + + mmc-pwrseq = <&wlan_pwrseq>; + + status = "okay"; + + ap6212a: wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + + interrupt-parent = <&gpc>; + interrupts = <16 IRQ_TYPE_EDGE_FALLING>; + interrupt-names = "host-wake"; + + brcm,drive-strength = <10>; + }; +}; + +&pinctrl { + pins_i2c0: i2c0 { + function = "i2c0"; + groups = "i2c0-data"; + bias-disable; + }; + + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data-d"; + bias-disable; + }; + + pins_mac: mac { + function = "mac"; + groups = "mac"; + bias-disable; + }; + + pins_msc0: msc0 { + function = "mmc0"; + groups = "mmc0-1bit", "mmc0-4bit", "mmc0-8bit"; + bias-disable; + }; + + pins_msc1: msc1 { + function = "mmc1"; + groups = "mmc1-1bit", "mmc1-4bit"; + bias-disable; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi new file mode 100644 index 000000000000..4994c695a1a7 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/x1000-cgu.h> +#include <dt-bindings/dma/x1000-dma.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "ingenic,x1000", "ingenic,x1000e"; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + intc: interrupt-controller@10001000 { + compatible = "ingenic,x1000-intc", "ingenic,jz4780-intc"; + reg = <0x10001000 0x50>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + exclk: ext { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + rtclk: rtc { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + cgu: x1000-cgu@10000000 { + compatible = "ingenic,x1000-cgu"; + reg = <0x10000000 0x100>; + + #clock-cells = <1>; + + clocks = <&exclk>, <&rtclk>; + clock-names = "ext", "rtc"; + }; + + tcu: timer@10002000 { + compatible = "ingenic,x1000-tcu", + "ingenic,jz4770-tcu", + "simple-mfd"; + reg = <0x10002000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10002000 0x1000>; + + #clock-cells = <1>; + + clocks = <&cgu X1000_CLK_RTCLK + &cgu X1000_CLK_EXCLK + &cgu X1000_CLK_PCLK>; + clock-names = "rtc", "ext", "pclk"; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&intc>; + interrupts = <27 26 25>; + + wdt: watchdog@0 { + compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog"; + reg = <0x0 0x10>; + + clocks = <&cgu X1000_CLK_RTCLK>; + clock-names = "wdt"; + }; + }; + + rtc: rtc@10003000 { + compatible = "ingenic,x1000-rtc", "ingenic,jz4780-rtc"; + reg = <0x10003000 0x4c>; + + interrupt-parent = <&intc>; + interrupts = <32>; + + clocks = <&cgu X1000_CLK_RTCLK>; + clock-names = "rtc"; + }; + + pinctrl: pin-controller@10010000 { + compatible = "ingenic,x1000-pinctrl"; + reg = <0x10010000 0x800>; + #address-cells = <1>; + #size-cells = <0>; + + gpa: gpio@0 { + compatible = "ingenic,x1000-gpio"; + reg = <0>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 0 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <17>; + }; + + gpb: gpio@1 { + compatible = "ingenic,x1000-gpio"; + reg = <1>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 32 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <16>; + }; + + gpc: gpio@2 { + compatible = "ingenic,x1000-gpio"; + reg = <2>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 64 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <15>; + }; + + gpd: gpio@3 { + compatible = "ingenic,x1000-gpio"; + reg = <3>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 96 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <14>; + }; + }; + + i2c0: i2c-controller@10050000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10050000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <60>; + + clocks = <&cgu X1000_CLK_I2C0>; + + status = "disabled"; + }; + + i2c1: i2c-controller@10051000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10051000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <59>; + + clocks = <&cgu X1000_CLK_I2C1>; + + status = "disabled"; + }; + + i2c2: i2c-controller@10052000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10052000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <58>; + + clocks = <&cgu X1000_CLK_I2C2>; + + status = "disabled"; + }; + + uart0: serial@10030000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10030000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <51>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART0>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart1: serial@10031000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10031000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <50>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART1>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart2: serial@10032000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10032000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <49>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART2>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + pdma: dma-controller@13420000 { + compatible = "ingenic,x1000-dma"; + reg = <0x13420000 0x400 + 0x13421000 0x40>; + #dma-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <10>; + + clocks = <&cgu X1000_CLK_PDMA>; + }; + + mac: ethernet@134b0000 { + compatible = "ingenic,x1000-mac", "snps,dwmac"; + reg = <0x134b0000 0x2000>; + + interrupt-parent = <&intc>; + interrupts = <55>; + interrupt-names = "macirq"; + + clocks = <&cgu X1000_CLK_MAC>; + clock-names = "stmmaceth"; + + status = "disabled"; + + mdio: mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + status = "disabled"; + }; + }; + + msc0: mmc@13450000 { + compatible = "ingenic,x1000-mmc"; + reg = <0x13450000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <37>; + + clocks = <&cgu X1000_CLK_MSC0>; + clock-names = "mmc"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + dmas = <&pdma X1000_DMA_MSC0_RX 0xffffffff>, + <&pdma X1000_DMA_MSC0_TX 0xffffffff>; + dma-names = "rx", "tx"; + + status = "disabled"; + }; + + msc1: mmc@13460000 { + compatible = "ingenic,x1000-mmc"; + reg = <0x13460000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <36>; + + clocks = <&cgu X1000_CLK_MSC1>; + clock-names = "mmc"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + dmas = <&pdma X1000_DMA_MSC1_RX 0xffffffff>, + <&pdma X1000_DMA_MSC1_TX 0xffffffff>; + dma-names = "rx", "tx"; + + status = "disabled"; + }; +}; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 5cfc9d347826..8f5aed760abb 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -126,6 +126,9 @@ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; clock-names = "eth", "mdio"; + phy-mode = "mii"; + phy-handle = <&phy_port4>; + status = "disabled"; }; @@ -133,13 +136,127 @@ compatible = "qca,ar9330-eth"; reg = <0x1a000000 0x200>; interrupts = <5>; - resets = <&rst 13>, <&rst 23>; reset-names = "mac", "mdio"; clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; clock-names = "eth", "mdio"; + phy-mode = "gmii"; + status = "disabled"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch10: switch@10 { + #address-cells = <1>; + #size-cells = <0>; + + compatible = "qca,ar9331-switch"; + reg = <0x10>; + resets = <&rst 8>; + reset-names = "switch"; + + interrupt-parent = <&miscintc>; + interrupts = <12>; + + interrupt-controller; + #interrupt-cells = <1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + switch_port0: port@0 { + reg = <0x0>; + label = "cpu"; + ethernet = <ð1>; + + phy-mode = "gmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch_port1: port@1 { + reg = <0x1>; + phy-handle = <&phy_port0>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port2: port@2 { + reg = <0x2>; + phy-handle = <&phy_port1>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port3: port@3 { + reg = <0x3>; + phy-handle = <&phy_port2>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port4: port@4 { + reg = <0x4>; + phy-handle = <&phy_port3>; + phy-mode = "internal"; + + status = "disabled"; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&switch10>; + + phy_port0: phy@0 { + reg = <0x0>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port1: phy@1 { + reg = <0x1>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port2: phy@2 { + reg = <0x2>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port3: phy@3 { + reg = <0x3>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port4: phy@4 { + reg = <0x4>; + interrupts = <0>; + status = "disabled"; + }; + }; + }; + }; }; usb: usb@1b000100 { diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index 77bab823eb3b..0f2b20044834 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -84,3 +84,16 @@ ð1 { status = "okay"; }; + +&switch_port1 { + label = "lan0"; + status = "okay"; +}; + +&phy_port0 { + status = "okay"; +}; + +&phy_port4 { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts index aa5caaa31104..6069b33cf09f 100644 --- a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -177,6 +177,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */ + fifo-size = <8>; + tx-threshold = <8>; + rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>; }; @@ -195,3 +198,8 @@ &watchdog { status = "okay"; }; + +&wmac { + status = "okay"; + mediatek,mtd-eeprom = <&factory 0x0000>; +}; diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 742bcc1dc2e0..892e8ab863c5 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -285,4 +285,14 @@ interrupt-parent = <&intc>; interrupts = <18>; }; + + wmac: wmac@10300000 { + compatible = "mediatek,mt7628-wmac"; + reg = <0x10300000 0x100000>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + status = "disabled"; + }; }; diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index ba8f82a29a81..e794b2d53adf 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -45,13 +45,6 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; /* See header file for descriptions of functions */ /** - * This macro returns the size of a member of a structure. - * Logically it is the same as "sizeof(s::field)" in C++, but - * C lacks the "::" operator. - */ -#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field) - -/** * This macro returns a member of the * cvmx_bootmem_named_block_desc_t structure. These members can't * be directly addressed as they might be in memory not directly @@ -65,7 +58,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; #define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \ __cvmx_bootmem_desc_get(addr, \ offsetof(struct cvmx_bootmem_named_block_desc, field), \ - SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) + sizeof_field(struct cvmx_bootmem_named_block_desc, field)) /** * This function is the implementation of the get macros defined diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f97be32bf699..6bd1e97effdf 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2193,7 +2193,7 @@ static int octeon_irq_cib_map(struct irq_domain *d, struct octeon_irq_cib_chip_data *cd; if (hw >= host_data->max_bits) { - pr_err("ERROR: %s mapping %u is to big!\n", + pr_err("ERROR: %s mapping %u is too big!\n", irq_domain_get_of_node(d)->name, (unsigned)hw); return -EINVAL; } diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 1f742c32a883..4f34d92b52f9 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -357,7 +357,7 @@ static void octeon_write_lcd(const char *s) { if (octeon_bootinfo->led_display_base_addr) { void __iomem *lcd_address = - ioremap_nocache(octeon_bootinfo->led_display_base_addr, + ioremap(octeon_bootinfo->led_display_base_addr, 8); int i; for (i = 0; i < 8; i++, s++) { diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig new file mode 100644 index 000000000000..9b05a8fdabe1 --- /dev/null +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -0,0 +1,117 @@ +CONFIG_LOCALVERSION_AUTO=y +CONFIG_KERNEL_GZIP=y +CONFIG_SYSVIPC=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MACH_INGENIC=y +CONFIG_X1000_CU1000_NEO=y +CONFIG_HIGHMEM=y +CONFIG_HZ_100=y +# CONFIG_SECCOMP is not set +# CONFIG_SUSPEND is not set +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_COMPACTION is not set +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_CFG80211=y +CONFIG_UEVENT_HELPER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_SMSC_PHY=y +CONFIG_BRCMFMAC=y +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_LEGACY_PTY_COUNT=2 +CONFIG_SERIAL_EARLYCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=3 +CONFIG_SERIAL_8250_RUNTIME_UARTS=3 +CONFIG_SERIAL_8250_INGENIC=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_JZ4780=y +CONFIG_GPIO_SYSFS=y +CONFIG_SENSORS_ADS7828=y +CONFIG_WATCHDOG=y +CONFIG_JZ4740_WDT=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_JZ4740=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_JZ4740=y +CONFIG_DMADEVICES=y +CONFIG_DMA_JZ4780=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_NVMEM=y +CONFIG_NVMEM_SYSFS=y +CONFIG_EXT4_FS=y +# CONFIG_DNOTIFY is not set +CONFIG_AUTOFS_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_PROC_PAGE_MONITOR is not set +CONFIG_TMPFS=y +CONFIG_CONFIGFS_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_LZO=y +CONFIG_PRINTK_TIME=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_CONSOLE_LOGLEVEL_QUIET=15 +CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7 +CONFIG_DEBUG_INFO=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +CONFIG_STACKTRACE=y +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="earlycon clk_ignore_unused" diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 1134fbb99fc2..7626f2a75b03 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -41,6 +41,7 @@ CONFIG_SPI_DESIGNWARE=y CONFIG_SPI_DW_MMIO=y CONFIG_SPI_SPIDEV=y +CONFIG_PINCTRL=y CONFIG_PINCTRL_OCELOT=y CONFIG_GPIO_SYSFS=y diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 7d1d2425746f..faa88a6a74c0 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -177,10 +177,8 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, { struct chksum_ctx *mctx = crypto_shash_ctx(tfm); - if (keylen != sizeof(mctx->key)) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(mctx->key)) return -EINVAL; - } mctx->key = get_unaligned_le32(key); return 0; } diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c index b37d29cf5d0a..fc881b46d911 100644 --- a/arch/mips/crypto/poly1305-glue.c +++ b/arch/mips/crypto/poly1305-glue.c @@ -15,7 +15,7 @@ asmlinkage void poly1305_init_mips(void *state, const u8 *key); asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); +asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) { @@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch); void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) { - __le32 digest[4]; - u64 f = 0; - if (unlikely(dctx->buflen)) { dctx->buf[dctx->buflen++] = 1; memset(dctx->buf + dctx->buflen, 0, @@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); } - poly1305_emit_mips(&dctx->h, digest, dctx->s); - - /* mac = (h + s) % (2^128) */ - f = (f >> 32) + le32_to_cpu(digest[0]); - put_unaligned_le32(f, dst); - f = (f >> 32) + le32_to_cpu(digest[1]); - put_unaligned_le32(f, dst + 4); - f = (f >> 32) + le32_to_cpu(digest[2]); - put_unaligned_le32(f, dst + 8); - f = (f >> 32) + le32_to_cpu(digest[3]); - put_unaligned_le32(f, dst + 12); - + poly1305_emit_mips(&dctx->h, dst, dctx->s); *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c index 06d92fb37769..c238e95190ac 100644 --- a/arch/mips/generic/board-ocelot.c +++ b/arch/mips/generic/board-ocelot.c @@ -51,7 +51,7 @@ static void __init ocelot_earlyprintk_init(void) { void __iomem *uart_base; - uart_base = ioremap_nocache(UART_UART, 0x20); + uart_base = ioremap(UART_UART, 0x20); setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000); } diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 179403ae5837..4ebd8ce254ce 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += sections.h +generic-y += serial.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index feb069cbf44e..655f40ddb6d1 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -63,7 +63,7 @@ .endm .macro local_irq_disable reg=t0 -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lw \reg, TI_PRE_COUNT($28) addi \reg, \reg, 1 sw \reg, TI_PRE_COUNT($28) @@ -73,7 +73,7 @@ xori \reg, \reg, 1 mtc0 \reg, CP0_STATUS irq_disable_hazard -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION lw \reg, TI_PRE_COUNT($28) addi \reg, \reg, -1 sw \reg, TI_PRE_COUNT($28) diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index d41a5057bc69..61727785a247 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -81,6 +81,7 @@ enum loongson2ef_machine_type { #define MACH_INGENIC_JZ4770 2 /* JZ4770 SOC */ #define MACH_INGENIC_JZ4780 3 /* JZ4780 SOC */ #define MACH_INGENIC_X1000 4 /* X1000 SOC */ +#define MACH_INGENIC_X1830 5 /* X1830 SOC */ extern char *system_type; const char *get_system_type(void); diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index c99166eadbde..255afcdd79c9 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -100,24 +100,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - /* cast to a __user pointer via "unsigned long" makes sparse happy */ - return (void __user *)(unsigned long)(long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = (struct pt_regs *) diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 983a6a7f43a1..de44c92b1c1f 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -555,6 +555,10 @@ # define cpu_has_perf __opt(MIPS_CPU_PERF) #endif +#ifndef cpu_has_mac2008_only +# define cpu_has_mac2008_only __opt(MIPS_CPU_MAC_2008_ONLY) +#endif + #ifdef CONFIG_SMP /* * Some systems share FTLB RAMs between threads within a core (siblings in diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index c46c59b0f1b4..49f0061a6051 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -15,7 +15,8 @@ static inline int __pure __get_cpu_type(const int cpu_type) { switch (cpu_type) { -#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF) +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) case CPU_LOONGSON2EF: #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index ea830783d663..216a22916740 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 #define PRID_COMP_LOONGSON 0x140000 -#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750 */ +#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750, X1830 */ #define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775, X1000 */ #define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */ @@ -185,7 +185,8 @@ * These are the PRID's for when 23:16 == PRID_COMP_INGENIC_* */ -#define PRID_IMP_XBURST 0x0200 +#define PRID_IMP_XBURST_REV1 0x0200 /* XBurst with MXU SIMD ISA */ +#define PRID_IMP_XBURST_REV2 0x0100 /* XBurst with MXU2 SIMD ISA */ /* * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC @@ -415,6 +416,7 @@ enum cpu_type_enum { #define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \ BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */ #define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */ +#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU Only support MAC2008 Fused multiply-add instruction */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/gio_device.h b/arch/mips/include/asm/gio_device.h index c52948f9ca95..159087f5386e 100644 --- a/arch/mips/include/asm/gio_device.h +++ b/arch/mips/include/asm/gio_device.h @@ -32,8 +32,6 @@ struct gio_driver { }; #define to_gio_driver(drv) container_of(drv, struct gio_driver, driver) -extern const struct gio_device_id *gio_match_device(const struct gio_device_id *, - const struct gio_device *); extern struct gio_device *gio_dev_get(struct gio_device *); extern void gio_dev_put(struct gio_device *); diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index a4f48b0f5541..a0b92205f933 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -23,7 +23,7 @@ * TLB hazards */ #if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT) + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) /* * MIPSR2 defines ehb for hazard avoidance @@ -158,7 +158,7 @@ do { \ } while (0) #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ - defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ + defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_CPU_LOONGSON64) || \ defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) /* diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 3f6ce74335b4..cf1f2a4a2418 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -227,29 +227,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, */ #define ioremap(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED) - -/* - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - */ -#define ioremap_nocache(offset, size) \ - __ioremap_mode((offset), (size), _CACHE_UNCACHED) -#define ioremap_uc ioremap_nocache +#define ioremap_uc ioremap /* * ioremap_cache - map bus memory into CPU space diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index c4728bbdf15b..47a8ffc0b413 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -18,7 +18,7 @@ #include <asm/compiler.h> #include <asm/hazards.h> -#if defined(CONFIG_CPU_MIPSR2) || defined (CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_HAS_DIEI) static inline void arch_local_irq_disable(void) { @@ -94,7 +94,7 @@ static inline void arch_local_irq_restore(unsigned long flags) void arch_local_irq_disable(void); unsigned long arch_local_irq_save(void); void arch_local_irq_restore(unsigned long flags); -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ +#endif /* CONFIG_CPU_HAS_DIEI */ static inline void arch_local_irq_enable(void) { @@ -102,7 +102,7 @@ static inline void arch_local_irq_enable(void) " .set push \n" " .set reorder \n" " .set noat \n" -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_HAS_DIEI) " ei \n" #else " mfc0 $1,$12 \n" diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 02783e141c32..fef0fda8f82f 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -37,6 +37,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -52,6 +53,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -84,6 +86,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -99,6 +102,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" diff --git a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h index f992c1db876b..3e54f605a70b 100644 --- a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h @@ -10,20 +10,10 @@ #define __ASM_MACH_IP27_KERNEL_ENTRY_H #include <asm/sn/addrs.h> -#include <asm/sn/sn0/hubni.h> +#include <asm/sn/agent.h> #include <asm/sn/klkernvars.h> /* - * Returns the local nasid into res. - */ - .macro GET_NASID_ASM res - dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID) - ld \res, (\res) - and \res, NSRI_NODEID_MASK - dsrl \res, NSRI_NODEID_SHFT - .endm - -/* * TLB bits */ #define PAGE_GLOBAL (1 << 6) diff --git a/arch/mips/include/asm/mach-ip27/mangle-port.h b/arch/mips/include/asm/mach-ip27/mangle-port.h index f6e4912ea062..27c56efa519f 100644 --- a/arch/mips/include/asm/mach-ip27/mangle-port.h +++ b/arch/mips/include/asm/mach-ip27/mangle-port.h @@ -8,7 +8,7 @@ #ifndef __ASM_MACH_IP27_MANGLE_PORT_H #define __ASM_MACH_IP27_MANGLE_PORT_H -#define __swizzle_addr_b(port) (port) +#define __swizzle_addr_b(port) ((port) ^ 3) #define __swizzle_addr_w(port) ((port) ^ 2) #define __swizzle_addr_l(port) (port) #define __swizzle_addr_q(port) (port) @@ -20,6 +20,6 @@ # define ioswabl(a, x) (x) # define __mem_ioswabl(a, x) cpu_to_le32(x) # define ioswabq(a, x) (x) -# define __mem_ioswabq(a, x) cpu_to_le32(x) +# define __mem_ioswabq(a, x) cpu_to_le64(x) #endif /* __ASM_MACH_IP27_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h index f463826515df..08c36e50a860 100644 --- a/arch/mips/include/asm/mach-ip27/mmzone.h +++ b/arch/mips/include/asm/mach-ip27/mmzone.h @@ -4,7 +4,8 @@ #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> +#include <asm/sn/klkernvars.h> #define pa_to_nid(addr) NASID_GET(addr) @@ -12,7 +13,6 @@ struct hub_data { kern_vars_t kern_vars; DECLARE_BITMAP(h_bigwin_used, HUB_NUM_BIG_WINDOW); cpumask_t h_cpus; - unsigned long slice_map; }; struct node_data { diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index be61ddcdacab..d66cc53feab8 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -2,12 +2,12 @@ #ifndef _ASM_MACH_TOPOLOGY_H #define _ASM_MACH_TOPOLOGY_H 1 -#include <asm/sn/hub.h> #include <asm/sn/types.h> #include <asm/mmzone.h> struct cpuinfo_ip27 { nasid_t p_nasid; /* my node ID in numa-as-id-space */ + unsigned short p_speed; /* cpu speed in MHz */ unsigned char p_slice; /* Physical position on node board */ }; diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 7dc8d75445a9..4fab38c743dd 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -46,5 +46,7 @@ #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 +#define cpu_has_mac2008_only 1 +#define cpu_has_mips_r2_exec_hazard 0 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 0d5a30988697..796fe47cfd17 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1101,9 +1101,12 @@ /* * Bits 22:20 of the FPU Status Register will be read as 0, * and should be written as zero. + * MAC2008 was removed in Release 5 so we still treat it as + * reserved. */ #define FPU_CSR_RSVD (_ULCAST_(7) << 20) +#define FPU_CSR_MAC2008 (_ULCAST_(1) << 20) #define FPU_CSR_ABS2008 (_ULCAST_(1) << 19) #define FPU_CSR_NAN2008 (_ULCAST_(1) << 18) diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index 3bc630ff9ad4..9c476a0400e0 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -806,7 +806,8 @@ struct bridge_controller { unsigned long baddr; unsigned long intr_addr; struct irq_domain *domain; - unsigned int pci_int[8]; + unsigned int pci_int[8][2]; + unsigned int int_mapping[8][2]; u32 ioc3_sid[8]; nasid_t nasid; }; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 91b89aab1787..aef5378f909c 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -639,6 +639,11 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef _PAGE_HUGE +#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0) +#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0) +#endif + #define gup_fast_permitted(start, end) (!cpu_has_dc_aliases) #include <asm-generic/pgtable.h> diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h deleted file mode 100644 index 2777148dbfc5..000000000000 --- a/arch/mips/include/asm/serial.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2017 MIPS Tech, LLC - */ -#ifndef __ASM__SERIAL_H -#define __ASM__SERIAL_H - -#ifdef CONFIG_MIPS_GENERIC -/* - * Generic kernels cannot know a correct value for all platforms at - * compile time. Set it to 0 to prevent 8250_early using it - */ -#define BASE_BAUD 0 -#else -#include <asm-generic/serial.h> -#endif - -#endif /* __ASM__SERIAL_H */ diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h index f7d3273d9599..9a9682543e89 100644 --- a/arch/mips/include/asm/sn/arch.h +++ b/arch/mips/include/asm/sn/arch.h @@ -25,7 +25,4 @@ #define INVALID_MODULE (moduleid_t)-1 #define INVALID_PARTID (partid_t)-1 -extern nasid_t get_nasid(void); -extern int get_cpu_slice(cpuid_t); - #endif /* _ASM_SN_ARCH_H */ diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h deleted file mode 100644 index 45878fbefbae..000000000000 --- a/arch/mips/include/asm/sn/hub.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SN_HUB_H -#define __ASM_SN_HUB_H - -#include <linux/types.h> -#include <linux/cpumask.h> -#include <asm/sn/types.h> -#include <asm/sn/io.h> -#include <asm/sn/klkernvars.h> -#include <asm/xtalk/xtalk.h> - -/* ip27-hubio.c */ -extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, - unsigned long xtalk_addr, size_t size); -extern void hub_pio_init(nasid_t nasid); - -#endif /* __ASM_SN_HUB_H */ diff --git a/arch/mips/include/asm/sn/intr.h b/arch/mips/include/asm/sn/intr.h index fc1348193957..3d6954d370dc 100644 --- a/arch/mips/include/asm/sn/intr.h +++ b/arch/mips/include/asm/sn/intr.h @@ -8,15 +8,6 @@ #ifndef __ASM_SN_INTR_H #define __ASM_SN_INTR_H -/* Number of interrupt levels associated with each interrupt register. */ -#define N_INTPEND_BITS 64 - -#define INT_PEND0_BASELVL 0 -#define INT_PEND1_BASELVL 64 - -#define N_INTPENDJUNK_BITS 8 -#define INTPENDJUNK_CLRBIT 0x80 - /* * Macros to manipulate the interrupt register on the calling hub chip. */ @@ -84,14 +75,6 @@ do { \ #define CPU_RESCHED_B_IRQ 8 #define CPU_CALL_A_IRQ 9 #define CPU_CALL_B_IRQ 10 -#define MSC_MESG_INTR 11 -#define BASE_PCI_IRQ 12 - -/* - * INT_PEND0 again, bits determined by hardware / hardcoded: - */ -#define SDISK_INTR 63 /* SABLE name */ -#define IP_PEND0_6_63 63 /* What is this bit? */ /* * INT_PEND1 hard-coded bits: diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h index 78ef760ddde4..2c09c17cadcd 100644 --- a/arch/mips/include/asm/sn/ioc3.h +++ b/arch/mips/include/asm/sn/ioc3.h @@ -21,50 +21,50 @@ struct ioc3_serialregs { /* SUPERIO uart register map */ struct ioc3_uartregs { + u8 iu_lcr; union { - u8 iu_rbr; /* read only, DLAB == 0 */ - u8 iu_thr; /* write only, DLAB == 0 */ - u8 iu_dll; /* DLAB == 1 */ + u8 iu_iir; /* read only */ + u8 iu_fcr; /* write only */ }; union { u8 iu_ier; /* DLAB == 0 */ u8 iu_dlm; /* DLAB == 1 */ }; union { - u8 iu_iir; /* read only */ - u8 iu_fcr; /* write only */ + u8 iu_rbr; /* read only, DLAB == 0 */ + u8 iu_thr; /* write only, DLAB == 0 */ + u8 iu_dll; /* DLAB == 1 */ }; - u8 iu_lcr; - u8 iu_mcr; - u8 iu_lsr; - u8 iu_msr; u8 iu_scr; + u8 iu_msr; + u8 iu_lsr; + u8 iu_mcr; }; struct ioc3_sioregs { u8 fill[0x141]; /* starts at 0x141 */ - u8 uartc; u8 kbdcg; + u8 uartc; - u8 fill0[0x150 - 0x142 - 1]; + u8 fill0[0x151 - 0x142 - 1]; - u8 pp_data; - u8 pp_dsr; u8 pp_dcr; + u8 pp_dsr; + u8 pp_data; - u8 fill1[0x158 - 0x152 - 1]; + u8 fill1[0x159 - 0x153 - 1]; - u8 pp_fifa; - u8 pp_cfgb; u8 pp_ecr; + u8 pp_cfgb; + u8 pp_fifa; - u8 fill2[0x168 - 0x15a - 1]; + u8 fill2[0x16a - 0x15b - 1]; - u8 rtcad; u8 rtcdat; + u8 rtcad; - u8 fill3[0x170 - 0x169 - 1]; + u8 fill3[0x170 - 0x16b - 1]; struct ioc3_uartregs uartb; /* 0x20170 */ struct ioc3_uartregs uarta; /* 0x20178 */ @@ -598,5 +598,9 @@ struct ioc3_etxd { #define IOC3_SUBSYS_IP30_SYSBOARD 0xc304 #define IOC3_SUBSYS_MENET 0xc305 #define IOC3_SUBSYS_MENET4 0xc306 +#define IOC3_SUBSYS_IO7 0xc307 +#define IOC3_SUBSYS_IO8 0xc308 +#define IOC3_SUBSYS_IO9 0xc309 +#define IOC3_SUBSYS_IP34_SYSBOARD 0xc30A #endif /* MIPS_SN_IOC3_H */ diff --git a/arch/mips/include/asm/sn/klconfig.h b/arch/mips/include/asm/sn/klconfig.h index 467c313d5767..117f85e4bef5 100644 --- a/arch/mips/include/asm/sn/klconfig.h +++ b/arch/mips/include/asm/sn/klconfig.h @@ -889,10 +889,6 @@ typedef union { extern lboard_t *find_lboard(lboard_t *start, unsigned char type); extern klinfo_t *find_component(lboard_t *brd, klinfo_t *kli, unsigned char type); extern klinfo_t *find_first_component(lboard_t *brd, unsigned char type); -extern klcpu_t *nasid_slice_to_cpuinfo(nasid_t, int); extern lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_class); - -extern klcpu_t *sn_get_cpuinfo(cpuid_t cpu); - #endif /* _ASM_SN_KLCONFIG_H */ diff --git a/arch/mips/include/asm/sn/kldir.h b/arch/mips/include/asm/sn/kldir.h index bfb3aec94539..245f59bf3845 100644 --- a/arch/mips/include/asm/sn/kldir.h +++ b/arch/mips/include/asm/sn/kldir.h @@ -1,201 +1,16 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Derived from IRIX <sys/SN/kldir.h>, revision 1.21. - * - * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc. - * Copyright (C) 1999, 2000 by Ralf Baechle - */ +/* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _ASM_SN_KLDIR_H #define _ASM_SN_KLDIR_H - -/* - * The kldir memory area resides at a fixed place in each node's memory and - * provides pointers to most other IP27 memory areas. This allows us to - * resize and/or relocate memory areas at a later time without breaking all - * firmware and kernels that use them. Indices in the array are - * permanently dedicated to areas listed below. Some memory areas (marked - * below) reside at a permanently fixed location, but are included in the - * directory for completeness. - */ - #define KLDIR_MAGIC 0x434d5f53505f5357 -/* - * The upper portion of the memory map applies during boot - * only and is overwritten by IRIX/SYMMON. - * - * MEMORY MAP PER NODE - * - * 0x2000000 (32M) +-----------------------------------------+ - * | IO6 BUFFERS FOR FLASH ENET IOC3 | - * 0x1F80000 (31.5M) +-----------------------------------------+ - * | IO6 TEXT/DATA/BSS/stack | - * 0x1C00000 (30M) +-----------------------------------------+ - * | IO6 PROM DEBUG TEXT/DATA/BSS/stack | - * 0x0800000 (28M) +-----------------------------------------+ - * | IP27 PROM TEXT/DATA/BSS/stack | - * 0x1B00000 (27M) +-----------------------------------------+ - * | IP27 CFG | - * 0x1A00000 (26M) +-----------------------------------------+ - * | Graphics PROM | - * 0x1800000 (24M) +-----------------------------------------+ - * | 3rd Party PROM drivers | - * 0x1600000 (22M) +-----------------------------------------+ - * | | - * | Free | - * | | - * +-----------------------------------------+ - * | UNIX DEBUG Version | - * 0x190000 (2M--) +-----------------------------------------+ - * | SYMMON | - * | (For UNIX Debug only) | - * 0x34000 (208K) +-----------------------------------------+ - * | SYMMON STACK [NUM_CPU_PER_NODE] | - * | (For UNIX Debug only) | - * 0x25000 (148K) +-----------------------------------------+ - * | KLCONFIG - II (temp) | - * | | - * | ---------------------------- | - * | | - * | UNIX NON-DEBUG Version | - * 0x19000 (100K) +-----------------------------------------+ - * - * - * The lower portion of the memory map contains information that is - * permanent and is used by the IP27PROM, IO6PROM and IRIX. - * - * 0x19000 (100K) +-----------------------------------------+ - * | | - * | PI Error Spools (32K) | - * | | - * 0x12000 (72K) +-----------------------------------------+ - * | Unused | - * 0x11c00 (71K) +-----------------------------------------+ - * | CPU 1 NMI Eframe area | - * 0x11a00 (70.5K) +-----------------------------------------+ - * | CPU 0 NMI Eframe area | - * 0x11800 (70K) +-----------------------------------------+ - * | CPU 1 NMI Register save area | - * 0x11600 (69.5K) +-----------------------------------------+ - * | CPU 0 NMI Register save area | - * 0x11400 (69K) +-----------------------------------------+ - * | GDA (1k) | - * 0x11000 (68K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x10800 (66k) +-----------------------------------------+ - * | cache error eframe | - * 0x10400 (65K) +-----------------------------------------+ - * | Exception Handlers (UALIAS copy) | - * 0x10000 (64K) +-----------------------------------------+ - * | | - * | | - * | KLCONFIG - I (permanent) (48K) | - * | | - * | | - * | | - * 0x4000 (16K) +-----------------------------------------+ - * | NMI Handler (Protected Page) | - * 0x3000 (12K) +-----------------------------------------+ - * | ARCS PVECTORS (master node only) | - * 0x2c00 (11K) +-----------------------------------------+ - * | ARCS TVECTORS (master node only) | - * 0x2800 (10K) +-----------------------------------------+ - * | LAUNCH [NUM_CPU] | - * 0x2400 (9K) +-----------------------------------------+ - * | Low memory directory (KLDIR) | - * 0x2000 (8K) +-----------------------------------------+ - * | ARCS SPB (1K) | - * 0x1000 (4K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x800 (2k) +-----------------------------------------+ - * | cache error eframe | - * 0x400 (1K) +-----------------------------------------+ - * | Exception Handlers | - * 0x0 (0K) +-----------------------------------------+ - */ - -#ifdef __ASSEMBLY__ #define KLDIR_OFF_MAGIC 0x00 #define KLDIR_OFF_OFFSET 0x08 #define KLDIR_OFF_POINTER 0x10 #define KLDIR_OFF_SIZE 0x18 #define KLDIR_OFF_COUNT 0x20 #define KLDIR_OFF_STRIDE 0x28 -#endif /* __ASSEMBLY__ */ - -/* - * This is defined here because IP27_SYMMON_STK_SIZE must be at least what - * we define here. Since it's set up in the prom. We can't redefine it later - * and expect more space to be allocated. The way to find out the true size - * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE - * for a particular node. - */ -#define SYMMON_STACK_SIZE 0x8000 - -#if defined(PROM) - -/* - * These defines are prom version dependent. No code other than the IP27 - * prom should attempt to use these values. - */ -#define IP27_LAUNCH_OFFSET 0x2400 -#define IP27_LAUNCH_SIZE 0x400 -#define IP27_LAUNCH_COUNT 2 -#define IP27_LAUNCH_STRIDE 0x200 - -#define IP27_KLCONFIG_OFFSET 0x4000 -#define IP27_KLCONFIG_SIZE 0xc000 -#define IP27_KLCONFIG_COUNT 1 -#define IP27_KLCONFIG_STRIDE 0 - -#define IP27_NMI_OFFSET 0x3000 -#define IP27_NMI_SIZE 0x40 -#define IP27_NMI_COUNT 2 -#define IP27_NMI_STRIDE 0x40 - -#define IP27_PI_ERROR_OFFSET 0x12000 -#define IP27_PI_ERROR_SIZE 0x4000 -#define IP27_PI_ERROR_COUNT 1 -#define IP27_PI_ERROR_STRIDE 0 - -#define IP27_SYMMON_STK_OFFSET 0x25000 -#define IP27_SYMMON_STK_SIZE 0xe000 -#define IP27_SYMMON_STK_COUNT 2 -/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ -#define IP27_SYMMON_STK_STRIDE 0x7000 - -#define IP27_FREEMEM_OFFSET 0x19000 -#define IP27_FREEMEM_SIZE -1 -#define IP27_FREEMEM_COUNT 1 -#define IP27_FREEMEM_STRIDE 0 - -#endif /* PROM */ -/* - * There will be only one of these in a partition so the IO6 must set it up. - */ -#define IO6_GDA_OFFSET 0x11000 -#define IO6_GDA_SIZE 0x400 -#define IO6_GDA_COUNT 1 -#define IO6_GDA_STRIDE 0 - -/* - * save area of kernel nmi regs in the prom format - */ -#define IP27_NMI_KREGS_OFFSET 0x11400 -#define IP27_NMI_KREGS_CPU_SIZE 0x200 -/* - * save area of kernel nmi regs in eframe format - */ -#define IP27_NMI_EFRAME_OFFSET 0x11800 -#define IP27_NMI_EFRAME_SIZE 0x200 #define KLDIR_ENT_SIZE 0x40 #define KLDIR_MAX_ENTRIES (0x400 / 0x40) @@ -214,4 +29,8 @@ typedef struct kldir_ent_s { } kldir_ent_t; #endif /* !__ASSEMBLY__ */ +#ifdef CONFIG_SGI_IP27 +#include <asm/sn/sn0/kldir.h> +#endif + #endif /* _ASM_SN_KLDIR_H */ diff --git a/arch/mips/include/asm/sn/sn0/hub.h b/arch/mips/include/asm/sn/sn0/hub.h index d78dd76d5dcf..c84adde36d41 100644 --- a/arch/mips/include/asm/sn/sn0/hub.h +++ b/arch/mips/include/asm/sn/sn0/hub.h @@ -37,4 +37,26 @@ #define UATTR_MSPEC 2 #define UATTR_UNCAC 3 +#ifdef __ASSEMBLY__ +/* + * Returns the local nasid into res. + */ + .macro GET_NASID_ASM res + dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID) + ld \res, (\res) + and \res, NSRI_NODEID_MASK + dsrl \res, NSRI_NODEID_SHFT + .endm +#else + +/* + * get_nasid() returns the physical node id number of the caller. + */ +static inline nasid_t get_nasid(void) +{ + return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK) + >> NSRI_NODEID_SHFT); +} +#endif + #endif /* _ASM_SN_SN0_HUB_H */ diff --git a/arch/mips/include/asm/sn/sn0/hubni.h b/arch/mips/include/asm/sn/sn0/hubni.h index b73c4bee65f2..b8253142cb83 100644 --- a/arch/mips/include/asm/sn/sn0/hubni.h +++ b/arch/mips/include/asm/sn/sn0/hubni.h @@ -250,6 +250,14 @@ typedef union hubni_port_error_u { #define NI_LLP_CB_MAX 0xff #define NI_LLP_SN_MAX 0xff +static inline int get_region_shift(void) +{ + if (LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) + return NASID_TO_FINEREG_SHFT; + + return NASID_TO_COARSEREG_SHFT; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_SGI_SN0_HUBNI_H */ diff --git a/arch/mips/include/asm/sn/sn0/ip27.h b/arch/mips/include/asm/sn/sn0/ip27.h deleted file mode 100644 index 3b5efeefcc3f..000000000000 --- a/arch/mips/include/asm/sn/sn0/ip27.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Derived from IRIX <sys/SN/SN0/IP27.h>. - * - * Copyright (C) 1992 - 1997, 1999 Silicon Graphics, Inc. - * Copyright (C) 1999, 2006 by Ralf Baechle - */ -#ifndef _ASM_SN_SN0_IP27_H -#define _ASM_SN_SN0_IP27_H - -#include <asm/mipsregs.h> - -/* - * Simple definitions for the masks which remove SW bits from pte. - */ - -#define TLBLO_HWBITSHIFT 0 /* Shift value, for masking */ - -#ifndef __ASSEMBLY__ - -#define CAUSE_BERRINTR IE_IRQ5 - -#define ECCF_CACHE_ERR 0 -#define ECCF_TAGLO 1 -#define ECCF_ECC 2 -#define ECCF_ERROREPC 3 -#define ECCF_PADDR 4 -#define ECCF_SIZE (5 * sizeof(long)) - -#endif /* !__ASSEMBLY__ */ - -#ifdef __ASSEMBLY__ - -/* - * KL_GET_CPUNUM (similar to EV_GET_SPNUM for EVEREST platform) reads - * the processor number of the calling processor. The proc parameters - * must be a register. - */ -#define KL_GET_CPUNUM(proc) \ - dli proc, LOCAL_HUB(0); \ - ld proc, PI_CPU_NUM(proc) - -#endif /* __ASSEMBLY__ */ - -/* - * R10000 status register interrupt bit mask usage for IP27. - */ -#define SRB_SWTIMO IE_SW0 /* 0x0100 */ -#define SRB_NET IE_SW1 /* 0x0200 */ -#define SRB_DEV0 IE_IRQ0 /* 0x0400 */ -#define SRB_DEV1 IE_IRQ1 /* 0x0800 */ -#define SRB_TIMOCLK IE_IRQ2 /* 0x1000 */ -#define SRB_PROFCLK IE_IRQ3 /* 0x2000 */ -#define SRB_ERR IE_IRQ4 /* 0x4000 */ -#define SRB_SCHEDCLK IE_IRQ5 /* 0x8000 */ - -#define SR_IBIT_HI SRB_DEV0 -#define SR_IBIT_PROF SRB_PROFCLK - -#define SRB_SWTIMO_IDX 0 -#define SRB_NET_IDX 1 -#define SRB_DEV0_IDX 2 -#define SRB_DEV1_IDX 3 -#define SRB_TIMOCLK_IDX 4 -#define SRB_PROFCLK_IDX 5 -#define SRB_ERR_IDX 6 -#define SRB_SCHEDCLK_IDX 7 - -#define NUM_CAUSE_INTRS 8 - -#define SCACHE_LINESIZE 128 -#define SCACHE_LINEMASK (SCACHE_LINESIZE - 1) - -#include <asm/sn/addrs.h> - -#define LED_CYCLE_MASK 0x0f -#define LED_CYCLE_SHFT 4 - -#define SEND_NMI(_nasid, _slice) \ - REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1) - -#endif /* _ASM_SN_SN0_IP27_H */ diff --git a/arch/mips/include/asm/sn/sn0/kldir.h b/arch/mips/include/asm/sn/sn0/kldir.h new file mode 100644 index 000000000000..1b10af6cbd5e --- /dev/null +++ b/arch/mips/include/asm/sn/sn0/kldir.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Derived from IRIX <sys/SN/kldir.h>, revision 1.21. + * + * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 1999, 2000 by Ralf Baechle + */ +#ifndef _ASM_SN_SN0_KLDIR_H +#define _ASM_SN_SN0_KLDIR_H + + +/* + * The kldir memory area resides at a fixed place in each node's memory and + * provides pointers to most other IP27 memory areas. This allows us to + * resize and/or relocate memory areas at a later time without breaking all + * firmware and kernels that use them. Indices in the array are + * permanently dedicated to areas listed below. Some memory areas (marked + * below) reside at a permanently fixed location, but are included in the + * directory for completeness. + */ + +/* + * The upper portion of the memory map applies during boot + * only and is overwritten by IRIX/SYMMON. + * + * MEMORY MAP PER NODE + * + * 0x2000000 (32M) +-----------------------------------------+ + * | IO6 BUFFERS FOR FLASH ENET IOC3 | + * 0x1F80000 (31.5M) +-----------------------------------------+ + * | IO6 TEXT/DATA/BSS/stack | + * 0x1C00000 (30M) +-----------------------------------------+ + * | IO6 PROM DEBUG TEXT/DATA/BSS/stack | + * 0x0800000 (28M) +-----------------------------------------+ + * | IP27 PROM TEXT/DATA/BSS/stack | + * 0x1B00000 (27M) +-----------------------------------------+ + * | IP27 CFG | + * 0x1A00000 (26M) +-----------------------------------------+ + * | Graphics PROM | + * 0x1800000 (24M) +-----------------------------------------+ + * | 3rd Party PROM drivers | + * 0x1600000 (22M) +-----------------------------------------+ + * | | + * | Free | + * | | + * +-----------------------------------------+ + * | UNIX DEBUG Version | + * 0x190000 (2M--) +-----------------------------------------+ + * | SYMMON | + * | (For UNIX Debug only) | + * 0x34000 (208K) +-----------------------------------------+ + * | SYMMON STACK [NUM_CPU_PER_NODE] | + * | (For UNIX Debug only) | + * 0x25000 (148K) +-----------------------------------------+ + * | KLCONFIG - II (temp) | + * | | + * | ---------------------------- | + * | | + * | UNIX NON-DEBUG Version | + * 0x19000 (100K) +-----------------------------------------+ + * + * + * The lower portion of the memory map contains information that is + * permanent and is used by the IP27PROM, IO6PROM and IRIX. + * + * 0x19000 (100K) +-----------------------------------------+ + * | | + * | PI Error Spools (32K) | + * | | + * 0x12000 (72K) +-----------------------------------------+ + * | Unused | + * 0x11c00 (71K) +-----------------------------------------+ + * | CPU 1 NMI Eframe area | + * 0x11a00 (70.5K) +-----------------------------------------+ + * | CPU 0 NMI Eframe area | + * 0x11800 (70K) +-----------------------------------------+ + * | CPU 1 NMI Register save area | + * 0x11600 (69.5K) +-----------------------------------------+ + * | CPU 0 NMI Register save area | + * 0x11400 (69K) +-----------------------------------------+ + * | GDA (1k) | + * 0x11000 (68K) +-----------------------------------------+ + * | Early cache Exception stack | + * | and/or | + * | kernel/io6prom nmi registers | + * 0x10800 (66k) +-----------------------------------------+ + * | cache error eframe | + * 0x10400 (65K) +-----------------------------------------+ + * | Exception Handlers (UALIAS copy) | + * 0x10000 (64K) +-----------------------------------------+ + * | | + * | | + * | KLCONFIG - I (permanent) (48K) | + * | | + * | | + * | | + * 0x4000 (16K) +-----------------------------------------+ + * | NMI Handler (Protected Page) | + * 0x3000 (12K) +-----------------------------------------+ + * | ARCS PVECTORS (master node only) | + * 0x2c00 (11K) +-----------------------------------------+ + * | ARCS TVECTORS (master node only) | + * 0x2800 (10K) +-----------------------------------------+ + * | LAUNCH [NUM_CPU] | + * 0x2400 (9K) +-----------------------------------------+ + * | Low memory directory (KLDIR) | + * 0x2000 (8K) +-----------------------------------------+ + * | ARCS SPB (1K) | + * 0x1000 (4K) +-----------------------------------------+ + * | Early cache Exception stack | + * | and/or | + * | kernel/io6prom nmi registers | + * 0x800 (2k) +-----------------------------------------+ + * | cache error eframe | + * 0x400 (1K) +-----------------------------------------+ + * | Exception Handlers | + * 0x0 (0K) +-----------------------------------------+ + */ + +/* + * This is defined here because IP27_SYMMON_STK_SIZE must be at least what + * we define here. Since it's set up in the prom. We can't redefine it later + * and expect more space to be allocated. The way to find out the true size + * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE + * for a particular node. + */ +#define SYMMON_STACK_SIZE 0x8000 + +#if defined(PROM) + +/* + * These defines are prom version dependent. No code other than the IP27 + * prom should attempt to use these values. + */ +#define IP27_LAUNCH_OFFSET 0x2400 +#define IP27_LAUNCH_SIZE 0x400 +#define IP27_LAUNCH_COUNT 2 +#define IP27_LAUNCH_STRIDE 0x200 + +#define IP27_KLCONFIG_OFFSET 0x4000 +#define IP27_KLCONFIG_SIZE 0xc000 +#define IP27_KLCONFIG_COUNT 1 +#define IP27_KLCONFIG_STRIDE 0 + +#define IP27_NMI_OFFSET 0x3000 +#define IP27_NMI_SIZE 0x40 +#define IP27_NMI_COUNT 2 +#define IP27_NMI_STRIDE 0x40 + +#define IP27_PI_ERROR_OFFSET 0x12000 +#define IP27_PI_ERROR_SIZE 0x4000 +#define IP27_PI_ERROR_COUNT 1 +#define IP27_PI_ERROR_STRIDE 0 + +#define IP27_SYMMON_STK_OFFSET 0x25000 +#define IP27_SYMMON_STK_SIZE 0xe000 +#define IP27_SYMMON_STK_COUNT 2 +/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ +#define IP27_SYMMON_STK_STRIDE 0x7000 + +#define IP27_FREEMEM_OFFSET 0x19000 +#define IP27_FREEMEM_SIZE -1 +#define IP27_FREEMEM_COUNT 1 +#define IP27_FREEMEM_STRIDE 0 + +#endif /* PROM */ +/* + * There will be only one of these in a partition so the IO6 must set it up. + */ +#define IO6_GDA_OFFSET 0x11000 +#define IO6_GDA_SIZE 0x400 +#define IO6_GDA_COUNT 1 +#define IO6_GDA_STRIDE 0 + +/* + * save area of kernel nmi regs in the prom format + */ +#define IP27_NMI_KREGS_OFFSET 0x11400 +#define IP27_NMI_KREGS_CPU_SIZE 0x200 +/* + * save area of kernel nmi regs in eframe format + */ +#define IP27_NMI_EFRAME_OFFSET 0x11800 +#define IP27_NMI_EFRAME_SIZE 0x200 + +#endif /* _ASM_SN_SN0_KLDIR_H */ diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h deleted file mode 100644 index 63a2c30d81c6..000000000000 --- a/arch/mips/include/asm/sn/sn_private.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SN_SN_PRIVATE_H -#define __ASM_SN_SN_PRIVATE_H - -#include <asm/sn/types.h> - -extern nasid_t master_nasid; - -extern void cpu_node_probe(void); -extern void hub_rtc_init(nasid_t nasid); -extern void cpu_time_init(void); -extern void per_cpu_init(void); -extern void install_cpu_nmi_handler(int slice); -extern void install_ipi(void); -extern void setup_replication_mask(void); -extern void replicate_kernel_text(void); -extern unsigned long node_getfirstfree(nasid_t nasid); - -#endif /* __ASM_SN_SN_PRIVATE_H */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index 203c927e84d1..451ba1ee41ad 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -11,6 +11,8 @@ #include <linux/types.h> +#ifndef __ASSEMBLY__ + typedef unsigned long cpuid_t; typedef signed short nasid_t; /* node id in numa-as-id space */ typedef signed char partid_t; /* partition ID type */ @@ -18,4 +20,6 @@ typedef signed short moduleid_t; /* user-visible module number type */ typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */ +#endif + #endif /* _ASM_SN_TYPES_H */ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4993db40482c..ee26f9a4575d 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,8 +49,26 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index b08825531e9f..a58687e26c5d 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -26,8 +26,6 @@ #define __VDSO_USE_SYSCALL ULLONG_MAX -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL - static __always_inline long gettimeofday_fallback( struct __kernel_old_timeval *_tv, struct timezone *_tz) @@ -48,17 +46,6 @@ static __always_inline long gettimeofday_fallback( return error ? -ret : ret; } -#else - -static __always_inline long gettimeofday_fallback( - struct __kernel_old_timeval *_tv, - struct timezone *_tz) -{ - return -1; -} - -#endif - static __always_inline long clock_gettime_fallback( clockid_t _clkid, struct __kernel_timespec *_ts) @@ -109,8 +96,6 @@ static __always_inline int clock_getres_fallback( #if _MIPS_SIM != _MIPS_SIM_ABI64 -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime32_fallback( clockid_t _clkid, struct old_timespec32 *_ts) diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h new file mode 100644 index 000000000000..25dc09b25eaf --- /dev/null +++ b/arch/mips/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MIPS_VMALLOC_H +#define _ASM_MIPS_VMALLOC_H + +#endif /* _ASM_MIPS_VMALLOC_H */ diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 4dd0c446ecec..412d2faa3cdf 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -16,6 +16,10 @@ config JZ4780_CI20 bool "MIPS Creator CI20" select MACH_JZ4780 +config X1000_CU1000_NEO + bool "YSH & ATIL CU1000 Module with Neo backplane" + select MACH_X1000 + endchoice config MACH_JZ4740 @@ -33,3 +37,9 @@ config MACH_JZ4780 select MIPS_CPU_SCACHE select SYS_HAS_CPU_MIPS32_R2 select SYS_SUPPORTS_HIGHMEM + +config MACH_X1000 + bool + select MIPS_CPU_SCACHE + select SYS_HAS_CPU_MIPS32_R2 + select SYS_SUPPORTS_HIGHMEM diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index dc8ee21e0948..880c26857aff 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -44,6 +44,8 @@ static void __init jz4740_detect_mem(void) static unsigned long __init get_board_mach_type(const void *fdt) { + if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1830")) + return MACH_INGENIC_X1830; if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1000")) return MACH_INGENIC_X1000; if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780")) @@ -86,6 +88,8 @@ void __init device_tree_init(void) const char *get_system_type(void) { switch (mips_machtype) { + case MACH_INGENIC_X1830: + return "X1830"; case MACH_INGENIC_X1000: return "X1000"; case MACH_INGENIC_JZ4780: diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index f777e44653d5..47312c529410 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu) return 0; } +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + static int __populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu) struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c54332697673..6ab6b03d35ba 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -102,7 +102,12 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) if (fir & MIPS_FPIR_HAS2008) { fcsr = read_32bit_cp1_register(CP1_STATUS); - fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008); + /* + * MAC2008 toolchain never landed in real world, so we're only + * testing wether it can be disabled and don't try to enabled + * it. + */ + fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 | FPU_CSR_MAC2008); write_32bit_cp1_register(CP1_STATUS, fcsr0); fcsr0 = read_32bit_cp1_register(CP1_STATUS); @@ -112,6 +117,15 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) write_32bit_cp1_register(CP1_STATUS, fcsr); + if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2)) { + /* + * The bit for MAC2008 might be reused by R6 in future, + * so we only test for R2-R5. + */ + if (fcsr0 & FPU_CSR_MAC2008) + c->options |= MIPS_CPU_MAC_2008_ONLY; + } + if (!(fcsr0 & FPU_CSR_NAN2008)) c->options |= MIPS_CPU_NAN_LEGACY; if (fcsr1 & FPU_CSR_NAN2008) @@ -1960,10 +1974,8 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter); switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_XBURST: - c->cputype = CPU_XBURST; - c->writecombine = _CACHE_UNCACHED_ACCELERATED; - __cpu_name[cpu] = "Ingenic JZRISC"; + case PRID_IMP_XBURST_REV1: + /* * The XBurst core by default attempts to avoid branch target * buffer lookups by detecting & special casing loops. This @@ -1971,34 +1983,43 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) * Set cp0 config7 bit 4 to disable this feature. */ set_c0_config7(MIPS_CONF7_BTB_LOOP_EN); - break; - default: - panic("Unknown Ingenic Processor ID!"); - break; - } - switch (c->processor_id & PRID_COMP_MASK) { - /* - * The config0 register in the XBurst CPUs with a processor ID of - * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this - * mode is not compatible with the MIPS standard, it will cause - * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) - * when starting the init process. After chip reset, the default - * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to - * switch back to VTLB mode to prevent getting stuck. - */ - case PRID_COMP_INGENIC_D1: - write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); - break; - /* - * The config0 register in the XBurst CPUs with a processor ID of - * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, - * but they don't actually support this ISA. - */ - case PRID_COMP_INGENIC_D0: - c->isa_level &= ~MIPS_CPU_ISA_M32R2; + switch (c->processor_id & PRID_COMP_MASK) { + + /* + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, + * but they don't actually support this ISA. + */ + case PRID_COMP_INGENIC_D0: + c->isa_level &= ~MIPS_CPU_ISA_M32R2; + break; + + /* + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this + * mode is not compatible with the MIPS standard, it will cause + * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) + * when starting the init process. After chip reset, the default + * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to + * switch back to VTLB mode to prevent getting stuck. + */ + case PRID_COMP_INGENIC_D1: + write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); + break; + + default: + break; + } + /* fall-through */ + case PRID_IMP_XBURST_REV2: + c->cputype = CPU_XBURST; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + __cpu_name[cpu] = "Ingenic XBurst"; break; + default: + panic("Unknown Ingenic Processor ID!"); break; } } diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 5469d43b6966..4849a48afc0f 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -19,7 +19,7 @@ #include <asm/thread_info.h> #include <asm/war.h> -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION #define resume_kernel restore_all #else #define __ret_from_irq ret_from_exception @@ -27,7 +27,7 @@ .text .align 5 -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION FEXPORT(ret_from_exception) local_irq_disable # preempt stop b __ret_from_irq @@ -53,7 +53,7 @@ resume_userspace: bnez t0, work_pending j restore_all -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: local_irq_disable lw t0, TI_PRE_COUNT($28) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index e5ea3db23d6b..cdb93ed91cde 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -194,7 +194,7 @@ static void mips_cm_probe_l2sync(void) write_gcr_l2_only_sync_base(addr | CM_GCR_L2_ONLY_SYNC_BASE_SYNCEN); /* Map the region */ - mips_cm_l2sync_base = ioremap_nocache(addr, MIPS_CM_L2SYNC_SIZE); + mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE); } int mips_cm_probe(void) @@ -215,7 +215,7 @@ int mips_cm_probe(void) if (!addr) return -ENODEV; - mips_gcr_base = ioremap_nocache(addr, MIPS_CM_GCR_SIZE); + mips_gcr_base = ioremap(addr, MIPS_CM_GCR_SIZE); if (!mips_gcr_base) return -ENXIO; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 69e3e0b556bf..8d2535123f11 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -78,7 +78,7 @@ int mips_cpc_probe(void) if (!addr) return -ENODEV; - mips_cpc_base = ioremap_nocache(addr, 0x8000); + mips_cpc_base = ioremap(addr, 0x8000); if (!mips_cpc_base) return -ENXIO; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c3d4212b5f1d..1ac2752fb791 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -515,8 +515,7 @@ static void __init request_crashkernel(struct resource *res) ret = request_resource(res, &crashk_res); if (!ret) pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", - (unsigned long)((crashk_res.end - - crashk_res.start + 1) >> 20), + (unsigned long)(resource_size(&crashk_res) >> 20), (unsigned long)(crashk_res.start >> 20)); } #else /* !defined(CONFIG_KEXEC) */ @@ -698,8 +697,7 @@ static void __init arch_mem_init(char **cmdline_p) mips_parse_crashkernel(); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) - memblock_reserve(crashk_res.start, - crashk_res.end - crashk_res.start + 1); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); #endif device_tree_init(); sparse_init(); @@ -796,8 +794,6 @@ void __init setup_arch(char **cmdline_p) #if defined(CONFIG_VT) #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; #endif #endif diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index f2973ce87f53..abdd7aaa3311 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -90,6 +90,9 @@ void synchronise_count_master(int cpu) void synchronise_count_slave(int cpu) { int i; + unsigned long flags; + + local_irq_save(flags); /* * Not every cpu is online at the time this gets called, @@ -113,5 +116,7 @@ void synchronise_count_slave(int cpu) } /* Arrange for an interrupt in a short while */ write_c0_compare(read_c0_count() + COUNTON); + + local_irq_restore(flags); } #undef NR_LOOPS diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6..6efb2f6889a7 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ '$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index e7c5ab38e403..1f9e8ad636cc 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -374,3 +374,5 @@ 433 n32 fspick sys_fspick 434 n32 pidfd_open sys_pidfd_open 435 n32 clone3 __sys_clone3 +437 n32 openat2 sys_openat2 +438 n32 pidfd_getfd sys_pidfd_getfd diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 13cd66581f3b..c0b9d802dbf6 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -350,3 +350,5 @@ 433 n64 fspick sys_fspick 434 n64 pidfd_open sys_pidfd_open 435 n64 clone3 __sys_clone3 +437 n64 openat2 sys_openat2 +438 n64 pidfd_getfd sys_pidfd_getfd diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 353539ea4140..ac586774c980 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -423,3 +423,5 @@ 433 o32 fspick sys_fspick 434 o32 pidfd_open sys_pidfd_open 435 o32 clone3 __sys_clone3 +437 o32 openat2 sys_openat2 +438 o32 pidfd_getfd sys_pidfd_getfd diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 83f2a437d9e2..31968cbd6464 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -210,11 +210,6 @@ void show_stack(struct task_struct *task, unsigned long *sp) regs.regs[29] = task->thread.reg29; regs.regs[31] = 0; regs.cp0_epc = task->thread.reg31; -#ifdef CONFIG_KGDB_KDB - } else if (atomic_read(&kgdb_active) != -1 && - kdb_current_regs) { - memcpy(®s, kdb_current_regs, sizeof(regs)); -#endif /* CONFIG_KGDB_KDB */ } else { prepare_frametrace(®s); } diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 92bd2b0f0548..ca6fc4762d97 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -131,7 +131,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -152,7 +152,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ @@ -187,7 +187,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ do { \ @@ -213,7 +213,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -256,7 +256,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ @@ -340,7 +340,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreHW(addr, value, res, type) \ @@ -366,7 +366,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -407,7 +407,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -483,7 +483,7 @@ do { \ : "memory"); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #else /* __BIG_ENDIAN */ @@ -509,7 +509,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -530,7 +530,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ @@ -565,7 +565,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ @@ -592,7 +592,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -635,7 +635,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ @@ -718,7 +718,7 @@ do { \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreHW(addr, value, res, type) \ do { \ @@ -743,7 +743,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -784,7 +784,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without swl and sdl instructions */ #define _StoreW(addr, value, res, type) \ do { \ @@ -861,7 +861,7 @@ do { \ : "memory"); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #endif #define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 1109924560d8..2606f3f02b54 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -156,7 +156,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); } mutex_lock(&kvm->lock); @@ -280,25 +280,27 @@ static inline void dump_handler(const char *symbol, void *start, void *end) pr_debug("\tEND(%s)\n", symbol); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err, size; void *gebase, *p, *handler, *refill_start, *refill_end; int i; - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - - if (!vcpu) { - err = -ENOMEM; - goto out; - } - - err = kvm_vcpu_init(vcpu, kvm, id); + kvm_debug("kvm @ %p: create cpu %d at %p\n", + vcpu->kvm, vcpu->vcpu_id, vcpu); + err = kvm_mips_callbacks->vcpu_init(vcpu); if (err) - goto out_free_cpu; + return err; - kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); + hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; /* * Allocate space for host mode exception handlers that handle @@ -313,7 +315,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_uninit_cpu; + goto out_uninit_vcpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -392,38 +394,33 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.last_sched_cpu = -1; vcpu->arch.last_exec_cpu = -1; - return vcpu; + /* Initial guest state */ + err = kvm_mips_callbacks->vcpu_setup(vcpu); + if (err) + goto out_free_commpage; + + return 0; +out_free_commpage: + kfree(vcpu->arch.kseg0_commpage); out_free_gebase: kfree(gebase); - -out_uninit_cpu: - kvm_vcpu_uninit(vcpu); - -out_free_cpu: - kfree(vcpu); - -out: - return ERR_PTR(err); +out_uninit_vcpu: + kvm_mips_callbacks->vcpu_uninit(vcpu); + return err; } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { hrtimer_cancel(&vcpu->arch.comparecount_timer); - kvm_vcpu_uninit(vcpu); - kvm_mips_dump_stats(vcpu); kvm_mmu_free_memory_caches(vcpu); kfree(vcpu->arch.guest_ebase); kfree(vcpu->arch.kseg0_commpage); - kfree(vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvm_mips_callbacks->vcpu_uninit(vcpu); } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -1233,37 +1230,12 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) return kvm_mips_count_timeout(vcpu); } -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - int err; - - err = kvm_mips_callbacks->vcpu_init(vcpu); - if (err) - return err; - - hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; - return 0; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - kvm_mips_callbacks->vcpu_uninit(vcpu); -} - int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { return 0; } -/* Initial guest state */ -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return kvm_mips_callbacks->vcpu_setup(vcpu); -} - static void kvm_mips_set_c0_status(void) { u32 status = read_c0_status(); diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 037b08f3257e..42222f849bd2 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -221,16 +221,16 @@ void __init ltq_soc_init(void) res_sys[2].name) < 0)) pr_err("Failed to request core resources"); - status_membase = ioremap_nocache(res_status.start, + status_membase = ioremap(res_status.start, resource_size(&res_status)); - ltq_ebu_membase = ioremap_nocache(res_ebu.start, + ltq_ebu_membase = ioremap(res_ebu.start, resource_size(&res_ebu)); if (!status_membase || !ltq_ebu_membase) panic("Failed to remap core resources"); for (i = 0; i < 3; i++) { - sysctl_membase[i] = ioremap_nocache(res_sys[i].start, + sysctl_membase[i] = ioremap(res_sys[i].start, resource_size(&res_sys[i])); if (!sysctl_membase[i]) panic("Failed to remap sysctrl resources"); diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 115b417dfb8e..df8eed3875f6 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -349,7 +349,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) res.name)) pr_err("Failed to request icu%i memory\n", vpe); - ltq_icu_membase[vpe] = ioremap_nocache(res.start, + ltq_icu_membase[vpe] = ioremap(res.start, resource_size(&res)); if (!ltq_icu_membase[vpe]) @@ -402,7 +402,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) res.name)) pr_err("Failed to request eiu memory"); - ltq_eiu_membase = ioremap_nocache(res.start, + ltq_eiu_membase = ioremap(res.start, resource_size(&res)); if (!ltq_eiu_membase) panic("Failed to remap eiu memory"); diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 156a95ac5c72..aa37545ebe8f 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -431,10 +431,10 @@ void __init ltq_soc_init(void) res_ebu.name)) pr_err("Failed to request core resources"); - pmu_membase = ioremap_nocache(res_pmu.start, resource_size(&res_pmu)); - ltq_cgu_membase = ioremap_nocache(res_cgu.start, + pmu_membase = ioremap(res_pmu.start, resource_size(&res_pmu)); + ltq_cgu_membase = ioremap(res_cgu.start, resource_size(&res_cgu)); - ltq_ebu_membase = ioremap_nocache(res_ebu.start, + ltq_ebu_membase = ioremap(res_ebu.start, resource_size(&res_ebu)); if (!pmu_membase || !ltq_cgu_membase || !ltq_ebu_membase) panic("Failed to remap core resources"); diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index 8126f15b8e09..61c033494af5 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -89,13 +89,12 @@ static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations pvc_line_proc_fops = { - .owner = THIS_MODULE, - .open = pvc_line_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = pvc_line_proc_write, +static const struct proc_ops pvc_line_proc_ops = { + .proc_open = pvc_line_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = pvc_line_proc_write, }; static ssize_t pvc_scroll_proc_write(struct file *file, const char __user *buf, @@ -148,13 +147,12 @@ static int pvc_scroll_proc_open(struct inode *inode, struct file *file) return single_open(file, pvc_scroll_proc_show, NULL); } -static const struct file_operations pvc_scroll_proc_fops = { - .owner = THIS_MODULE, - .open = pvc_scroll_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = pvc_scroll_proc_write, +static const struct proc_ops pvc_scroll_proc_ops = { + .proc_open = pvc_scroll_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = pvc_scroll_proc_write, }; void pvc_proc_timerfunc(struct timer_list *unused) @@ -189,12 +187,11 @@ static int __init pvc_proc_init(void) } for (i = 0; i < PVC_NLINES; i++) { proc_entry = proc_create_data(pvc_linename[i], 0644, dir, - &pvc_line_proc_fops, &pvc_linedata[i]); + &pvc_line_proc_ops, &pvc_linedata[i]); if (proc_entry == NULL) goto error; } - proc_entry = proc_create("scroll", 0644, dir, - &pvc_scroll_proc_fops); + proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_ops); if (proc_entry == NULL) goto error; diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index cdd19d8561e8..f7994d936505 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -301,14 +301,14 @@ and t0, src, ADDRMASK PREFS( 0, 2*32(src) ) PREFD( 1, 2*32(dst) ) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR bnez t1, .Ldst_unaligned\@ nop bnez t0, .Lsrc_unaligned_dst_aligned\@ -#else +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ or t0, t0, t1 bnez t0, .Lcopy_unaligned_bytes\@ -#endif +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* * use delay slot for fall-through * src and dst are aligned; need to compute rem @@ -389,7 +389,7 @@ bne rem, len, 1b .set noreorder -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) * A loop would do only a byte at a time with possible branch @@ -491,7 +491,7 @@ bne len, rem, 1b .set noreorder -#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */ .Lcopy_bytes_checklen\@: beqz len, .Ldone\@ nop @@ -520,7 +520,7 @@ jr ra nop -#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifdef CONFIG_CPU_NO_LOAD_STORE_LR .Lcopy_unaligned_bytes\@: 1: COPY_BYTE(0) @@ -534,7 +534,7 @@ ADD src, src, 8 b 1b ADD dst, dst, 8 -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ .if __memcpy == 1 END(memcpy) .set __memcpy, 0 diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 418611ef13cf..d5449e8a3dfc 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -115,7 +115,7 @@ #endif .set reorder -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR R10KCBARRIER(0(ra)) #ifdef __MIPSEB__ EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */ @@ -125,7 +125,7 @@ PTR_SUBU a0, t0 /* long align ptr */ PTR_ADDU a2, t0 /* correct size */ -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define STORE_BYTE(N) \ EX(sb, a1, N(a0), .Lbyte_fixup\@); \ .set noreorder; \ @@ -150,7 +150,7 @@ ori a0, STORMASK xori a0, STORMASK PTR_ADDIU a0, STORSIZE -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ 1: ori t1, a2, 0x3f /* # of full blocks */ xori t1, 0x3f andi t0, a2, 0x40-STORSIZE @@ -185,7 +185,7 @@ .set noreorder beqz a2, 1f -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR PTR_ADDU a0, a2 /* What's left */ .set reorder R10KCBARRIER(0(ra)) @@ -194,7 +194,7 @@ #else EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@) #endif -#else +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ PTR_SUBU t0, $0, a2 .set reorder move a2, zero /* No remaining longs */ @@ -211,7 +211,7 @@ EX(sb, a1, 6(a0), .Lbyte_fixup\@) #endif 0: -#endif +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ 1: move a2, zero jr ra @@ -234,7 +234,7 @@ .hidden __memset .endif -#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifdef CONFIG_CPU_NO_LOAD_STORE_LR .Lbyte_fixup\@: /* * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1 @@ -243,7 +243,7 @@ PTR_SUBU a2, t0 PTR_ADDIU a2, 1 jr ra -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ .Lfirst_fixup\@: /* unset_bytes already in a2 */ diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index 5530070e0d05..de03838b343b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -15,7 +15,7 @@ #include <linux/export.h> #include <linux/stringify.h> -#if !defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_CPU_MIPSR6) +#if !defined(CONFIG_CPU_HAS_DIEI) /* * For cli() we have to insert nops to make sure that the new value @@ -110,4 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags) } EXPORT_SYMBOL(arch_local_irq_restore); -#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_DIEI */ diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c index 11f4cfd581fb..bcb7ae9777cf 100644 --- a/arch/mips/loongson2ef/common/pm.c +++ b/arch/mips/loongson2ef/common/pm.c @@ -91,7 +91,7 @@ static inline void stop_perf_counters(void) static void loongson_suspend_enter(void) { - static unsigned int cached_cpu_freq; + unsigned int cached_cpu_freq; /* setup wakeup events via enabling the IRQs */ setup_wakeup_events(); diff --git a/arch/mips/loongson2ef/common/reset.c b/arch/mips/loongson2ef/common/reset.c index e7c87161ce00..e49c40646995 100644 --- a/arch/mips/loongson2ef/common/reset.c +++ b/arch/mips/loongson2ef/common/reset.c @@ -17,11 +17,11 @@ static inline void loongson_reboot(void) { #ifndef CONFIG_CPU_JUMP_WORKAROUNDS - ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); + ((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) (); #else void (*func)(void); - func = (void *)ioremap_nocache(LOONGSON_BOOT_BASE, 4); + func = (void *)ioremap(LOONGSON_BOOT_BASE, 4); __asm__ __volatile__( " .set noat \n" diff --git a/arch/mips/loongson32/common/prom.c b/arch/mips/loongson32/common/prom.c index 73dd25142484..fd76114fa3b0 100644 --- a/arch/mips/loongson32/common/prom.c +++ b/arch/mips/loongson32/common/prom.c @@ -26,13 +26,13 @@ void __init prom_init(void) memsize = DEFAULT_MEMSIZE; if (strstr(arcs_cmdline, "console=ttyS3")) - uart_base = ioremap_nocache(LS1X_UART3_BASE, 0x0f); + uart_base = ioremap(LS1X_UART3_BASE, 0x0f); else if (strstr(arcs_cmdline, "console=ttyS2")) - uart_base = ioremap_nocache(LS1X_UART2_BASE, 0x0f); + uart_base = ioremap(LS1X_UART2_BASE, 0x0f); else if (strstr(arcs_cmdline, "console=ttyS1")) - uart_base = ioremap_nocache(LS1X_UART1_BASE, 0x0f); + uart_base = ioremap(LS1X_UART1_BASE, 0x0f); else - uart_base = ioremap_nocache(LS1X_UART0_BASE, 0x0f); + uart_base = ioremap(LS1X_UART0_BASE, 0x0f); setup_8250_early_printk_port((unsigned long)uart_base, 0, 0); } diff --git a/arch/mips/loongson32/common/reset.c b/arch/mips/loongson32/common/reset.c index 6c36a414dde7..0c7399b303fb 100644 --- a/arch/mips/loongson32/common/reset.c +++ b/arch/mips/loongson32/common/reset.c @@ -37,7 +37,7 @@ static void ls1x_power_off(void) static int __init ls1x_reboot_setup(void) { - wdt_reg_base = ioremap_nocache(LS1X_WDT_BASE, (SZ_4 + SZ_8)); + wdt_reg_base = ioremap(LS1X_WDT_BASE, (SZ_4 + SZ_8)); if (!wdt_reg_base) panic("Failed to remap watchdog registers"); diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c index f97662045c73..4cc73f7ac0d4 100644 --- a/arch/mips/loongson32/common/time.c +++ b/arch/mips/loongson32/common/time.c @@ -49,7 +49,7 @@ static inline void ls1x_pwmtimer_restart(void) void __init ls1x_pwmtimer_init(void) { - timer_reg_base = ioremap_nocache(LS1X_TIMER_BASE, SZ_16); + timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16); if (!timer_reg_base) panic("Failed to remap timer registers"); diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index ef94a2278561..e5b40c5e3296 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -75,7 +75,7 @@ static int __init compute_node_distance(int row, int col) loongson_sysconf.cores_per_package; if (col == row) - return 0; + return LOCAL_DISTANCE; else if (package_row == package_col) return 40; else diff --git a/arch/mips/loongson64/platform.c b/arch/mips/loongson64/platform.c index 13f3404f0030..9674ae1361a8 100644 --- a/arch/mips/loongson64/platform.c +++ b/arch/mips/loongson64/platform.c @@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index 88b3bd5fed25..bc7671079f0c 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -17,7 +17,7 @@ static inline void loongson_reboot(void) { - ((void (*)(void))ioremap_nocache(LOONGSON_BOOT_BASE, 4)) (); + ((void (*)(void))ioremap(LOONGSON_BOOT_BASE, 4)) (); } static void loongson_restart(char *command) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 710e1f804a54..9701c89e7e14 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1514,16 +1514,28 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case madd_s_op: - handler = fpemu_sp_madd; + if (cpu_has_mac2008_only) + handler = ieee754sp_madd; + else + handler = fpemu_sp_madd; goto scoptop; case msub_s_op: - handler = fpemu_sp_msub; + if (cpu_has_mac2008_only) + handler = ieee754sp_msub; + else + handler = fpemu_sp_msub; goto scoptop; case nmadd_s_op: - handler = fpemu_sp_nmadd; + if (cpu_has_mac2008_only) + handler = ieee754sp_nmadd; + else + handler = fpemu_sp_nmadd; goto scoptop; case nmsub_s_op: - handler = fpemu_sp_nmsub; + if (cpu_has_mac2008_only) + handler = ieee754sp_nmsub; + else + handler = fpemu_sp_nmsub; goto scoptop; scoptop: @@ -1610,15 +1622,27 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case madd_d_op: - handler = fpemu_dp_madd; + if (cpu_has_mac2008_only) + handler = ieee754dp_madd; + else + handler = fpemu_dp_madd; goto dcoptop; case msub_d_op: - handler = fpemu_dp_msub; + if (cpu_has_mac2008_only) + handler = ieee754dp_msub; + else + handler = fpemu_dp_msub; goto dcoptop; case nmadd_d_op: - handler = fpemu_dp_nmadd; + if (cpu_has_mac2008_only) + handler = ieee754dp_nmadd; + else + handler = fpemu_dp_nmadd; goto dcoptop; case nmsub_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_nmsub; + else handler = fpemu_dp_nmsub; goto dcoptop; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 3da0ce44cdef..e24ef374d828 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -68,6 +68,12 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ieee754_clearcx(); + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + /* * Handle the cases when at least one of x, y or z is a NaN. * Order of precedence is sNaN, qNaN and z, x, y. @@ -104,9 +110,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if ((zc == IEEE754_CLASS_INF) && - ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { /* * Cases of addition of infinities with opposite signs * or subtraction of infinities with same signs. @@ -116,15 +120,10 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, } /* * z is here either not an infinity, or an infinity having the - * same sign as product (x*y) (in case of MADDF.D instruction) - * or product -(x*y) (in MSUBF.D case). The result must be an - * infinity, and its sign is determined only by the value of - * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). */ - if (flags & MADDF_NEGATE_PRODUCT) - return ieee754dp_inf(1 ^ (xs ^ ys)); - else - return ieee754dp_inf(xs ^ ys); + return ieee754dp_inf(rs); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -135,10 +134,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, return ieee754dp_inf(zs); if (zc == IEEE754_CLASS_ZERO) { /* Handle cases +0 + (-0) and similar ones. */ - if ((!(flags & MADDF_NEGATE_PRODUCT) - && (zs == (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) - && (zs != (xs ^ ys)))) + if (zs == rs) /* * Cases of addition of zeros of equal signs * or subtraction of zeroes of opposite signs. @@ -187,9 +183,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, assert(ym & DP_HIDDEN_BIT); re = xe + ye; - rs = xs ^ ys; - if (flags & MADDF_NEGATE_PRODUCT) - rs ^= 1; /* shunt to top of word */ xm <<= 64 - (DP_FBITS + 1); @@ -340,3 +333,27 @@ union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, { return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } + +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h index b9167bd4eb60..090caa740b1e 100644 --- a/arch/mips/math-emu/ieee754.h +++ b/arch/mips/math-emu/ieee754.h @@ -68,6 +68,14 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); int ieee754sp_2008class(union ieee754sp x); union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y); @@ -103,6 +111,14 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp y); union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); int ieee754dp_2008class(union ieee754dp x); union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y); union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y); diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 52b20119e315..2c3b13546ac8 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -16,6 +16,7 @@ enum maddf_flags { MADDF_NEGATE_PRODUCT = 1 << 0, + MADDF_NEGATE_ADDITION = 1 << 1, }; static inline void ieee754_clearcx(void) diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index d638354add6d..1b85b1a527ac 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -36,6 +36,12 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, ieee754_clearcx(); + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + /* * Handle the cases when at least one of x, y or z is a NaN. * Order of precedence is sNaN, qNaN and z, x, y. @@ -73,9 +79,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if ((zc == IEEE754_CLASS_INF) && - ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { /* * Cases of addition of infinities with opposite signs * or subtraction of infinities with same signs. @@ -85,15 +89,10 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, } /* * z is here either not an infinity, or an infinity having the - * same sign as product (x*y) (in case of MADDF.D instruction) - * or product -(x*y) (in MSUBF.D case). The result must be an - * infinity, and its sign is determined only by the value of - * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). */ - if (flags & MADDF_NEGATE_PRODUCT) - return ieee754sp_inf(1 ^ (xs ^ ys)); - else - return ieee754sp_inf(xs ^ ys); + return ieee754sp_inf(rs); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -104,10 +103,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, return ieee754sp_inf(zs); if (zc == IEEE754_CLASS_ZERO) { /* Handle cases +0 + (-0) and similar ones. */ - if ((!(flags & MADDF_NEGATE_PRODUCT) - && (zs == (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) - && (zs != (xs ^ ys)))) + if (zs == rs) /* * Cases of addition of zeros of equal signs * or subtraction of zeroes of opposite signs. @@ -158,9 +154,6 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, assert(ym & SP_HIDDEN_BIT); re = xe + ye; - rs = xs ^ ys; - if (flags & MADDF_NEGATE_PRODUCT) - rs ^= 1; /* Multiple 24 bit xm and ym to give 48 bit results */ rm64 = (uint64_t)xm * ym; @@ -260,3 +253,27 @@ union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, { return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } + +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 50f9ed8c6c1b..79684000de0e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -508,6 +508,51 @@ void __ref free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(cpu_to_node(from), cpu_to_node(to)); +} + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, + cpu_to_node(cpu)); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + #ifndef CONFIG_MIPS_PGD_C0_CONTEXT unsigned long pgd_current[NR_CPUS]; #endif diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 98a063093b69..0ddf03df6268 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -240,7 +240,7 @@ static void __init remove_gic(void *fdt) * On systems using the RocIT system controller a GIC may be * present without a CM. Detect whether that is the case. */ - biu_base = ioremap_nocache(MSC01_BIU_REG_BASE, + biu_base = ioremap(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ); sc_cfg = __raw_readl(biu_base + MSC01_SC_CFG_OFS); if (sc_cfg & MSC01_SC_CFG_GICPRES_MSK) { diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 2d03af7d6b19..d55912349039 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code +obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c new file mode 100644 index 000000000000..0af88622c619 --- /dev/null +++ b/arch/mips/net/bpf_jit.c @@ -0,0 +1,1270 @@ +/* + * Just-In-Time compiler for BPF filters on MIPS + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/if_vlan.h> +#include <linux/moduleloader.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <asm/asm.h> +#include <asm/bitops.h> +#include <asm/cacheflush.h> +#include <asm/cpu-features.h> +#include <asm/uasm.h> + +#include "bpf_jit.h" + +/* ABI + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length + * + * On entry (*bpf_func)(*skb, *filter) + * a0 = MIPS_R_A0 = skb; + * a1 = MIPS_R_A1 = filter; + * + * Stack + * ... + * M[15] + * M[14] + * M[13] + * ... + * M[0] <-- r_M + * saved reg k-1 + * saved reg k-2 + * ... + * saved reg 0 <-- r_sp + * <no argument area> + * + * Packet layout + * + * <--------------------- len ------------------------> + * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> + * ---------------------------------------------------- + * | skb->data | + * ---------------------------------------------------- + */ + +#define ptr typeof(unsigned long) + +#define SCRATCH_OFF(k) (4 * (k)) + +/* JIT flags */ +#define SEEN_CALL (1 << BPF_MEMWORDS) +#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) +#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) +#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) +#define SEEN_OFF SEEN_SREG(2) +#define SEEN_A SEEN_SREG(3) +#define SEEN_X SEEN_SREG(4) +#define SEEN_SKB SEEN_SREG(5) +#define SEEN_MEM SEEN_SREG(6) +/* SEEN_SK_DATA also implies skb_hl an skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) + +/* Arguments used by JIT */ +#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ + +#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @prologue_bytes: Number of bytes for prologue + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + */ +struct jit_ctx { + const struct bpf_prog *skf; + unsigned int prologue_bytes; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; +}; + + +static inline int optimize_div(u32 *k) +{ + /* power of 2 divides can be implemented with right shift */ + if (!(*k & (*k-1))) { + *k = ilog2(*k); + return 1; + } + + return 0; +} + +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* + * Similar to emit_instr but it must be used when we need to emit + * 32-bit or 64-bit instructions + */ +#define emit_long_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + UASM_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* Determine if immediate is within the 16-bit signed range */ +static inline bool is_range16(s32 imm) +{ + return !(imm >= SBIT(15) || imm < -SBIT(15)); +} + +static inline void emit_addu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, addu, dst, src1, src2); +} + +static inline void emit_nop(struct jit_ctx *ctx) +{ + emit_instr(ctx, nop); +} + +/* Load a u32 immediate to a register */ +static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + /* addiu can only handle s16 */ + if (!is_range16(imm)) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); + p = &ctx->target[ctx->idx + 1]; + uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); + } else { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_addiu(&p, dst, r_zero, imm); + } + } + ctx->idx++; + + if (!is_range16(imm)) + ctx->idx++; +} + +static inline void emit_or(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, or, dst, src1, src2); +} + +static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, + struct jit_ctx *ctx) +{ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_or(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, ori, dst, src, imm); + } +} + +static inline void emit_daddiu(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* + * Only used for stack, so the imm is relatively small + * and it fits in 15-bits + */ + emit_instr(ctx, daddiu, dst, src, imm); +} + +static inline void emit_addiu(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + if (!is_range16(imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_addu(dst, r_tmp, src, ctx); + } else { + emit_instr(ctx, addiu, dst, src, imm); + } +} + +static inline void emit_and(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, and, dst, src1, src2); +} + +static inline void emit_andi(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_and(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, andi, dst, src, imm); + } +} + +static inline void emit_xor(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, xor, dst, src1, src2); +} + +static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_xor(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, xori, dst, src, imm); + } +} + +static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); +} + +static inline void emit_subu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, subu, dst, src1, src2); +} + +static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) +{ + emit_subu(reg, r_zero, reg, ctx); +} + +static inline void emit_sllv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, sllv, dst, src, sa); +} + +static inline void emit_sll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, sll, dst, src, sa); +} + +static inline void emit_srlv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, srlv, dst, src, sa); +} + +static inline void emit_srl(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, srl, dst, src, sa); +} + +static inline void emit_slt(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, slt, dst, src1, src2); +} + +static inline void emit_sltu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, sltu, dst, src1, src2); +} + +static inline void emit_sltiu(unsigned dst, unsigned int src, + unsigned int imm, struct jit_ctx *ctx) +{ + /* 16 bit immediate */ + if (!is_range16((s32)imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_sltu(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, sltiu, dst, src, imm); + } + +} + +/* Store register on the stack */ +static inline void emit_store_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, SW, reg, offset, base); +} + +static inline void emit_store(ptr reg, ptr base, unsigned int offset, + struct jit_ctx *ctx) +{ + emit_instr(ctx, sw, reg, offset, base); +} + +static inline void emit_load_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, LW, reg, offset, base); +} + +static inline void emit_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lw, reg, offset, base); +} + +static inline void emit_load_byte(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lb, reg, offset, base); +} + +static inline void emit_half_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lh, reg, offset, base); +} + +static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lhu, reg, offset, base); +} + +static inline void emit_mul(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, mul, dst, src1, src2); +} + +static inline void emit_div(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mflo(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_mod(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mfhi(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_dsll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsll, dst, src, sa); +} + +static inline void emit_dsrl32(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsrl32, dst, src, sa); +} + +static inline void emit_wsbh(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + emit_instr(ctx, wsbh, dst, src); +} + +/* load pointer to register */ +static inline void emit_load_ptr(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* src contains the base addr of the 32/64-pointer */ + emit_long_instr(ctx, LW, dst, imm, src); +} + +/* load a function pointer to register */ +static inline void emit_load_func(unsigned int reg, ptr imm, + struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + /* At this point imm is always 64-bit */ + emit_load_imm(r_tmp, (u64)imm >> 32, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); + } else { + emit_load_imm(reg, imm, ctx); + } +} + +/* Move to real MIPS register */ +static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDU, dst, src, r_zero); +} + +/* Move to JIT (32-bit) register */ +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_addu(dst, src, r_zero, ctx); +} + +/* Compute the immediate value for PC-relative branches. */ +static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (ctx->target == NULL) + return 0; + + /* + * We want a pc-relative branch. We only do forward branches + * so tgt is always after pc. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return ctx->offsets[tgt] - + (ctx->idx * 4 - ctx->prologue_bytes) - 4; +} + +static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, + unsigned int imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + + switch (cond) { + case MIPS_COND_EQ: + uasm_i_beq(&p, reg1, reg2, imm); + break; + case MIPS_COND_NE: + uasm_i_bne(&p, reg1, reg2, imm); + break; + case MIPS_COND_ALL: + uasm_i_b(&p, imm); + break; + default: + pr_warn("%s: Unhandled branch conditional: %d\n", + __func__, cond); + } + } + ctx->idx++; +} + +static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) +{ + emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); +} + +static inline void emit_jalr(unsigned int link, unsigned int reg, + struct jit_ctx *ctx) +{ + emit_instr(ctx, jalr, link, reg); +} + +static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) +{ + emit_instr(ctx, jr, reg); +} + +static inline u16 align_sp(unsigned int num) +{ + /* Double word alignment for 32-bit, quadword for 64-bit */ + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8; + num = (num + (align - 1)) & -align; + return num; +} + +static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) +{ + int i = 0, real_off = 0; + u32 sflags, tmp_flags; + + /* Adjust the stack pointer */ + if (offset) + emit_stack_offset(-align_sp(offset), ctx); + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is essentially a bitmap */ + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* save return address */ + if (ctx->flags & SEEN_CALL) { + emit_store_stack_reg(r_ra, r_sp, real_off, ctx); + real_off += SZREG; + } + + /* Setup r_M leaving the alignment gap if necessary */ + if (ctx->flags & SEEN_MEM) { + if (real_off % (SZREG * 2)) + real_off += SZREG; + emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); + } +} + +static void restore_bpf_jit_regs(struct jit_ctx *ctx, + unsigned int offset) +{ + int i, real_off = 0; + u32 sflags, tmp_flags; + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is a bitmap */ + i = 0; + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* restore return address */ + if (ctx->flags & SEEN_CALL) + emit_load_stack_reg(r_ra, r_sp, real_off, ctx); + + /* Restore the sp and discard the scrach memory */ + if (offset) + emit_stack_offset(align_sp(offset), ctx); +} + +static unsigned int get_stack_depth(struct jit_ctx *ctx) +{ + int sp_off = 0; + + + /* How may s* regs do we need to preserved? */ + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; + + if (ctx->flags & SEEN_MEM) + sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ + + if (ctx->flags & SEEN_CALL) + sp_off += SZREG; /* Space for our ra register */ + + return sp_off; +} + +static void build_prologue(struct jit_ctx *ctx) +{ + int sp_off; + + /* Calculate the total offset for the stack pointer */ + sp_off = get_stack_depth(ctx); + save_bpf_jit_regs(ctx, sp_off); + + if (ctx->flags & SEEN_SKB) + emit_reg_move(r_skb, MIPS_R_A0, ctx); + + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + + if (ctx->flags & SEEN_X) + emit_jit_reg_move(r_X, r_zero, ctx); + + /* + * Do not leak kernel data to userspace, we only need to clear + * r_A if it is ever used. In fact if it is never used, we + * will not save/restore it, so clearing it in this case would + * corrupt the state of the caller. + */ + if (bpf_needs_clear_a(&ctx->skf->insns[0]) && + (ctx->flags & SEEN_A)) + emit_jit_reg_move(r_A, r_zero, ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + unsigned int sp_off; + + /* Calculate the total offset for the stack pointer */ + + sp_off = get_stack_depth(ctx); + restore_bpf_jit_regs(ctx, sp_off); + + /* Return */ + emit_jr(r_ra, ctx); + emit_nop(ctx); +} + +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ + func##_positive) + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct sock_filter *inst; + unsigned int i, off, condt; + u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); + + for (i = 0; i < prog->len; i++) { + u16 code; + + inst = &(prog->insns[i]); + pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", + __func__, inst->code, inst->jt, inst->jf, inst->k); + k = inst->k; + code = bpf_anc_helper(inst); + + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + switch (code) { + case BPF_LD | BPF_IMM: + /* A <- k ==> li r_A, k */ + ctx->flags |= SEEN_A; + emit_load_imm(r_A, k, ctx); + break; + case BPF_LD | BPF_W | BPF_LEN: + BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); + /* A <- len ==> lw r_A, offset(skb) */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, len); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_LD | BPF_MEM: + /* A <- M[k] ==> lw r_A, offset(M) */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LD | BPF_W | BPF_ABS: + /* A <- P[k:4] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); + goto load; + case BPF_LD | BPF_H | BPF_ABS: + /* A <- P[k:2] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); + goto load; + case BPF_LD | BPF_B | BPF_ABS: + /* A <- P[k:1] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); +load: + emit_load_imm(r_off, k, ctx); +load_common: + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; + + emit_load_func(r_s0, (ptr)sk_load_func, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + /* Load second argument to delay slot */ + emit_reg_move(MIPS_R_A1, r_off, ctx); + /* Check the error value */ + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); + /* Return with error */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_nop(ctx); + break; + case BPF_LD | BPF_W | BPF_IND: + /* A <- P[X + k:4] */ + sk_load_func = sk_load_word; + goto load_ind; + case BPF_LD | BPF_H | BPF_IND: + /* A <- P[X + k:2] */ + sk_load_func = sk_load_half; + goto load_ind; + case BPF_LD | BPF_B | BPF_IND: + /* A <- P[X + k:1] */ + sk_load_func = sk_load_byte; +load_ind: + ctx->flags |= SEEN_OFF | SEEN_X; + emit_addiu(r_off, r_X, k, ctx); + goto load_common; + case BPF_LDX | BPF_IMM: + /* X <- k */ + ctx->flags |= SEEN_X; + emit_load_imm(r_X, k, ctx); + break; + case BPF_LDX | BPF_MEM: + /* X <- M[k] */ + ctx->flags |= SEEN_X | SEEN_MEM; + emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LDX | BPF_W | BPF_LEN: + /* X <- len */ + ctx->flags |= SEEN_X | SEEN_SKB; + off = offsetof(struct sk_buff, len); + emit_load(r_X, r_skb, off, ctx); + break; + case BPF_LDX | BPF_B | BPF_MSH: + /* X <- 4 * (P[k:1] & 0xf) */ + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; + /* Load offset to a1 */ + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); + /* + * This may emit two instructions so it may not fit + * in the delay slot. So use a0 in the delay slot. + */ + emit_load_imm(MIPS_R_A1, k, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ + /* Check the error value */ + emit_bcond(MIPS_COND_NE, r_ret, 0, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); + /* We are good */ + /* X <- P[1:K] & 0xf */ + emit_andi(r_X, r_A, 0xf, ctx); + /* X << 2 */ + emit_b(b_imm(i + 1, ctx), ctx); + emit_sll(r_X, r_X, 2, ctx); /* delay slot */ + break; + case BPF_ST: + /* M[k] <- A */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_STX: + /* M[k] <- X */ + ctx->flags |= SEEN_MEM | SEEN_X; + emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + /* A += K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + /* A += X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_addu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + /* A -= K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, -k, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + /* A -= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_subu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + /* A *= K */ + /* Load K to scratch register before MUL */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mul(r_A, r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + /* A *= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_mul(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + /* A /= k */ + if (k == 1) + break; + if (optimize_div(&k)) { + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + } + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_div(r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + /* A %= k */ + if (k == 1) { + ctx->flags |= SEEN_A; + emit_jit_reg_move(r_A, r_zero, ctx); + } else { + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mod(r_A, r_s0, ctx); + } + break; + case BPF_ALU | BPF_DIV | BPF_X: + /* A /= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + emit_bcond(MIPS_COND_EQ, r_X, r_zero, + b_imm(prog->len, ctx), ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_div(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_X: + /* A %= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + emit_bcond(MIPS_COND_EQ, r_X, r_zero, + b_imm(prog->len, ctx), ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_mod(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + /* A |= K */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + /* A |= X */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + /* A ^= k */ + ctx->flags |= SEEN_A; + emit_xori(r_A, r_A, k, ctx); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: + /* A ^= X */ + ctx->flags |= SEEN_A; + emit_xor(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + /* A &= K */ + ctx->flags |= SEEN_A; + emit_andi(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + /* A &= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_and(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + /* A <<= K */ + ctx->flags |= SEEN_A; + emit_sll(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_X: + /* A <<= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_sllv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + /* A >>= K */ + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + ctx->flags |= SEEN_A | SEEN_X; + emit_srlv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_NEG: + /* A = -A */ + ctx->flags |= SEEN_A; + emit_neg(r_A, ctx); + break; + case BPF_JMP | BPF_JA: + /* pc += K */ + emit_b(b_imm(i + k + 1, ctx), ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JEQ | BPF_K: + /* pc += ( A == K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JEQ | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A == X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_K: + /* pc += ( A >= K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A >= X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_K: + /* pc += ( A > K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A > X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_X; +jmp_cmp: + /* Greater or Equal */ + if ((condt & MIPS_COND_GE) || + (condt & MIPS_COND_GT)) { + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_sltiu(r_s0, r_A, k, ctx); + } else { /* X */ + ctx->flags |= SEEN_A | + SEEN_X; + emit_sltu(r_s0, r_A, r_X, ctx); + } + /* A < (K|X) ? r_scrach = 1 */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, + ctx); + emit_nop(ctx); + /* A > (K|X) ? scratch = 0 */ + if (condt & MIPS_COND_GT) { + /* Checking for equality */ + ctx->flags |= SEEN_A | SEEN_X; + if (condt & MIPS_COND_K) + emit_load_imm(r_s0, k, ctx); + else + emit_jit_reg_move(r_s0, r_X, + ctx); + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* Finally, A > K|X */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } else { + /* A >= (K|X) so jump */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } + } else { + /* A == K|X */ + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, + ctx); + emit_bcond(MIPS_COND_NE, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + } else { /* X */ + /* jump true */ + ctx->flags |= SEEN_A | SEEN_X; + b_off = b_imm(i + inst->jt + 1, + ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + } + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + ctx->flags |= SEEN_A; + /* pc += (A & K) ? pc -> jt : pc -> jf */ + emit_load_imm(r_s1, k, ctx); + emit_and(r_s0, r_A, r_s1, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JSET | BPF_X: + ctx->flags |= SEEN_X | SEEN_A; + /* pc += (A & X) ? pc -> jt : pc -> jf */ + emit_and(r_s0, r_A, r_X, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_RET | BPF_A: + ctx->flags |= SEEN_A; + if (i != prog->len - 1) + /* + * If this is not the last instruction + * then jump to the epilogue + */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_A, ctx); /* delay slot */ + break; + case BPF_RET | BPF_K: + /* + * It can emit two instructions so it does not fit on + * the delay slot. + */ + emit_load_imm(r_ret, k, ctx); + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_nop(ctx); + } + break; + case BPF_MISC | BPF_TAX: + /* X = A */ + ctx->flags |= SEEN_X | SEEN_A; + emit_jit_reg_move(r_X, r_A, ctx); + break; + case BPF_MISC | BPF_TXA: + /* A = X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_jit_reg_move(r_A, r_X, ctx); + break; + /* AUX */ + case BPF_ANC | SKF_AD_PROTOCOL: + /* A = ntohs(skb->protocol */ + ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + protocol) != 2); + off = offsetof(struct sk_buff, protocol); + emit_half_load(r_A, r_skb, off, ctx); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* This needs little endian fixup */ + if (cpu_has_wsbh) { + /* R2 and later have the wsbh instruction */ + emit_wsbh(r_A, r_A, ctx); + } else { + /* Get first byte */ + emit_andi(r_tmp_imm, r_A, 0xff, ctx); + /* Shift it */ + emit_sll(r_tmp, r_tmp_imm, 8, ctx); + /* Get second byte */ + emit_srl(r_tmp_imm, r_A, 8, ctx); + emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); + /* Put everyting together in r_A */ + emit_or(r_A, r_tmp, r_tmp_imm, ctx); + } +#endif + break; + case BPF_ANC | SKF_AD_CPU: + ctx->flags |= SEEN_A | SEEN_OFF; + /* A = current_thread_info()->cpu */ + BUILD_BUG_ON(sizeof_field(struct thread_info, + cpu) != 4); + off = offsetof(struct thread_info, cpu); + /* $28/gp points to the thread_info struct */ + emit_load(r_A, 28, off, ctx); + break; + case BPF_ANC | SKF_AD_IFINDEX: + /* A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_HATYPE: + /* A = skb->dev->type */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, dev); + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); + /* error (0) in the delay slot */ + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); + off = offsetof(struct net_device, ifindex); + emit_load(r_A, r_s0, off, ctx); + } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ + BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2); + off = offsetof(struct net_device, type); + emit_half_load_unsigned(r_A, r_s0, off, ctx); + } + break; + case BPF_ANC | SKF_AD_MARK: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); + off = offsetof(struct sk_buff, mark); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_RXHASH: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + vlan_tci) != 2); + off = offsetof(struct sk_buff, vlan_tci); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + ctx->flags |= SEEN_SKB | SEEN_A; + emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); + if (PKT_VLAN_PRESENT_BIT) + emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); + if (PKT_VLAN_PRESENT_BIT < 7) + emit_andi(r_A, r_A, 1, ctx); + break; + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->flags |= SEEN_SKB; + + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); + /* Keep only the last 3 bits */ + emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); +#ifdef __BIG_ENDIAN_BITFIELD + /* Get the actual packet type to the lower 3 bits */ + emit_srl(r_A, r_A, 5, ctx); +#endif + break; + case BPF_ANC | SKF_AD_QUEUE: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + queue_mapping) != 2); + BUILD_BUG_ON(offsetof(struct sk_buff, + queue_mapping) > 0xff); + off = offsetof(struct sk_buff, queue_mapping); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + default: + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, + inst->code); + return -1; + } + } + + /* compute offsets only during the first pass */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + return 0; +} + +void bpf_jit_compile(struct bpf_prog *fp) +{ + struct jit_ctx ctx; + unsigned int alloc_size, tmp_idx; + + if (!bpf_jit_enable) + return; + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + return; + + ctx.skf = fp; + + if (build_body(&ctx)) + goto out; + + tmp_idx = ctx.idx; + build_prologue(&ctx); + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; + /* just to complete the ctx.idx count */ + build_epilogue(&ctx); + + alloc_size = 4 * ctx.idx; + ctx.target = module_alloc(alloc_size); + if (ctx.target == NULL) + goto out; + + /* Clean it */ + memset(ctx.target, 0, alloc_size); + + ctx.idx = 0; + + /* Generate the actual JIT code */ + build_prologue(&ctx); + build_body(&ctx); + build_epilogue(&ctx); + + /* Update the icache */ + flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); + + fp->bpf_func = (void *)ctx.target; + fp->jited = 1; + +out: + kfree(ctx.offsets); +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) + module_memfree(fp->bpf_func); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S new file mode 100644 index 000000000000..57154c5883b6 --- /dev/null +++ b/arch/mips/net/bpf_jit_asm.S @@ -0,0 +1,285 @@ +/* + * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF + * compiler. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include <asm/asm.h> +#include <asm/isa-rev.h> +#include <asm/regdef.h> +#include "bpf_jit.h" + +/* ABI + * + * r_skb_hl skb header length + * r_skb_data skb data + * r_off(a1) offset register + * r_A BPF register A + * r_X PF register X + * r_skb(a0) *skb + * r_M *scratch memory + * r_skb_le skb length + * r_s0 Scratch register 0 + * r_s1 Scratch register 1 + * + * On entry: + * a0: *skb + * a1: offset (imm or imm + X) + * + * All non-BPF-ABI registers are free for use. On return, we only + * care about r_ret. The BPF-ABI registers are assumed to remain + * unmodified during the entire filter operation. + */ + +#define skb a0 +#define offset a1 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ + + /* We know better :) so prevent assembler reordering etc */ + .set noreorder + +#define is_offset_negative(TYPE) \ + /* If offset is negative we have more work to do */ \ + slti t0, offset, 0; \ + bgtz t0, bpf_slow_path_##TYPE##_neg; \ + /* Be careful what follows in DS. */ + +#define is_offset_in_header(SIZE, TYPE) \ + /* Reading from header? */ \ + addiu $r_s0, $r_skb_hl, -SIZE; \ + slt t0, $r_s0, offset; \ + bgtz t0, bpf_slow_path_##TYPE; \ + +LEAF(sk_load_word) + is_offset_negative(word) +FEXPORT(sk_load_word_positive) + is_offset_in_header(4, word) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + .set reorder + lw $r_A, 0(t1) + .set noreorder +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_A + rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_word) + +LEAF(sk_load_half) + is_offset_negative(half) +FEXPORT(sk_load_half_positive) + is_offset_in_header(2, half) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lhu $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh $r_A, $r_A +# else + sll t0, $r_A, 8 + srl t1, $r_A, 8 + andi t0, t0, 0xff00 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_half) + +LEAF(sk_load_byte) + is_offset_negative(byte) +FEXPORT(sk_load_byte_positive) + is_offset_in_header(1, byte) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lbu $r_A, 0(t1) + jr $r_ra + move $r_ret, zero + END(sk_load_byte) + +/* + * call skb_copy_bits: + * (prototype in linux/skbuff.h) + * + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) + * + * o32 mandates we leave 4 spaces for argument registers in case + * the callee needs to use them. Even though we don't care about + * the argument registers ourselves, we need to allocate that space + * to remain ABI compliant since the callee may want to use that space. + * We also allocate 2 more spaces for $r_ra and our return register (*to). + * + * n64 is a bit different. The *caller* will allocate the space to preserve + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no + * good reason but it does not matter that much really. + * + * (void *to) is returned in r_s0 + * + */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define DS_OFFSET(SIZE) (4 * SZREG) +#else +#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) +#endif +#define bpf_slow_path_common(SIZE) \ + /* Quick check. Are we within reasonable boundaries? */ \ + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ + sltu $r_s0, offset, $r_s1; \ + beqz $r_s0, fault; \ + /* Load 4th argument in DS */ \ + LONG_ADDIU a3, zero, SIZE; \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, skb_copy_bits; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + /* Assign low slot to a2 */ \ + PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ + jalr t0; \ + /* Reset our destination slot (DS but it's ok) */ \ + INT_S zero, (4 * SZREG)($r_sp); \ + /* \ + * skb_copy_bits returns 0 on success and -EFAULT \ + * on error. Our data live in a2. Do not bother with \ + * our data if an error has been returned. \ + */ \ + /* Restore our frame */ \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + INT_L $r_s0, (4 * SZREG)($r_sp); \ + bltz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + move $r_ret, zero; \ + +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) + bpf_slow_path_common(4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_s0 + jr $r_ra + rotr $r_A, t0, 16 +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_word) + +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) + bpf_slow_path_common(2) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + jr $r_ra + wsbh $r_A, $r_s0 +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_half) + +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) + bpf_slow_path_common(1) + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_byte) + +/* + * Negative entry points + */ + .macro bpf_is_end_of_data + li t0, SKF_LL_OFF + /* Reading link layer data? */ + slt t1, offset, t0 + bgtz t1, fault + /* Be careful what follows in DS. */ + .endm +/* + * call skb_copy_bits: + * (prototype in linux/filter.h) + * + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + * int k, unsigned int size) + * + * see above (bpf_slow_path_common) for ABI restrictions + */ +#define bpf_negative_common(SIZE) \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + jalr t0; \ + li a2, SIZE; \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + /* Check return pointer */ \ + beqz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + /* Preserve our pointer */ \ + move $r_s0, v0; \ + /* Set return value */ \ + move $r_ret, zero; \ + +bpf_slow_path_word_neg: + bpf_is_end_of_data +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) + bpf_negative_common(4) + jr $r_ra + lw $r_A, 0($r_s0) + END(sk_load_word_negative) + +bpf_slow_path_half_neg: + bpf_is_end_of_data +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) + bpf_negative_common(2) + jr $r_ra + lhu $r_A, 0($r_s0) + END(sk_load_half_negative) + +bpf_slow_path_byte_neg: + bpf_is_end_of_data +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) + bpf_negative_common(1) + jr $r_ra + lbu $r_A, 0($r_s0) + END(sk_load_byte_negative) + +fault: + jr $r_ra + addiu $r_ret, zero, 1 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 46b76751f3a5..561154cbcc40 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) @@ -1803,7 +1804,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || MIPS_ISA_REV < 2) + if (!prog->jit_requested) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index 4f2411f489af..01a2af8215c8 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -409,7 +409,7 @@ static int alchemy_pci_probe(struct platform_device *pdev) goto out6; } - ctx->regs = ioremap_nocache(r->start, resource_size(r)); + ctx->regs = ioremap(r->start, resource_size(r)); if (!ctx->regs) { dev_err(&pdev->dev, "cannot map pci regs\n"); ret = -ENODEV; diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 0fed6fc17fe4..490953f51528 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -441,7 +441,7 @@ static int ar2315_pci_probe(struct platform_device *pdev) apc->mem_res.flags = IORESOURCE_MEM; /* Remap PCI config space */ - apc->cfg_mem = devm_ioremap_nocache(dev, res->start, + apc->cfg_mem = devm_ioremap(dev, res->start, AR2315_PCI_CFG_SIZE); if (!apc->cfg_mem) { dev_err(dev, "failed to remap PCI config space\n"); diff --git a/arch/mips/pci/pci-bcm63xx.c b/arch/mips/pci/pci-bcm63xx.c index 151d9b5870bb..5548365605c0 100644 --- a/arch/mips/pci/pci-bcm63xx.c +++ b/arch/mips/pci/pci-bcm63xx.c @@ -221,7 +221,7 @@ static int __init bcm63xx_register_pci(void) * a spinlock for each io access, so this is currently kind of * broken on SMP. */ - pci_iospace_start = ioremap_nocache(BCM_PCI_IO_BASE_PA, 4); + pci_iospace_start = ioremap(BCM_PCI_IO_BASE_PA, 4); if (!pci_iospace_start) return -ENOMEM; diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 8e26b120f994..d85cbf84e41c 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -10,7 +10,7 @@ #include <asm/sn/addrs.h> #include <asm/sn/types.h> #include <asm/sn/klconfig.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/ioc3.h> #include <asm/pci/bridge.h> diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index c9f4d4ba058a..e1f12e398136 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -218,7 +218,7 @@ static int rt288x_pci_probe(struct platform_device *pdev) { void __iomem *io_map_base; - rt2880_pci_base = ioremap_nocache(RT2880_PCI_BASE, PAGE_SIZE); + rt2880_pci_base = ioremap(RT2880_PCI_BASE, PAGE_SIZE); io_map_base = ioremap(RT2880_PCI_IO_BASE, RT2880_PCI_IO_SIZE); rt2880_pci_controller.io_map_base = (unsigned long) io_map_base; diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 5c1a196be0c5..3b2552fb7735 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -437,17 +437,28 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) struct irq_alloc_info info; int irq; - irq = bc->pci_int[slot]; + switch (pin) { + case PCI_INTERRUPT_UNKNOWN: + case PCI_INTERRUPT_INTA: + case PCI_INTERRUPT_INTC: + pin = 0; + break; + case PCI_INTERRUPT_INTB: + case PCI_INTERRUPT_INTD: + pin = 1; + } + + irq = bc->pci_int[slot][pin]; if (irq == -1) { info.ctrl = bc; info.nasid = bc->nasid; - info.pin = slot; + info.pin = bc->int_mapping[slot][pin]; irq = irq_domain_alloc_irqs(bc->domain, 1, bc->nasid, &info); if (irq < 0) return irq; - bc->pci_int[slot] = irq; + bc->pci_int[slot][pin] = irq; } return irq; } @@ -458,21 +469,26 @@ static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G); bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO); + bc->int_mapping[2][1] = 4; + bc->int_mapping[6][1] = 6; } static void bridge_setup_ip27_baseio(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO); + bc->int_mapping[2][1] = 4; } static void bridge_setup_ip29_baseio(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD); + bc->int_mapping[2][1] = 3; } static void bridge_setup_ip30_sysboard(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD); + bc->int_mapping[2][1] = 4; } static void bridge_setup_menet(struct bridge_controller *bc) @@ -483,6 +499,26 @@ static void bridge_setup_menet(struct bridge_controller *bc) bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4); } +static void bridge_setup_io7(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO7); +} + +static void bridge_setup_io8(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO8); +} + +static void bridge_setup_io9(struct bridge_controller *bc) +{ + bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_IO9); +} + +static void bridge_setup_ip34_fuel_sysboard(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IP34_SYSBOARD); +} + #define BRIDGE_BOARD_SETUP(_partno, _setup) \ { .match = _partno, .setup = _setup } @@ -500,6 +536,10 @@ static const struct { BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard), BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard), BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet), + BRIDGE_BOARD_SETUP("030-1557-", bridge_setup_io7), + BRIDGE_BOARD_SETUP("030-1673-", bridge_setup_io8), + BRIDGE_BOARD_SETUP("030-1771-", bridge_setup_io9), + BRIDGE_BOARD_SETUP("030-1707-", bridge_setup_ip34_fuel_sysboard), }; static void bridge_setup_board(struct bridge_controller *bc, char *partnum) @@ -655,7 +695,11 @@ static int bridge_probe(struct platform_device *pdev) for (slot = 0; slot < 8; slot++) { bridge_set(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR); - bc->pci_int[slot] = -1; + bc->pci_int[slot][0] = -1; + bc->pci_int[slot][1] = -1; + /* default interrupt pin mapping */ + bc->int_mapping[slot][0] = slot; + bc->int_mapping[slot][1] = slot ^ 4; } bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */ diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c index 8c236738b5ee..25372e62783b 100644 --- a/arch/mips/pic32/pic32mzda/early_console.c +++ b/arch/mips/pic32/pic32mzda/early_console.c @@ -135,7 +135,7 @@ void __init fw_init_early_console(char port) char *arch_cmdline = pic32_getcmdline(); int baud = -1; - uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00); + uart_base = ioremap(PIC32_BASE_UART, 0xc00); baud = get_baud_from_cmdline(arch_cmdline); if (port == -1) diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c index 504e6ab399b5..f2822632b017 100644 --- a/arch/mips/pic32/pic32mzda/early_pin.c +++ b/arch/mips/pic32/pic32mzda/early_pin.c @@ -122,7 +122,7 @@ static const struct void pic32_pps_input(int function, int pin) { - void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4); + void __iomem *pps_base = ioremap(PPS_BASE, 0xF4); int i; for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) { @@ -252,7 +252,7 @@ static const struct void pic32_pps_output(int function, int pin) { - void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170); + void __iomem *pps_base = ioremap(PPS_BASE, 0x170); int i; for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) { diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c index 8e6e8db8dd5f..940c684f6921 100644 --- a/arch/mips/pmcs-msp71xx/msp_serial.c +++ b/arch/mips/pmcs-msp71xx/msp_serial.c @@ -105,7 +105,7 @@ void __init msp_serial_setup(void) /* Initialize first serial port */ up.mapbase = MSP_UART0_BASE; - up.membase = ioremap_nocache(up.mapbase, MSP_UART_REG_LEN); + up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); up.irq = MSP_INT_UART0; up.uartclk = uartclk; up.regshift = 2; @@ -143,7 +143,7 @@ void __init msp_serial_setup(void) } up.mapbase = MSP_UART1_BASE; - up.membase = ioremap_nocache(up.mapbase, MSP_UART_REG_LEN); + up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); up.irq = MSP_INT_UART1; up.line = 1; up.private_data = (void*)UART1_STATUS_REG; diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 0ddeb31afa93..bdf53807d7c2 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -67,11 +67,13 @@ static int __init ill_acc_of_setup(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { dev_err(&pdev->dev, "failed to get irq\n"); + put_device(&pdev->dev); return -EINVAL; } if (request_irq(irq, ill_acc_irq_handler, 0, "ill_acc", &pdev->dev)) { dev_err(&pdev->dev, "failed to request irq\n"); + put_device(&pdev->dev); return -EINVAL; } diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index c945d76cfce5..220ca0cd7945 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -165,7 +165,7 @@ static int __init intc_of_init(struct device_node *node, res.name)) pr_err("Failed to request intc memory"); - rt_intc_membase = ioremap_nocache(res.start, + rt_intc_membase = ioremap(res.start, resource_size(&res)); if (!rt_intc_membase) panic("Failed to remap intc memory"); diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 59b23095bfbb..90c6d4a11c5d 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -43,7 +43,7 @@ __iomem void *plat_of_remap_node(const char *node) res.name)) panic("Failed to request resources for %s", node); - return ioremap_nocache(res.start, resource_size(&res)); + return ioremap(res.start, resource_size(&res)); } void __init device_tree_init(void) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index c9ecf17f8660..dd34f1b32b79 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -286,7 +286,7 @@ static int __init plat_setup_devices(void) nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000; /* Read and map device controller 3 */ - dev3.base = ioremap_nocache(readl(IDT434_REG_BASE + DEV3BASE), 1); + dev3.base = ioremap(readl(IDT434_REG_BASE + DEV3BASE), 1); if (!dev3.base) { printk(KERN_ERR "rb532: cannot remap device controller 3\n"); diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index fdc704abc8d4..94f02ada4082 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -192,7 +192,7 @@ int __init rb532_gpio_init(void) struct resource *r; r = rb532_gpio_reg0_res; - rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); + rb532_gpio_chip->regbase = ioremap(r->start, resource_size(r)); if (!rb532_gpio_chip->regbase) { printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index 26e957b21fbf..303cc3dc1749 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -110,7 +110,7 @@ void __init prom_init(void) phys_addr_t memsize; phys_addr_t ddrbase; - ddr = ioremap_nocache(ddr_reg[0].start, + ddr = ioremap(ddr_reg[0].start, ddr_reg[0].end - ddr_reg[0].start); if (!ddr) { diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c index 1aa4df1385cb..51af9d374d66 100644 --- a/arch/mips/rb532/setup.c +++ b/arch/mips/rb532/setup.c @@ -49,7 +49,7 @@ void __init plat_mem_setup(void) set_io_port_base(KSEG1); - pci_reg = ioremap_nocache(pci0_res[0].start, + pci_reg = ioremap(pci0_res[0].start, pci0_res[0].end - pci0_res[0].start); if (!pci_reg) { printk(KERN_ERR "Could not remap PCI registers\n"); diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 282b47c2dc27..de0768a49ee8 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -47,8 +47,9 @@ static struct device gio_bus = { * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. */ -const struct gio_device_id *gio_match_device(const struct gio_device_id *match, - const struct gio_device *dev) +static const struct gio_device_id * +gio_match_device(const struct gio_device_id *match, + const struct gio_device *dev) { const struct gio_device_id *ids; @@ -58,7 +59,6 @@ const struct gio_device_id *gio_match_device(const struct gio_device_id *match, return NULL; } -EXPORT_SYMBOL_GPL(gio_match_device); struct gio_device *gio_dev_get(struct gio_device *dev) { diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 73ad29b180fb..5a38ae6bdfa9 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -16,8 +16,8 @@ #include <asm/ptrace.h> #include <asm/sn/addrs.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> -#include <asm/sn/sn0/hub.h> #include <asm/tlbdebug.h> #include <asm/traps.h> #include <linux/uaccess.h> @@ -30,29 +30,31 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1) { "WERR", "Uncached Partial Write", "PWERR", "Write Timeout", NULL, NULL, NULL, NULL } }; - int wrb = errst1 & PI_ERR_ST1_WRBRRB_MASK; + union pi_err_stat0 st0; + union pi_err_stat1 st1; - if (!(errst0 & PI_ERR_ST0_VALID_MASK)) { - printk("Hub does not contain valid error information\n"); + st0.pi_stat0_word = errst0; + st1.pi_stat1_word = errst1; + + if (!st0.pi_stat0_fmt.s0_valid) { + pr_info("Hub does not contain valid error information\n"); return; } - - printk("Hub has valid error information:\n"); - if (errst0 & PI_ERR_ST0_OVERRUN_MASK) - printk("Overrun is set. Error stack may contain additional " + pr_info("Hub has valid error information:\n"); + if (st0.pi_stat0_fmt.s0_ovr_run) + pr_info("Overrun is set. Error stack may contain additional " "information.\n"); - printk("Hub error address is %08lx\n", - (errst0 & PI_ERR_ST0_ADDR_MASK) >> (PI_ERR_ST0_ADDR_SHFT - 3)); - printk("Incoming message command 0x%lx\n", - (errst0 & PI_ERR_ST0_CMD_MASK) >> PI_ERR_ST0_CMD_SHFT); - printk("Supplemental field of incoming message is 0x%lx\n", - (errst0 & PI_ERR_ST0_SUPPL_MASK) >> PI_ERR_ST0_SUPPL_SHFT); - printk("T5 Rn (for RRB only) is 0x%lx\n", - (errst0 & PI_ERR_ST0_REQNUM_MASK) >> PI_ERR_ST0_REQNUM_SHFT); - printk("Error type is %s\n", err_type[wrb] - [(errst0 & PI_ERR_ST0_TYPE_MASK) >> PI_ERR_ST0_TYPE_SHFT] - ? : "invalid"); + pr_info("Hub error address is %08lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_addr); + pr_info("Incoming message command 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_cmd); + pr_info("Supplemental field of incoming message is 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_supl); + pr_info("T5 Rn (for RRB only) is 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_t5_req); + pr_info("Error type is %s\n", err_type[st1.pi_stat1_fmt.s1_rw_rb] + [st0.pi_stat0_fmt.s0_err_type] ? : "invalid"); } int ip27_be_handler(struct pt_regs *regs, int is_fixup) diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h index 3ffbcf9bfd41..ed008a08464c 100644 --- a/arch/mips/sgi-ip27/ip27-common.h +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -3,8 +3,18 @@ #ifndef __IP27_COMMON_H #define __IP27_COMMON_H -extern void ip27_reboot_setup(void); +extern nasid_t master_nasid; + +extern void cpu_node_probe(void); extern void hub_rt_clock_event_init(void); +extern void hub_rtc_init(nasid_t nasid); +extern void install_cpu_nmi_handler(int slice); +extern void install_ipi(void); +extern void ip27_reboot_setup(void); extern const struct plat_smp_ops ip27_smp_ops; +extern unsigned long node_getfirstfree(nasid_t nasid); +extern void per_cpu_init(void); +extern void replicate_kernel_text(void); +extern void setup_replication_mask(void); #endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-console.c b/arch/mips/sgi-ip27/ip27-console.c index 5886bee89d06..7737a88c6569 100644 --- a/arch/mips/sgi-ip27/ip27-console.c +++ b/arch/mips/sgi-ip27/ip27-console.c @@ -9,14 +9,15 @@ #include <asm/page.h> #include <asm/setup.h> #include <asm/sn/addrs.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/klconfig.h> #include <asm/sn/ioc3.h> -#include <asm/sn/sn_private.h> #include <linux/serial.h> #include <linux/serial_core.h> +#include "ip27-common.h" + #define IOC3_CLK (22000000 / 3) #define IOC3_FLAGS (0) diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index a538d0ceb61d..8352eb6403b4 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -11,7 +11,9 @@ #include <linux/mmzone.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> +#include <asm/sn/io.h> +#include <asm/xtalk/xtalk.h> static int force_fire_and_forget = 1; @@ -82,7 +84,7 @@ unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, */ static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) { - iprb_t prb; + union iprb_u prb; int prb_offset; /* @@ -135,7 +137,7 @@ static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) static void hub_set_piomode(nasid_t nasid) { u64 ii_iowa; - hubii_wcr_t ii_wcr; + union hubii_wcr_u ii_wcr; unsigned i; ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS); diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index f597e1ee2df7..32bcb8d1dd88 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -19,23 +19,18 @@ #include <asm/pgtable.h> #include <asm/sgialib.h> #include <asm/time.h> +#include <asm/sn/agent.h> #include <asm/sn/types.h> -#include <asm/sn/sn0/addrs.h> -#include <asm/sn/sn0/hubni.h> -#include <asm/sn/sn0/hubio.h> #include <asm/sn/klconfig.h> #include <asm/sn/ioc3.h> #include <asm/mipsregs.h> #include <asm/sn/gda.h> -#include <asm/sn/hub.h> #include <asm/sn/intr.h> #include <asm/current.h> #include <asm/processor.h> #include <asm/mmu_context.h> #include <asm/thread_info.h> #include <asm/sn/launch.h> -#include <asm/sn/sn_private.h> -#include <asm/sn/sn0/ip27.h> #include <asm/sn/mapped_kernel.h> #include "ip27-common.h" @@ -77,18 +72,14 @@ static void per_hub_init(nasid_t nasid) void per_cpu_init(void) { int cpu = smp_processor_id(); - int slice = LOCAL_HUB_L(PI_CPU_NUM); nasid_t nasid = get_nasid(); - struct hub_data *hub = hub_data(nasid); - - if (test_and_set_bit(slice, &hub->slice_map)) - return; clear_c0_status(ST0_IM); per_hub_init(nasid); - cpu_time_init(); + pr_info("CPU %d clock is %dMHz.\n", cpu, sn_cpu_info[cpu].p_speed); + install_ipi(); /* Install our NMI handler if symmon hasn't installed one. */ @@ -98,16 +89,6 @@ void per_cpu_init(void) enable_percpu_irq(IP27_HUB_PEND1_IRQ, IRQ_TYPE_NONE); } -/* - * get_nasid() returns the physical node id number of the caller. - */ -nasid_t -get_nasid(void) -{ - return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK) - >> NSRI_NODEID_SHFT); -} - void __init plat_mem_setup(void) { u64 p, e, n_mode; diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index c72ae330ea93..42df9fafa943 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -19,7 +19,6 @@ #include <asm/sn/addrs.h> #include <asm/sn/agent.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> #include <asm/sn/intr.h> #include <asm/sn/irq_alloc.h> @@ -288,11 +287,9 @@ void __init arch_init_irq(void) * Mark these as reserved right away so they won't be used accidentally * later. */ - for (i = 0; i <= BASE_PCI_IRQ; i++) + for (i = 0; i <= CPU_CALL_B_IRQ; i++) set_bit(i, hub_irq_map); - set_bit(IP_PEND0_6_63, hub_irq_map); - for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++) set_bit(i, hub_irq_map); diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c index 6cb2160e7689..81a1646e609a 100644 --- a/arch/mips/sgi-ip27/ip27-klconfig.c +++ b/arch/mips/sgi-ip27/ip27-klconfig.c @@ -72,54 +72,3 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type) /* Didn't find it. */ return (lboard_t *)NULL; } - -klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice) -{ - lboard_t *brd; - klcpu_t *acpu; - - if (!(brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27))) - return (klcpu_t *)NULL; - - if (!(acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU))) - return (klcpu_t *)NULL; - - do { - if ((acpu->cpu_info.physid) == slice) - return acpu; - } while ((acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu, - KLSTRUCT_CPU))); - return (klcpu_t *)NULL; -} - -klcpu_t *sn_get_cpuinfo(cpuid_t cpu) -{ - nasid_t nasid; - int slice; - klcpu_t *acpu; - - if (!(cpu < MAXCPUS)) { - printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu); - return NULL; - } - - nasid = cputonasid(cpu); - if (nasid == INVALID_NASID) - return NULL; - - for (slice = 0; slice < CPUS_PER_NODE; slice++) { - acpu = nasid_slice_to_cpuinfo(nasid, slice); - if (acpu && acpu->cpu_info.virtid == cpu) - return acpu; - } - return NULL; -} - -int get_cpu_slice(cpuid_t cpu) -{ - klcpu_t *acpu; - - if ((acpu = sn_get_cpuinfo(cpu)) == NULL) - return -1; - return acpu->cpu_info.physid; -} diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index ee1c6ff4aa00..abd7a84df7dd 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -16,11 +16,11 @@ #include <asm/sn/types.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> -#include <asm/sn/hub.h> #include <asm/sn/mapped_kernel.h> -#include <asm/sn/sn_private.h> -static cpumask_t ktext_repmask; +#include "ip27-common.h" + +static nodemask_t ktext_repmask; /* * XXX - This needs to be much smarter about where it puts copies of the @@ -30,8 +30,8 @@ static cpumask_t ktext_repmask; void __init setup_replication_mask(void) { /* Set only the master cnode's bit. The master cnode is always 0. */ - cpumask_clear(&ktext_repmask); - cpumask_set_cpu(0, &ktext_repmask); + nodes_clear(ktext_repmask); + node_set(0, ktext_repmask); #ifdef CONFIG_REPLICATE_KTEXT #ifndef CONFIG_MAPPED_KERNEL @@ -44,7 +44,7 @@ void __init setup_replication_mask(void) if (nasid == 0) continue; /* Advertise that we have a copy of the kernel */ - cpumask_set_cpu(nasid, &ktext_repmask); + node_set(nasid, ktext_repmask); } } #endif @@ -98,7 +98,7 @@ void __init replicate_kernel_text(void) continue; /* Check if this node should get a copy of the kernel */ - if (cpumask_test_cpu(client_nasid, &ktext_repmask)) { + if (node_isset(client_nasid, ktext_repmask)) { server_nasid = client_nasid; copy_kernel(server_nasid); } @@ -122,7 +122,7 @@ unsigned long node_getfirstfree(nasid_t nasid) loadbase += 16777216; #endif offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase; - if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask))) + if ((nasid == 0) || (node_isset(nasid, ktext_repmask))) return TO_NODE(nasid, offset) >> PAGE_SHIFT; else return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 563aad5e6398..a45691e6ab90 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -25,10 +25,10 @@ #include <asm/sections.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/klconfig.h> -#include <asm/sn/sn_private.h> +#include "ip27-common.h" #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) @@ -37,31 +37,18 @@ struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); -static int fine_mode; - -static int is_fine_dirmode(void) -{ - return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE; -} - -static u64 get_region(nasid_t nasid) -{ - if (fine_mode) - return nasid >> NASID_TO_FINEREG_SHFT; - else - return nasid >> NASID_TO_COARSEREG_SHFT; -} - -static u64 region_mask; - -static void gen_region_mask(u64 *region_mask) +static u64 gen_region_mask(void) { + int region_shift; + u64 region_mask; nasid_t nasid; - (*region_mask) = 0; - for_each_online_node(nasid) { - (*region_mask) |= 1ULL << get_region(nasid); - } + region_shift = get_region_shift(); + region_mask = 0; + for_each_online_node(nasid) + region_mask |= BIT_ULL(nasid >> region_shift); + + return region_mask; } #define rou_rflag rou_flags @@ -148,25 +135,25 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) } while ((brd = find_lboard_class(KLCF_NEXT(brd), KLTYPE_ROUTER))); } + if (nasid_a == nasid_b) + return LOCAL_DISTANCE; + + if (router_a == router_b) + return LOCAL_DISTANCE + 1; + if (router_a == NULL) { pr_info("node_distance: router_a NULL\n"); - return -1; + return 255; } if (router_b == NULL) { pr_info("node_distance: router_b NULL\n"); - return -1; + return 255; } - if (nasid_a == nasid_b) - return 0; - - if (router_a == router_b) - return 1; - router_distance = 100; router_recurse(router_a, router_b, 2); - return router_distance; + return LOCAL_DISTANCE + router_distance; } static void __init init_topology_matrix(void) @@ -281,10 +268,10 @@ static unsigned long __init slot_psize_compute(nasid_t nasid, int slot) static void __init mlreset(void) { + u64 region_mask; nasid_t nasid; master_nasid = get_nasid(); - fine_mode = is_fine_dirmode(); /* * Probe for all CPUs - this creates the cpumask and sets up the @@ -297,7 +284,7 @@ static void __init mlreset(void) init_topology_matrix(); dump_topology(); - gen_region_mask(®ion_mask); + region_mask = gen_region_mask(); setup_replication_mask(); diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index daf3670d94e7..84889b57d5ff 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -9,7 +9,7 @@ #include <asm/sn/addrs.h> #include <asm/sn/nmi.h> #include <asm/sn/arch.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> #if 0 #define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n) @@ -17,6 +17,9 @@ #define NODE_NUM_CPUS(n) CPUS_PER_NODE #endif +#define SEND_NMI(_nasid, _slice) \ + REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1) + typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c index 74d078247e49..5ac5ad638734 100644 --- a/arch/mips/sgi-ip27/ip27-reset.c +++ b/arch/mips/sgi-ip27/ip27-reset.c @@ -22,9 +22,9 @@ #include <asm/reboot.h> #include <asm/sgialib.h> #include <asm/sn/addrs.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> -#include <asm/sn/sn0/hub.h> #include "ip27-common.h" diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index faa0244c8b0c..5d2652a1d35a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -15,36 +15,22 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> #include <asm/sn/intr.h> #include <asm/sn/klconfig.h> #include <asm/sn/launch.h> #include <asm/sn/mapped_kernel.h> -#include <asm/sn/sn_private.h> #include <asm/sn/types.h> -#include <asm/sn/sn0/hubpi.h> -#include <asm/sn/sn0/hubio.h> -#include <asm/sn/sn0/ip27.h> #include "ip27-common.h" -/* - * Takes as first input the PROM assigned cpu id, and the kernel - * assigned cpu id as the second. - */ -static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum) -{ - cputonasid(cpunum) = nasid; - cputoslice(cpunum) = get_cpu_slice(cpu); -} - -static int do_cpumask(nasid_t nasid, int highest) +static int node_scan_cpus(nasid_t nasid, int highest) { - static int tot_cpus_found = 0; + static int cpus_found; lboard_t *brd; klcpu_t *acpu; - int cpus_found = 0; cpuid_t cpuid; brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); @@ -55,13 +41,15 @@ static int do_cpumask(nasid_t nasid, int highest) cpuid = acpu->cpu_info.virtid; /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && - (tot_cpus_found != NR_CPUS)) { + (cpus_found != NR_CPUS)) { if (cpuid > highest) highest = cpuid; set_cpu_possible(cpuid, true); - alloc_cpupda(nasid, cpuid, tot_cpus_found); + cputonasid(cpus_found) = nasid; + cputoslice(cpus_found) = acpu->cpu_info.physid; + sn_cpu_info[cpus_found].p_speed = + acpu->cpu_speed; cpus_found++; - tot_cpus_found++; } acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu, KLSTRUCT_CPU); @@ -87,7 +75,7 @@ void cpu_node_probe(void) if (nasid == INVALID_NASID) break; node_set_online(nasid); - highest = do_cpumask(nasid, highest); + highest = node_scan_cpus(nasid, highest); } printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes()); @@ -180,7 +168,8 @@ static void __init ip27_smp_setup(void) /* * PROM sets up system, that boot cpu is always first CPU on nasid 0 */ - alloc_cpupda(0, 0, 0); + cputonasid(0) = 0; + cputoslice(0) = LOCAL_HUB_L(PI_CPU_NUM); } static void __init ip27_prepare_cpus(unsigned int max_cpus) diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 17302bbfa7a6..61f3565f3645 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -25,17 +25,14 @@ #include <asm/sn/klconfig.h> #include <asm/sn/arch.h> #include <asm/sn/addrs.h> -#include <asm/sn/sn_private.h> -#include <asm/sn/sn0/ip27.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> + +#include "ip27-common.h" #define TICK_SIZE (tick_nsec / 1000) /* Includes for ioc3_init(). */ #include <asm/sn/types.h> -#include <asm/sn/sn0/addrs.h> -#include <asm/sn/sn0/hubni.h> -#include <asm/sn/sn0/hubio.h> #include <asm/pci/bridge.h> #include "ip27-common.h" @@ -153,25 +150,6 @@ void __init plat_time_init(void) hub_rt_clock_event_init(); } -void cpu_time_init(void) -{ - lboard_t *board; - klcpu_t *cpu; - int cpuid; - - /* Don't use ARCS. ARCS is fragile. Klconfig is simple and sane. */ - board = find_lboard(KL_CONFIG_INFO(get_nasid()), KLTYPE_IP27); - if (!board) - panic("Can't find board info for myself."); - - cpuid = LOCAL_HUB_L(PI_CPU_NUM) ? IP27_CPU0_INDEX : IP27_CPU1_INDEX; - cpu = (klcpu_t *) KLCF_COMP(board, cpuid); - if (!cpu) - panic("No information about myself?"); - - printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed); -} - void hub_rtc_init(nasid_t nasid) { @@ -190,23 +168,3 @@ void hub_rtc_init(nasid_t nasid) LOCAL_HUB_S(PI_RT_PEND_B, 0); } } - -static int __init sgi_ip27_rtc_devinit(void) -{ - struct resource res; - - memset(&res, 0, sizeof(res)); - res.start = XPHYSADDR(KL_CONFIG_CH_CONS_INFO(master_nasid)->memory_base + - IOC3_BYTEBUS_DEV0); - res.end = res.start + 32767; - res.flags = IORESOURCE_MEM; - - return IS_ERR(platform_device_register_simple("rtc-m48t35", -1, - &res, 1)); -} - -/* - * kludge make this a device_initcall after ioc3 resource conflicts - * are resolved - */ -late_initcall(sgi_ip27_rtc_devinit); diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 5218b900f855..000ede156bdc 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -15,7 +15,6 @@ #include <asm/sn/addrs.h> #include <asm/sn/types.h> #include <asm/sn/klconfig.h> -#include <asm/sn/hub.h> #include <asm/pci/bridge.h> #include <asm/xtalk/xtalk.h> diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c index d46655b914f1..c2ffcb920250 100644 --- a/arch/mips/sgi-ip30/ip30-irq.c +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -232,9 +232,10 @@ static void heart_domain_free(struct irq_domain *domain, return; irqd = irq_domain_get_irq_data(domain, virq); - clear_bit(irqd->hwirq, heart_irq_map); - if (irqd && irqd->chip_data) + if (irqd) { + clear_bit(irqd->hwirq, heart_irq_map); kfree(irqd->chip_data); + } } static const struct irq_domain_ops heart_domain_ops = { diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c index 160b88000b4b..f6fa9afcbfd3 100644 --- a/arch/mips/sni/rm200.c +++ b/arch/mips/sni/rm200.c @@ -399,10 +399,10 @@ void __init sni_rm200_i8259_irqs(void) { int i; - rm200_pic_master = ioremap_nocache(0x16000020, 4); + rm200_pic_master = ioremap(0x16000020, 4); if (!rm200_pic_master) return; - rm200_pic_slave = ioremap_nocache(0x160000a0, 4); + rm200_pic_slave = ioremap(0x160000a0, 4); if (!rm200_pic_slave) { iounmap(rm200_pic_master); return; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index e05938997e69..b2a2e032dc99 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -18,6 +18,10 @@ ccflags-vdso := \ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ -D__VDSO__ +ifndef CONFIG_64BIT +ccflags-vdso += -DBUILD_VDSO32 +endif + ifdef CONFIG_CC_IS_CLANG ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif diff --git a/arch/mips/vdso/genvdso.c b/arch/mips/vdso/genvdso.c index b66b6b1c4aeb..be57b832bbe0 100644 --- a/arch/mips/vdso/genvdso.c +++ b/arch/mips/vdso/genvdso.c @@ -251,6 +251,18 @@ int main(int argc, char **argv) fprintf(out_file, "#include <linux/linkage.h>\n"); fprintf(out_file, "#include <linux/mm.h>\n"); fprintf(out_file, "#include <asm/vdso.h>\n"); + fprintf(out_file, "static int vdso_mremap(\n"); + fprintf(out_file, " const struct vm_special_mapping *sm,\n"); + fprintf(out_file, " struct vm_area_struct *new_vma)\n"); + fprintf(out_file, "{\n"); + fprintf(out_file, " unsigned long new_size =\n"); + fprintf(out_file, " new_vma->vm_end - new_vma->vm_start;\n"); + fprintf(out_file, " if (vdso_image.size != new_size)\n"); + fprintf(out_file, " return -EINVAL;\n"); + fprintf(out_file, " current->mm->context.vdso =\n"); + fprintf(out_file, " (void __user *)(new_vma->vm_start);\n"); + fprintf(out_file, " return 0;\n"); + fprintf(out_file, "}\n"); /* Write out the stripped VDSO data. */ fprintf(out_file, @@ -275,6 +287,7 @@ int main(int argc, char **argv) fprintf(out_file, "\t.mapping = {\n"); fprintf(out_file, "\t\t.name = \"[vdso]\",\n"); fprintf(out_file, "\t\t.pages = vdso_pages,\n"); + fprintf(out_file, "\t\t.mremap = vdso_mremap,\n"); fprintf(out_file, "\t},\n"); /* Calculate and write symbol offsets to <output file> */ diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c index 6ebdc37c89fc..6b83b6376a4b 100644 --- a/arch/mips/vdso/vgettimeofday.c +++ b/arch/mips/vdso/vgettimeofday.c @@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime32(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. + */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res) { @@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. + */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 12c06a833b7c..e30298e99e1b 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -62,7 +62,7 @@ config GENERIC_HWEIGHT config GENERIC_LOCKBREAK def_bool y - depends on PREEMPT + depends on PREEMPTION config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index d9ac7e6408ef..caddded56e77 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -9,7 +9,11 @@ #define PG_dcache_dirty PG_arch_1 void flush_icache_range(unsigned long start, unsigned long end); +#define flush_icache_range flush_icache_range + void flush_icache_page(struct vm_area_struct *vma, struct page *page); +#define flush_icache_page flush_icache_page + #ifdef CONFIG_CPU_CACHE_ALIASING void flush_cache_mm(struct mm_struct *mm); void flush_cache_dup_mm(struct mm_struct *mm); @@ -40,12 +44,11 @@ void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages) #else -#include <asm-generic/cacheflush.h> -#undef flush_icache_range -#undef flush_icache_page -#undef flush_icache_user_range void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); +#define flush_icache_user_range flush_icache_user_range + +#include <asm-generic/cacheflush.h> #endif #endif /* __NDS32_CACHEFLUSH_H__ */ diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 0214e4150539..6abc58ac406d 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -195,7 +195,7 @@ extern void paging_init(void); #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) -#define pmd_off_k(address) pmd_offset(pgd_offset_k(address), address) +#define pmd_off_k(address) pmd_offset(pud_offset(p4d_offset(pgd_offset_k(address), (address)), (address)), (address)) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) /* diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h new file mode 100644 index 000000000000..caeed3898419 --- /dev/null +++ b/arch/nds32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NDS32_VMALLOC_H +#define _ASM_NDS32_VMALLOC_H + +#endif /* _ASM_NDS32_VMALLOC_H */ diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 1df02a793364..6a2966c2d8c8 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -72,7 +72,7 @@ restore_user_regs_last .endm -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION .macro preempt_stop .endm #else @@ -158,7 +158,7 @@ no_work_pending: /* * preemptive kernel */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION resume_kernel: gie_disable lwi $t0, [tsk+#TSK_TI_PREEMPT] diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index fd2a54b8cd57..22ab77ea27ad 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -int ftrace_arch_code_modify_prepare(void) -{ - set_all_modules_text_rw(); - return 0; -} - -int ftrace_arch_code_modify_post_process(void) -{ - set_all_modules_text_ro(); - return 0; -} - static unsigned long gen_sethi_insn(unsigned long addr) { unsigned long opcode = 0x46000000; diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 31d29d92478e..a066efbe53c0 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -317,11 +317,6 @@ void __init setup_arch(char **cmdline_p) unflatten_and_copy_device_tree(); - if(IS_ENABLED(CONFIG_VT)) { - if(IS_ENABLED(CONFIG_DUMMY_CONSOLE)) - conswitchp = &dummy_con; - } - *cmdline_p = boot_command_line; early_trap_init(); } diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h new file mode 100644 index 000000000000..ec7a9260090b --- /dev/null +++ b/arch/nios2/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NIOS2_VMALLOC_H +#define _ASM_NIOS2_VMALLOC_H + +#endif /* _ASM_NIOS2_VMALLOC_H */ diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 1e515ccd698e..3d8d1d0bcb64 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -365,7 +365,7 @@ ENTRY(ret_from_interrupt) ldw r1, PT_ESTATUS(sp) /* check if returning to kernel */ TSTBNZ r1, r1, ESTATUS_EU, Luser_return -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION GET_THREAD_INFO r1 ldw r4, TI_PREEMPT_COUNT(r1) bne r4, r0, restore_all diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 4cf35b09c0ec..3c6e3c813a0b 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -196,8 +196,4 @@ void __init setup_arch(char **cmdline_p) * get kmalloc into gear */ paging_init(); - -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; -#endif } diff --git a/arch/nios2/mm/ioremap.c b/arch/nios2/mm/ioremap.c index b56af759dcdf..819bdfcc2e71 100644 --- a/arch/nios2/mm/ioremap.c +++ b/arch/nios2/mm/ioremap.c @@ -138,6 +138,14 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size) return NULL; } + /* + * Map uncached objects in the low part of address space to + * CONFIG_NIOS2_IO_REGION_BASE + */ + if (IS_MAPPABLE_UNCACHEABLE(phys_addr) && + IS_MAPPABLE_UNCACHEABLE(last_addr)) + return (void __iomem *)(CONFIG_NIOS2_IO_REGION_BASE + phys_addr); + /* Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; phys_addr &= PAGE_MASK; diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..75435eceec32 --- /dev/null +++ b/arch/openrisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_OPENRISC_VMALLOC_H +#define _ASM_OPENRISC_VMALLOC_H + +#endif /* _ASM_OPENRISC_VMALLOC_H */ diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index d668f5be3a99..c0a774b51e45 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -308,11 +308,6 @@ void __init setup_arch(char **cmdline_p) /* paging_init() sets up the MMU and marks all pages as reserved */ paging_init(); -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) - if (!conswitchp) - conswitchp = &dummy_con; -#endif - *cmdline_p = boot_command_line; printk(KERN_INFO "OpenRISC Linux -- http://openrisc.io\n"); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b16237c95ea3..71034b54d74e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -18,7 +18,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 @@ -62,6 +62,7 @@ config PARISC select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_COPY_THREAD_TLS help The PA-RISC microprocessor is designed by Hewlett-Packard and used @@ -81,7 +82,7 @@ config STACK_GROWSUP config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c..ab5c215cf46c 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index e03e3c849f40..2f4f66a3bac0 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -173,23 +173,6 @@ struct compat_shmid64_ds { #define COMPAT_ELF_NGREG 80 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static __inline__ void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = ¤t->thread.regs; diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 46212b52c23e..cab8f64ca4a2 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -128,9 +128,8 @@ static inline void gsc_writeq(unsigned long long val, unsigned long addr) * The standard PCI ioremap interfaces */ void __iomem *ioremap(unsigned long offset, unsigned long size); -#define ioremap_nocache(off, sz) ioremap((off), (sz)) -#define ioremap_wc ioremap_nocache -#define ioremap_uc ioremap_nocache +#define ioremap_wc ioremap +#define ioremap_uc ioremap extern void iounmap(const volatile void __iomem *addr); diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h index a99ea747d7ed..87e174006995 100644 --- a/arch/parisc/include/asm/kexec.h +++ b/arch/parisc/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ASM_PARISC_KEXEC_H #define _ASM_PARISC_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -32,6 +30,4 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ASM_PARISC_KEXEC_H */ diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h new file mode 100644 index 000000000000..1088ae4e7af9 --- /dev/null +++ b/arch/parisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_PARISC_VMALLOC_H +#define _ASM_PARISC_VMALLOC_H + +#endif /* _ASM_PARISC_VMALLOC_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2663c8f8be11..068d90950d93 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -37,5 +37,5 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_KEXEC) += kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 3b330e58a4f0..a5f3e50fe976 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -810,7 +810,7 @@ EXPORT_SYMBOL(device_to_hwpath); static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high, struct device *parent); -static void walk_lower_bus(struct parisc_device *dev) +static void __init walk_lower_bus(struct parisc_device *dev) { unsigned long io_io_low, io_io_high; @@ -889,8 +889,8 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", - ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, + pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); if (dev->num_addrs) { diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index b96d74496977..9a03e29c8733 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -940,14 +940,14 @@ intr_restore: rfi nop -#ifndef CONFIG_PREEMPT +#ifndef CONFIG_PREEMPTION # define intr_do_preempt intr_restore -#endif /* !CONFIG_PREEMPT */ +#endif /* !CONFIG_PREEMPTION */ .import schedule,code intr_do_resched: /* Only call schedule on return to userspace. If we're returning - * to kernel space, we may schedule if CONFIG_PREEMPT, otherwise + * to kernel space, we may schedule if CONFIG_PREEMPTION, otherwise * we jump back to intr_restore. */ LDREG PT_IASQ0(%r16), %r20 @@ -979,7 +979,7 @@ intr_do_resched: * and preempt_count is 0. otherwise, we continue on * our merry way back to the current running task. */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION .import preempt_schedule_irq,code intr_do_preempt: rsm PSW_SM_I, %r0 /* disable interrupts */ @@ -999,7 +999,7 @@ intr_do_preempt: nop b,n intr_restore /* ssm PSW_SM_I done by intr_restore */ -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ /* * External interrupts. diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 36434d4da381..749c4579db0d 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -327,8 +327,7 @@ static int pdt_mainloop(void *unused) ((pde & PDT_ADDR_SINGLE_ERR) == 0)) memory_failure(pde >> PAGE_SHIFT, 0); else - soft_offline_page( - pfn_to_page(pde >> PAGE_SHIFT), 0); + soft_offline_page(pde >> PAGE_SHIFT, 0); #else pr_crit("PDT: memory error at 0x%lx ignored.\n" "Rebuild kernel with CONFIG_MEMORY_FAILURE=y " diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 676683641d00..e1a8fee3ad49 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -792,7 +792,7 @@ static int perf_write_image(uint64_t *memaddr) return -1; } - runway = ioremap_nocache(cpu_device->hpa.start, 4096); + runway = ioremap(cpu_device->hpa.start, 4096); if (!runway) { pr_err("perf_write_image: ioremap failed!\n"); return -ENOMEM; diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ecc5c2771208..230a6422b99f 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -208,8 +208,8 @@ arch_initcall(parisc_idle_init); * Copy architecture-specific thread state */ int -copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p) +copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -254,9 +254,9 @@ copy_thread(unsigned long clone_flags, unsigned long usp, cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE; cregs->kpc = (unsigned long) &child_return; - /* Setup thread TLS area from the 4th parameter in clone */ + /* Setup thread TLS area */ if (clone_flags & CLONE_SETTLS) - cregs->cr27 = cregs->gr[23]; + cregs->cr27 = tls; } return 0; diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 53a21ce927de..e320bae501d3 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -151,10 +151,6 @@ void __init setup_arch(char **cmdline_p) dma_ops_init(); #endif -#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; /* we use do_take_over_console() later ! */ -#endif - clear_sched_clock_stable(); } diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 285ff516150c..52a15f5cd130 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -433,3 +433,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3_wrapper +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ddca8287d43b..354cf060b67f 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -401,7 +401,7 @@ static void __init map_pages(unsigned long start_vaddr, pmd = (pmd_t *) __pa(pmd); } - pgd_populate(NULL, pg_dir, __va(pmd)); + pud_populate(NULL, (pud_t *)pg_dir, __va(pmd)); #endif pg_dir++; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1ec34e16ed65..bf2b538aba12 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -106,7 +106,7 @@ config LOCKDEP_SUPPORT config GENERIC_LOCKBREAK bool default y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config GENERIC_HWEIGHT bool @@ -149,7 +149,7 @@ config PPC select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DYNAMIC_FTRACE if FUNCTION_TRACER @@ -222,9 +222,8 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE - select HAVE_MMU_GATHER_PAGE_SIZE + select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select HAVE_SYSCALL_TRACEPOINTS @@ -238,6 +237,7 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND @@ -455,11 +455,7 @@ config PPC_TRANSACTIONAL_MEM config PPC_UV bool "Ultravisor support" depends on KVM_BOOK3S_HV_POSSIBLE - select ZONE_DEVICE - select DEV_PAGEMAP_OPS - select DEVICE_PRIVATE - select MEMORY_HOTPLUG - select MEMORY_HOTREMOVE + depends on DEVICE_PRIVATE default n help This option paravirtualizes the kernel to run in POWER platforms that diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 1fad5d4c658d..c2b23b69d7b1 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -94,13 +94,6 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, { struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); - if (key_len != AES_KEYSIZE_128 && - key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - switch (key_len) { case AES_KEYSIZE_128: ctx->rounds = 4; @@ -114,6 +107,8 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, ctx->rounds = 6; ppc_expand_key_256(ctx->key_enc, in_key); break; + default: + return -EINVAL; } ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); @@ -139,13 +134,6 @@ static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, key_len >>= 1; - if (key_len != AES_KEYSIZE_128 && - key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - switch (key_len) { case AES_KEYSIZE_128: ctx->rounds = 4; @@ -162,6 +150,8 @@ static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, ppc_expand_key_256(ctx->key_enc, in_key); ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256); break; + default: + return -EINVAL; } ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 2c232898b933..63760b7dbb76 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -73,10 +73,8 @@ static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key, { u32 *mctx = crypto_shash_ctx(hash); - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(u32)) return -EINVAL; - } *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index a09595f00cab..9a53e29680f4 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -6,27 +6,28 @@ #include <asm/machdep.h> -static inline int arch_get_random_long(unsigned long *v) +static inline bool __must_check arch_get_random_long(unsigned long *v) { - return 0; + return false; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool __must_check arch_get_random_int(unsigned int *v) { - return 0; + return false; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { if (ppc_md.get_random_seed) return ppc_md.get_random_seed(v); - return 0; + return false; } -static inline int arch_get_random_seed_int(unsigned int *v) + +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { unsigned long val; - int rc; + bool rc; rc = arch_get_random_seed_long(&val); if (rc) @@ -34,16 +35,6 @@ static inline int arch_get_random_seed_int(unsigned int *v) return rc; } - -static inline int arch_has_random(void) -{ - return 0; -} - -static inline int arch_has_random_seed(void) -{ - return !!ppc_md.get_random_seed; -} #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index fbe8df433019..123adcefd40f 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -18,8 +18,6 @@ * mb() prevents loads and stores being reordered across this point. * rmb() prevents loads being reordered across this point. * wmb() prevents stores being reordered across this point. - * read_barrier_depends() prevents data-dependent loads being reordered - * across this point (nop on PPC). * * *mb() variants without smp_ prefix must order all types of memory * operations with one another. sync is the only instruction sufficient diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 998317702630..dc5c039eb28e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 15b75005bc34..3fa1b962dc27 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -600,8 +600,11 @@ extern void slb_set_size(u16 size); * */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) + +// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2) + /* * For platforms that support on 65bit VA we limit the context bits */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index f6968c811026..a41e91bd0580 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index b01624e5c467..201a69e6a355 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1355,18 +1355,21 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va * Like pmd_huge() and pmd_large(), but works regardless of config options */ #define pmd_is_leaf pmd_is_leaf +#define pmd_leaf pmd_is_leaf static inline bool pmd_is_leaf(pmd_t pmd) { return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } #define pud_is_leaf pud_is_leaf +#define pud_leaf pud_is_leaf static inline bool pud_is_leaf(pud_t pud) { return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } #define pgd_is_leaf pgd_is_leaf +#define pgd_leaf pgd_is_leaf static inline bool pgd_is_leaf(pgd_t pgd) { return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 74d0db511099..3e3cdfaa76c6 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -96,23 +96,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current->thread.regs; diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 13bd870609c3..e90c073e437e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -350,6 +350,7 @@ #define H_SVM_PAGE_OUT 0xEF04 #define H_SVM_INIT_START 0xEF08 #define H_SVM_INIT_DONE 0xEF0C +#define H_SVM_INIT_ABORT 0xEF14 /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index a63ec938636d..635969b5b58e 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -691,8 +691,6 @@ static inline void iosync(void) * * ioremap_prot allows to specify the page flags as an argument and can * also be hooked by the platform via ppc_md. * - * * ioremap_nocache is identical to ioremap - * * * ioremap_wc enables write combining * * * ioremap_wt enables write through @@ -715,7 +713,6 @@ extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); -#define ioremap_nocache(addr, size) ioremap((addr), (size)) #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ ioremap_prot((addr), (size), pgprot_val(PAGE_KERNEL)) diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 50204e228f16..5a9834e0e2d1 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -19,8 +19,9 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long kvmppc_h_svm_init_start(struct kvm *kvm); unsigned long kvmppc_h_svm_init_done(struct kvm *kvm); int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); +unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm); + struct kvm *kvm, bool skip_page_out); #else static inline int kvmppc_uvmem_init(void) { @@ -62,6 +63,11 @@ static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) return H_UNSUPPORTED; } +static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) +{ + return H_UNSUPPORTED; +} + static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) { return -EFAULT; @@ -69,6 +75,6 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm) { } + struct kvm *kvm, bool skip_page_out) { } #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0a398f2321c2..6e8b8ffd06ad 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -278,6 +278,7 @@ struct kvm_resize_hpt; /* Flag values for kvm_arch.secure_guest */ #define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */ #define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */ +#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */ struct kvm_arch { unsigned int lpid; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3d2f871241a8..bc2494e5710a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -119,8 +119,7 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, enum xlate_readwrite xlrw, struct kvmppc_pte *pte); -extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, - unsigned int id); +extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu); extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_processor_compat(void); @@ -274,7 +273,7 @@ struct kvmppc_ops { void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); int (*check_requests)(struct kvm_vcpu *vcpu); int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log); diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 58efca934311..360367c579de 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -238,11 +238,6 @@ static inline void arch_unmap(struct mm_struct *mm, mm->context.vdso_base = 0; } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - #ifdef CONFIG_PPC_MEM_KEYS bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign); diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb..29c43665a753 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index e9a960e28f3c..860228e917dc 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -15,6 +15,7 @@ * * (the type definitions are in asm/spinlock_types.h) */ +#include <linux/jump_label.h> #include <linux/irqflags.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -36,10 +37,12 @@ #endif #ifdef CONFIG_PPC_PSERIES +DECLARE_STATIC_KEY_FALSE(shared_processor); + #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + if (!static_branch_unlikely(&shared_processor)) return false; return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); } @@ -110,13 +113,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; static inline bool is_shared_processor(void) { -/* - * LPPACA is only available on Pseries so guard anything LPPACA related to - * allow other platforms (which include this common header) to compile. - */ -#ifdef CONFIG_PPC_PSERIES - return (IS_ENABLED(CONFIG_PPC_SPLPAR) && - lppaca_shared_proc(local_paca->lppaca_ptr)); +#ifdef CONFIG_PPC_SPLPAR + return static_branch_unlikely(&shared_processor); #else return false; #endif diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929d..7f3a8b902325 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing, Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include <asm-generic/tlb.h> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 15002b51ff18..c92fe7fe9692 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) return n; } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { @@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(addr, size))) { allow_write_to_user(addr, size); - ret = __clear_user(addr, size); + ret = __arch_clear_user(addr, size); prevent_write_to_user(addr, size); } return ret; } +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); +} + extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h new file mode 100644 index 000000000000..b992dfaaa161 --- /dev/null +++ b/arch/powerpc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_POWERPC_VMALLOC_H +#define _ASM_POWERPC_VMALLOC_H + +#endif /* _ASM_POWERPC_VMALLOC_H */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index f2dfcd50a2d3..33aee7490cbb 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -39,6 +39,7 @@ #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 +#define XIVE_ESB_INVALID 0xFF /* * Thread Management (aka "TM") registers diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d60908ea37fb..e1a4c39b83b8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -897,7 +897,7 @@ resume_kernel: bne- 0b 1: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ @@ -921,7 +921,7 @@ resume_kernel: */ bl trace_hardirqs_on #endif -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ restore_kuap: kuap_restore r1, r2, r9, r10, r0 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3fd3ef352e3f..a9a1d3cdb523 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -846,7 +846,7 @@ resume_kernel: bne- 0b 1: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION /* Check if we need to preempt */ andi. r0,r4,_TIF_NEED_RESCHED beq+ restore @@ -877,7 +877,7 @@ resume_kernel: li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ .globl fast_exc_return_irq fast_exc_return_irq: diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5645bc9cbc09..add67498c126 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. * @@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { __do_irq(regs); diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 7cea5978f21f..f061e06e9f51 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -479,8 +479,10 @@ static void __init fixup_port_irq(int index, port->irq = virq; #ifdef CONFIG_SERIAL_8250_FSL - if (of_device_is_compatible(np, "fsl,ns16550")) + if (of_device_is_compatible(np, "fsl,ns16550")) { port->handle_irq = fsl8250_handle_irq; + port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + } #endif } diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index be3758d54e59..877817471e3c 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -39,10 +39,10 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return 0; } -static const struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap +static const struct proc_ops page_map_proc_ops = { + .proc_lseek = page_map_seek, + .proc_read = page_map_read, + .proc_mmap = page_map_mmap, }; @@ -51,7 +51,7 @@ static int __init proc_ppc64_init(void) struct proc_dir_entry *pde; pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_fops, vdso_data); + &page_map_proc_ops, vdso_data); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 487dcd8da4de..2d33f342a293 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -159,12 +159,12 @@ static int poweron_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_poweron_show, NULL); } -static const struct file_operations ppc_rtas_poweron_operations = { - .open = poweron_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_poweron_write, - .release = single_release, +static const struct proc_ops ppc_rtas_poweron_proc_ops = { + .proc_open = poweron_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_poweron_write, + .proc_release = single_release, }; static int progress_open(struct inode *inode, struct file *file) @@ -172,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_progress_show, NULL); } -static const struct file_operations ppc_rtas_progress_operations = { - .open = progress_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_progress_write, - .release = single_release, +static const struct proc_ops ppc_rtas_progress_proc_ops = { + .proc_open = progress_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_progress_write, + .proc_release = single_release, }; static int clock_open(struct inode *inode, struct file *file) @@ -185,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_clock_show, NULL); } -static const struct file_operations ppc_rtas_clock_operations = { - .open = clock_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_clock_write, - .release = single_release, +static const struct proc_ops ppc_rtas_clock_proc_ops = { + .proc_open = clock_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_clock_write, + .proc_release = single_release, }; static int tone_freq_open(struct inode *inode, struct file *file) @@ -198,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_freq_show, NULL); } -static const struct file_operations ppc_rtas_tone_freq_operations = { - .open = tone_freq_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_tone_freq_write, - .release = single_release, +static const struct proc_ops ppc_rtas_tone_freq_proc_ops = { + .proc_open = tone_freq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_tone_freq_write, + .proc_release = single_release, }; static int tone_volume_open(struct inode *inode, struct file *file) @@ -211,12 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_volume_show, NULL); } -static const struct file_operations ppc_rtas_tone_volume_operations = { - .open = tone_volume_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_tone_volume_write, - .release = single_release, +static const struct proc_ops ppc_rtas_tone_volume_proc_ops = { + .proc_open = tone_volume_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_tone_volume_write, + .proc_release = single_release, }; static int ppc_rtas_find_all_sensors(void); @@ -238,17 +238,17 @@ static int __init proc_rtas_init(void) return -ENODEV; proc_create("powerpc/rtas/progress", 0644, NULL, - &ppc_rtas_progress_operations); + &ppc_rtas_progress_proc_ops); proc_create("powerpc/rtas/clock", 0644, NULL, - &ppc_rtas_clock_operations); + &ppc_rtas_clock_proc_ops); proc_create("powerpc/rtas/poweron", 0644, NULL, - &ppc_rtas_poweron_operations); + &ppc_rtas_poweron_proc_ops); proc_create_single("powerpc/rtas/sensors", 0444, NULL, ppc_rtas_sensors_show); proc_create("powerpc/rtas/frequency", 0644, NULL, - &ppc_rtas_tone_freq_operations); + &ppc_rtas_tone_freq_proc_ops); proc_create("powerpc/rtas/volume", 0644, NULL, - &ppc_rtas_tone_volume_operations); + &ppc_rtas_tone_volume_proc_ops); proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL, ppc_rtas_rmo_buf_show); return 0; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 84f794782c62..a99179d83538 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -655,7 +655,7 @@ struct rtas_flash_file { const char *filename; const char *rtas_call_name; int *status; - const struct file_operations fops; + const struct proc_ops ops; }; static const struct rtas_flash_file rtas_flash_files[] = { @@ -663,36 +663,36 @@ static const struct rtas_flash_file rtas_flash_files[] = { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, .rtas_call_name = "ibm,update-flash-64-and-reboot", .status = &rtas_update_flash_data.status, - .fops.read = rtas_flash_read_msg, - .fops.write = rtas_flash_write, - .fops.release = rtas_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = rtas_flash_read_msg, + .ops.proc_write = rtas_flash_write, + .ops.proc_release = rtas_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, .rtas_call_name = "ibm,update-flash-64-and-reboot", .status = &rtas_update_flash_data.status, - .fops.read = rtas_flash_read_num, - .fops.write = rtas_flash_write, - .fops.release = rtas_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = rtas_flash_read_num, + .ops.proc_write = rtas_flash_write, + .ops.proc_release = rtas_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, .rtas_call_name = "ibm,validate-flash-image", .status = &rtas_validate_flash_data.status, - .fops.read = validate_flash_read, - .fops.write = validate_flash_write, - .fops.release = validate_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = validate_flash_read, + .ops.proc_write = validate_flash_write, + .ops.proc_release = validate_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, .rtas_call_name = "ibm,manage-flash-image", .status = &rtas_manage_flash_data.status, - .fops.read = manage_flash_read, - .fops.write = manage_flash_write, - .fops.llseek = default_llseek, + .ops.proc_read = manage_flash_read, + .ops.proc_write = manage_flash_write, + .ops.proc_lseek = default_llseek, } }; @@ -723,7 +723,7 @@ static int __init rtas_flash_init(void) const struct rtas_flash_file *f = &rtas_flash_files[i]; int token; - if (!proc_create(f->filename, 0600, NULL, &f->fops)) + if (!proc_create(f->filename, 0600, NULL, &f->ops)) goto enomem; /* diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 8d02e047f96a..89b798f8f656 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -385,12 +385,12 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait) return 0; } -static const struct file_operations proc_rtas_log_operations = { - .read = rtas_log_read, - .poll = rtas_log_poll, - .open = rtas_log_open, - .release = rtas_log_release, - .llseek = noop_llseek, +static const struct proc_ops rtas_log_proc_ops = { + .proc_read = rtas_log_read, + .proc_poll = rtas_log_poll, + .proc_open = rtas_log_open, + .proc_release = rtas_log_release, + .proc_lseek = noop_llseek, }; static int enable_surveillance(int timeout) @@ -572,7 +572,7 @@ static int __init rtas_init(void) return -ENODEV; entry = proc_create("powerpc/rtas/error_log", 0400, NULL, - &proc_rtas_log_operations); + &rtas_log_proc_ops); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 488f1eecc0de..7f8c890360fe 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -949,9 +949,6 @@ void __init setup_arch(char **cmdline_p) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); - if (IS_ENABLED(CONFIG_DUMMY_CONSOLE)) - conswitchp = &dummy_con; - if (ppc_md.setup_arch) ppc_md.setup_arch(); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 43f736ed47f2..35b61bfc1b1a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -517,3 +517,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 nospu clone3 ppc_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 58a59ee998e2..d07a8e12fa15 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -471,11 +471,6 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; @@ -789,9 +784,9 @@ void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu) { - return kvm->arch.kvm_ops->vcpu_create(kvm, id); + return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d381526c5c9b..6c372f5c61b6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -284,7 +284,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, - current->mm->pgd, false, pte_idx_ret); + kvm->mm->pgd, false, pte_idx_ret); rcu_read_unlock_sched(); if (ret == H_TOO_HARD) { /* this can't happen */ @@ -573,7 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; - mm = current->mm; + mm = kvm->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index da857c8ba6e4..744dba98e5d1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1102,7 +1102,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, unsigned int shift; if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START) - kvmppc_uvmem_drop_pages(memslot, kvm); + kvmppc_uvmem_drop_pages(memslot, kvm, true); if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 883a66e76638..ee6c103bb7d5 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -253,10 +253,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) } } + account_locked_vm(kvm->mm, + kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); + kvm_put_kvm(stt->kvm); - account_locked_vm(current->mm, - kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); return 0; @@ -272,6 +273,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, { struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; + struct mm_struct *mm = kvm->mm; unsigned long npages, size = args->size; int ret = -ENOMEM; @@ -280,7 +282,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EINVAL; npages = kvmppc_tce_pages(size); - ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true); + ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -326,7 +328,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kfree(stt); fail_acct: - account_locked_vm(current->mm, kvmppc_stt_pages(npages), false); + account_locked_vm(mm, kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index dc53578193ee..2cefd071b848 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1091,6 +1091,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_SVM_INIT_DONE: ret = kvmppc_h_svm_init_done(vcpu->kvm); break; + case H_SVM_INIT_ABORT: + ret = kvmppc_h_svm_init_abort(vcpu->kvm); + break; default: return RESUME_HOST; @@ -2271,22 +2274,16 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; int err; int core; struct kvmppc_vcore *vcore; + struct kvm *kvm; + unsigned int id; - err = -ENOMEM; - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; - - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; + kvm = vcpu->kvm; + id = vcpu->vcpu_id; vcpu->arch.shared = &vcpu->arch.shregs; #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -2368,7 +2365,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + return err; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2383,12 +2380,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, debugfs_vcpu_init(vcpu, id); - return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); + return 0; } static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode, @@ -2442,8 +2434,6 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) @@ -4285,7 +4275,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) user_vrsave = mfspr(SPRN_VRSAVE); vcpu->arch.wqp = &vcpu->arch.vcore->wq; - vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { @@ -4640,14 +4630,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&kvm->mm->mmap_sem); + vma = find_vma(kvm->mm, hva); if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) goto up_out; psize = vma_kernel_pagesize(vma); - up_read(¤t->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); /* We can handle 4k, 64k or 16M pages in the VRMA */ if (psize >= 0x1000000) @@ -4680,7 +4670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) return err; up_out: - up_read(¤t->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); goto out_srcu; } @@ -4983,7 +4973,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); kvm->arch.process_table = 0; - uv_svm_terminate(kvm->arch.lpid); + if (kvm->arch.secure_guest) + uv_svm_terminate(kvm->arch.lpid); kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } @@ -5476,7 +5467,7 @@ static int kvmhv_svm_off(struct kvm *kvm) continue; kvm_for_each_memslot(memslot, slots) { - kvmppc_uvmem_drop_pages(memslot, kvm); + kvmppc_uvmem_drop_pages(memslot, kvm, true); uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0496e66aaa56..c6fbbd29bd87 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1117,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r7, VCPU_GPR(R7)(r4) bne ret_to_ultra - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R0)(r4) @@ -1137,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * R3 = UV_RETURN */ ret_to_ultra: - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R3)(r4) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 2de264fc3156..79b1202b1c62 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -258,7 +258,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) * QEMU page table with normal PTEs from newly allocated pages. */ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm) + struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; @@ -276,7 +276,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = true; + pvt->skip_page_out = skip_page_out; mutex_unlock(&kvm->arch.uvmem_lock); pfn = gfn_to_pfn(kvm, gfn); @@ -286,6 +286,34 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, } } +unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) +{ + int srcu_idx; + struct kvm_memory_slot *memslot; + + /* + * Expect to be called only after INIT_START and before INIT_DONE. + * If INIT_DONE was completed, use normal VM termination sequence. + */ + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) + return H_STATE; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + kvm_for_each_memslot(memslot, kvm_memslots(kvm)) + kvmppc_uvmem_drop_pages(memslot, kvm, false); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + kvm->arch.secure_guest = 0; + uv_svm_terminate(kvm->arch.lpid); + + return H_PARAMETER; +} + /* * Get a free device PFN from the pool * @@ -543,7 +571,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, ret = migrate_vma_setup(&mig); if (ret) - return ret; + goto out; spage = migrate_pfn_to_page(*mig.src); if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ce4fcf76e53e..729a0f12a752 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1744,21 +1744,17 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s; - struct kvm_vcpu *vcpu; - int err = -ENOMEM; unsigned long p; + int err; - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; + err = -ENOMEM; vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) - goto free_vcpu; + goto out; vcpu->arch.book3s = vcpu_book3s; #ifdef CONFIG_KVM_BOOK3S_32_HANDLER @@ -1768,14 +1764,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, goto free_vcpu3s; #endif - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_shadow_vcpu; - - err = -ENOMEM; p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) - goto uninit_vcpu; + goto free_shadow_vcpu; vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ @@ -1806,22 +1797,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; - return vcpu; + return 0; -uninit_vcpu: - kvm_vcpu_uninit(vcpu); +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); free_shadow_vcpu: #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); free_vcpu3s: #endif vfree(vcpu_book3s); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) @@ -1829,12 +1818,10 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); - kvm_vcpu_uninit(vcpu); #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); #endif vfree(vcpu_book3s); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -2030,6 +2017,7 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, { /* We should not get called */ BUG(); + return 0; } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d83adb1e1490..6ef0151ff70a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -631,7 +631,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); pr_warn("Incompatible host page size %lx!\n", page_size); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index be9a45874194..7b27604adadf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -775,7 +775,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) debug = current->thread.debug; current->thread.debug = vcpu->arch.dbg_reg; - vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1377,36 +1377,6 @@ static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) update_timer_ints(vcpu); } -/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - int i; - int r; - - vcpu->arch.regs.nip = 0; - vcpu->arch.shared->pir = vcpu->vcpu_id; - kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ - kvmppc_set_msr(vcpu, 0); - -#ifndef CONFIG_KVM_BOOKE_HV - vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; - vcpu->arch.shadow_pid = 1; - vcpu->arch.shared->msr = 0; -#endif - - /* Eye-catching numbers so we know if the guest takes an interrupt - * before it's programmed its own IVPR/IVORs. */ - vcpu->arch.ivpr = 0x55550000; - for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) - vcpu->arch.ivor[i] = 0x7700 | i * 4; - - kvmppc_init_timing_stats(vcpu); - - r = kvmppc_core_vcpu_setup(vcpu); - kvmppc_sanity_check(vcpu); - return r; -} - int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) { /* setup watchdog timer once */ @@ -2114,9 +2084,40 @@ int kvmppc_core_init_vm(struct kvm *kvm) return kvm->arch.kvm_ops->init_vm(kvm); } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu) { - return kvm->arch.kvm_ops->vcpu_create(kvm, id); + int i; + int r; + + r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu); + if (r) + return r; + + /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ + vcpu->arch.regs.nip = 0; + vcpu->arch.shared->pir = vcpu->vcpu_id; + kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + kvmppc_set_msr(vcpu, 0); + +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; + vcpu->arch.shadow_pid = 1; + vcpu->arch.shared->msr = 0; +#endif + + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. */ + vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; + + kvmppc_init_timing_stats(vcpu); + + r = kvmppc_core_vcpu_setup(vcpu); + if (r) + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); + kvmppc_sanity_check(vcpu); + return r; } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 00649ca5fa9a..f2b4feaff6d2 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -433,31 +433,16 @@ static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500; - struct kvm_vcpu *vcpu; int err; - BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0); + vcpu_e500 = to_e500(vcpu); - vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_e500) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_e500->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) { - err = -ENOMEM; - goto uninit_vcpu; - } + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + return -ENOMEM; err = kvmppc_e500_tlb_init(vcpu_e500); if (err) @@ -469,18 +454,13 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, goto uninit_tlb; } - return vcpu; + return 0; uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); uninit_id: kvmppc_e500_id_table_free(vcpu_e500); -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); -out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) @@ -490,8 +470,6 @@ static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); kvmppc_e500_id_table_free(vcpu_e500); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } static int kvmppc_core_init_vm_e500(struct kvm *kvm) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 318e65c65999..e6b06cb2b92c 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -301,30 +301,20 @@ static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500; - struct kvm_vcpu *vcpu; int err; - vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_e500) { - err = -ENOMEM; - goto out; - } - vcpu = &vcpu_e500->vcpu; + BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0); + vcpu_e500 = to_e500(vcpu); /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */ vcpu->arch.oldpir = 0xffffffff; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - err = kvmppc_e500_tlb_init(vcpu_e500); if (err) - goto uninit_vcpu; + return err; vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (!vcpu->arch.shared) { @@ -332,17 +322,11 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, goto uninit_tlb; } - return vcpu; + return 0; uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); -uninit_vcpu: - kvm_vcpu_uninit(vcpu); - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); -out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) @@ -351,8 +335,6 @@ static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 2e496eb86e94..1139bc56e004 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 inst; - int ra, rs, rt; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; struct instruction_op op; @@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (emulated != EMULATE_DONE) return emulated; - ra = get_ra(inst); - rs = get_rs(inst); - rt = get_rt(inst); - vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 416fb3d2a1d0..1af96fb5dc6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -475,7 +475,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) #endif kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -720,22 +720,55 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, kvmppc_core_flush_memslot(kvm, slot); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - vcpu = kvmppc_core_vcpu_create(kvm, id); - if (!IS_ERR(vcpu)) { - vcpu->arch.wqp = &vcpu->wq; - kvmppc_create_vcpu_debugfs(vcpu, id); - } - return vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); + kvmppc_decrementer_func(vcpu); + + return HRTIMER_NORESTART; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + int err; + + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; + vcpu->arch.dec_expires = get_tb(); + +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + err = kvmppc_subarch_vcpu_init(vcpu); + if (err) + return err; + + err = kvmppc_core_vcpu_create(vcpu); + if (err) + goto out_vcpu_uninit; + + vcpu->arch.wqp = &vcpu->wq; + kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); + return 0; + +out_vcpu_uninit: + kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); + return err; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); @@ -758,11 +791,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) } kvmppc_core_vcpu_free(vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -770,37 +801,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - kvmppc_decrementer_func(vcpu); - - return HRTIMER_NORESTART; -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - int ret; - - hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; - vcpu->arch.dec_expires = get_tb(); - -#ifdef CONFIG_KVM_EXIT_TIMING - mutex_init(&vcpu->arch.exit_timing_lock); -#endif - ret = kvmppc_subarch_vcpu_init(vcpu); - return ret; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - kvmppc_mmu_destroy(vcpu); - kvmppc_subarch_vcpu_uninit(vcpu); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..1ddb26394e8a 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e..169872bc0892 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -17,7 +17,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +58,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -181,4 +181,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 56cc84520577..eba73ebd8ae5 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -103,7 +103,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, for (entry = 0; entry < entries; entry += chunk) { unsigned long n = min(entries - entry, chunk); - ret = get_user_pages(ua + (entry << PAGE_SHIFT), n, + ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n, FOLL_WRITE | FOLL_LONGTERM, mem->hpages + entry, NULL); if (ret == n) { @@ -167,9 +167,8 @@ good_exit: return 0; free_exit: - /* free the reference taken */ - for (i = 0; i < pinned; i++) - put_page(mem->hpages[i]); + /* free the references taken */ + unpin_user_pages(mem->hpages, pinned); vfree(mem->hpas); kfree(mem); @@ -215,7 +214,8 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) SetPageDirty(page); - put_page(page); + unpin_user_page(page); + mem->hpas[i] = 0; } } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75483b40fcb1..2bf7e1b4fd82 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9488b63dfc87..f5535eae637f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -151,10 +151,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); int ret; - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); @@ -289,6 +288,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to to-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 090af2d2d3e4..96eb8e43f39b 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -103,7 +103,7 @@ static void mmu_patch_addis(s32 *site, long simm) patch_instruction_site(site, instr); } -void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) { unsigned long s = offset; unsigned long v = PAGE_OFFSET + s; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 50d68d21ddcc..3c7dec70cda0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1616,11 +1616,11 @@ static ssize_t topology_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations topology_ops = { - .read = seq_read, - .write = topology_write, - .open = topology_open, - .release = single_release +static const struct proc_ops topology_proc_ops = { + .proc_read = seq_read, + .proc_write = topology_write, + .proc_open = topology_open, + .proc_release = single_release, }; static int topology_update_init(void) @@ -1630,7 +1630,7 @@ static int topology_update_init(void) if (vphn_enabled) topology_schedule_update(); - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) + if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops)) return -ENOMEM; topology_inited = 1; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42bbcd47cc85..dffe1a45b6ed 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif -static inline bool slice_addr_is_low(unsigned long addr) +static inline notrace bool slice_addr_is_low(unsigned long addr) { u64 tmp = (u64)addr; @@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, mm_ctx_user_psize(¤t->mm->context), 1); } -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; int index, mask_index; diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 6e5a2a4faeab..4ec2a9f14f84 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -97,12 +97,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #ifdef CONFIG_SMP #ifdef CONFIG_PPC64 #define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \ + do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ } while (0) #else #define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \ + do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ } while(0) #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d57b46e0dd60..0acc9d5fb19e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -321,7 +321,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ @@ -333,16 +333,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /*** Ancillary info loads ***/ case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2); PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, protocol)); break; case BPF_ANC | SKF_AD_IFINDEX: case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2); PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); @@ -365,17 +365,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, mark)); break; case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, hash)); break; case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); @@ -388,7 +388,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_ANDI(r_A, r_A, 1); break; case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 43245f4a9bcb..6ffcb80cf844 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -9,7 +9,7 @@ #include <linux/sched.h> #include <asm/processor.h> #include <linux/uaccess.h> -#include <asm/compat.h> +#include <linux/compat.h> #include <asm/oprofile_impl.h> #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 9680d766f20e..855eedb8d7d7 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -240,9 +240,6 @@ static void __init cell_setup_arch(void) init_pci_config_tokens(); cbe_pervasive_init(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif mmio_nvram_init(); } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 9cd6f3e1000b..47f73103ef74 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -183,9 +183,6 @@ static void __init maple_setup_arch(void) /* Lookup PCI hosts */ maple_pci_init(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif maple_use_rtas_reboot_and_halt_if_present(); printk(KERN_DEBUG "Using native/NAP idle loop\n"); diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 05a52f10c2f0..b612474f8f8e 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -147,10 +147,6 @@ static void __init pas_setup_arch(void) /* Lookup PCI hosts */ pas_pci_init(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - /* Remap SDC register for doing reset */ /* XXXOJN This should maybe come out of the device tree */ reset_reg = ioremap(0xfc101100, 4); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 8108b9b9b9ea..b29368931c56 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -200,10 +200,6 @@ static void __init ps3_setup_arch(void) smp_init_ps3(); #endif -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - prealloc_ps3fb_videomemory(); prealloc_ps3flash_bounce_buffer(); diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 91571841df8a..9dba7e880885 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -539,6 +539,16 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info, /* balloon page list reference */ get_page(newpage); + /* + * When we migrate a page to a different zone, we have to fixup the + * count of both involved zones as we adjusted the managed page count + * when inflating. + */ + if (page_zone(page) != page_zone(newpage)) { + adjust_managed_page_count(page, 1); + adjust_managed_page_count(newpage, -1); + } + spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_insert(b_dev_info, newpage); balloon_page_delete(page); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 60cb29ae4739..3c3da25b445c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -582,12 +582,12 @@ static int vcpudispatch_stats_open(struct inode *inode, struct file *file) return single_open(file, vcpudispatch_stats_display, NULL); } -static const struct file_operations vcpudispatch_stats_proc_ops = { - .open = vcpudispatch_stats_open, - .read = seq_read, - .write = vcpudispatch_stats_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops vcpudispatch_stats_proc_ops = { + .proc_open = vcpudispatch_stats_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static ssize_t vcpudispatch_stats_freq_write(struct file *file, @@ -626,12 +626,12 @@ static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file) return single_open(file, vcpudispatch_stats_freq_display, NULL); } -static const struct file_operations vcpudispatch_stats_freq_proc_ops = { - .open = vcpudispatch_stats_freq_open, - .read = seq_read, - .write = vcpudispatch_stats_freq_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { + .proc_open = vcpudispatch_stats_freq_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_freq_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static int __init vcpudispatch_stats_procfs_init(void) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc4b69b..f43e778dd7c8 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -698,12 +698,12 @@ static int lparcfg_open(struct inode *inode, struct file *file) return single_open(file, lparcfg_data, NULL); } -static const struct file_operations lparcfg_fops = { - .read = seq_read, - .write = lparcfg_write, - .open = lparcfg_open, - .release = single_release, - .llseek = seq_lseek, +static const struct proc_ops lparcfg_proc_ops = { + .proc_read = seq_read, + .proc_write = lparcfg_write, + .proc_open = lparcfg_open, + .proc_release = single_release, + .proc_lseek = seq_lseek, }; static int __init lparcfg_init(void) @@ -714,7 +714,7 @@ static int __init lparcfg_init(void) if (firmware_has_feature(FW_FEATURE_SPLPAR)) mode |= 0200; - if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { + if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops)) { printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 8a9c4fb95b8b..7f7369fec46b 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -391,9 +391,9 @@ out: return rv ? rv : count; } -static const struct file_operations ofdt_fops = { - .write = ofdt_write, - .llseek = noop_llseek, +static const struct proc_ops ofdt_proc_ops = { + .proc_write = ofdt_write, + .proc_lseek = noop_llseek, }; /* create /proc/powerpc/ofdt write-only by root */ @@ -401,7 +401,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops); + ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops); if (ent) proc_set_size(ent, 0); diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index a0001280503c..2879c4f0ceb7 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -152,13 +152,12 @@ static int scanlog_release(struct inode * inode, struct file * file) return 0; } -static const struct file_operations scanlog_fops = { - .owner = THIS_MODULE, - .read = scanlog_read, - .write = scanlog_write, - .open = scanlog_open, - .release = scanlog_release, - .llseek = noop_llseek, +static const struct proc_ops scanlog_proc_ops = { + .proc_read = scanlog_read, + .proc_write = scanlog_write, + .proc_open = scanlog_open, + .proc_release = scanlog_release, + .proc_lseek = noop_llseek, }; static int __init scanlog_init(void) @@ -176,7 +175,7 @@ static int __init scanlog_init(void) goto err; ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, - &scanlog_fops); + &scanlog_proc_ops); if (!ent) goto err; return 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0a40201f315f..0c8421dd01ab 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -74,6 +74,9 @@ #include "pseries.h" #include "../../../../drivers/pci/pci.h" +DEFINE_STATIC_KEY_FALSE(shared_processor); +EXPORT_SYMBOL_GPL(shared_processor); + int CMO_PrPSP = -1; int CMO_SecPSP = -1; unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); @@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); + + if (lppaca_shared_proc(get_lppaca())) + static_branch_enable(&shared_processor); + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f5fadbd2533a..9651ca061828 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -972,12 +972,21 @@ static int xive_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + u8 pq; switch (which) { case IRQCHIP_STATE_ACTIVE: - *state = !xd->stale_p && - (xd->saved_p || - !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); return 0; default: return -EINVAL; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 759ffb00267c..73f029eae0cc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,8 +12,6 @@ config 32BIT config RISCV def_bool y - # even on 32-bit, physical (and DMA) addresses are > 32-bits - select PHYS_ADDR_T_64BIT select OF select OF_EARLY_FLATTREE select OF_IRQ @@ -57,6 +55,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY if SMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB + select ARCH_HAS_DEBUG_VIRTUAL select HAVE_EBPF_JIT if 64BIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE @@ -64,6 +63,9 @@ config RISCV select SPARSEMEM_STATIC if 32BIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select HAVE_ARCH_MMAP_RND_BITS if MMU + select ARCH_HAS_GCOV_PROFILE_ALL + select HAVE_COPY_THREAD_TLS + select HAVE_ARCH_KASAN if MMU && 64BIT config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -154,7 +156,7 @@ config GENERIC_HWEIGHT def_bool y config FIX_EARLYCON_MEM - def_bool CONFIG_MMU + def_bool MMU config PGTABLE_LEVELS int diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 634759ac8c71..d325b67d00df 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -2,8 +2,8 @@ menu "SoC selection" config SOC_SIFIVE bool "SiFive SoCs" - select SERIAL_SIFIVE - select SERIAL_SIFIVE_CONSOLE + select SERIAL_SIFIVE if TTY + select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE select CLK_SIFIVE_FU540_PRCI select SIFIVE_PLIC diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index a474f98ce4fa..36db8145f9f4 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -24,7 +24,7 @@ $(obj)/Image: vmlinux FORCE $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) -loader.o: $(src)/loader.S $(obj)/Image +$(obj)/loader.o: $(src)/loader.S $(obj)/Image $(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE $(Q)$(LD) -T $(obj)/loader.lds -o $@ $(obj)/loader.o diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 70a1891e7cd0..7db861053483 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -54,6 +54,7 @@ reg = <1>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu1_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -77,6 +78,7 @@ reg = <2>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu2_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -100,6 +102,7 @@ reg = <3>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu3_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -123,6 +126,7 @@ reg = <4>; riscv,isa = "rv64imafdc"; tlb-split; + next-level-cache = <&l2cache>; cpu4_intc: interrupt-controller { #interrupt-cells = <1>; compatible = "riscv,cpu-intc"; @@ -253,6 +257,30 @@ #pwm-cells = <3>; status = "disabled"; }; - + l2cache: cache-controller@2010000 { + compatible = "sifive,fu540-c000-ccache", "cache"; + cache-block-size = <64>; + cache-level = <2>; + cache-sets = <1024>; + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic0>; + interrupts = <1 2 3>; + reg = <0x0 0x2010000 0x0 0x1000>; + }; + gpio: gpio@10060000 { + compatible = "sifive,fu540-c000-gpio", "sifive,gpio0"; + interrupt-parent = <&plic0>; + interrupts = <7>, <8>, <9>, <10>, <11>, <12>, <13>, + <14>, <15>, <16>, <17>, <18>, <19>, <20>, + <21>, <22>; + reg = <0x0 0x10060000 0x0 0x1000>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; + }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 88cfcb96bf23..609198cb1163 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -94,3 +94,7 @@ &pwm1 { status = "okay"; }; + +&gpio { + status = "okay"; +}; diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index dd62b691c443..27e005fca584 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -5,4 +5,8 @@ #include <linux/ftrace.h> #include <asm-generic/asm-prototypes.h> +long long __lshrti3(long long a, int b); +long long __ashrti3(long long a, int b); +long long __ashlti3(long long a, int b); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 0a62d2d68455..435b65532e29 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -116,9 +116,9 @@ # define SR_PIE SR_MPIE # define SR_PP SR_MPP -# define IRQ_SOFT IRQ_M_SOFT -# define IRQ_TIMER IRQ_M_TIMER -# define IRQ_EXT IRQ_M_EXT +# define RV_IRQ_SOFT IRQ_M_SOFT +# define RV_IRQ_TIMER IRQ_M_TIMER +# define RV_IRQ_EXT IRQ_M_EXT #else /* CONFIG_RISCV_M_MODE */ # define CSR_STATUS CSR_SSTATUS # define CSR_IE CSR_SIE @@ -133,15 +133,15 @@ # define SR_PIE SR_SPIE # define SR_PP SR_SPP -# define IRQ_SOFT IRQ_S_SOFT -# define IRQ_TIMER IRQ_S_TIMER -# define IRQ_EXT IRQ_S_EXT +# define RV_IRQ_SOFT IRQ_S_SOFT +# define RV_IRQ_TIMER IRQ_S_TIMER +# define RV_IRQ_EXT IRQ_S_EXT #endif /* CONFIG_RISCV_M_MODE */ /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */ -#define IE_SIE (_AC(0x1, UL) << IRQ_SOFT) -#define IE_TIE (_AC(0x1, UL) << IRQ_TIMER) -#define IE_EIE (_AC(0x1, UL) << IRQ_EXT) +#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT) +#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER) +#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT) #ifndef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h index 7b0f92ba0acc..e0b319af3681 100644 --- a/arch/riscv/include/asm/image.h +++ b/arch/riscv/include/asm/image.h @@ -42,7 +42,7 @@ * @res2: reserved * @magic: Magic number (RISC-V specific; deprecated) * @magic2: Magic number 2 (to match the ARM64 'magic' field pos) - * @res4: reserved (will be used for PE COFF offset) + * @res3: reserved (will be used for PE COFF offset) * * The intention is for this header format to be shared between multiple * architectures to avoid a proliferation of image header formats. @@ -59,7 +59,7 @@ struct riscv_image_header { u64 res2; u64 magic; u32 magic2; - u32 res4; + u32 res3; }; #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_IMAGE_H */ diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h new file mode 100644 index 000000000000..eee6e6588b12 --- /dev/null +++ b/arch/riscv/include/asm/kasan.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Andes Technology Corporation */ + +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <asm/pgtable.h> + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT)) +#define KASAN_SHADOW_START 0xffffffc000000000 /* 2^64 - 2^38 */ +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) + +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ + (64 - KASAN_SHADOW_SCALE_SHIFT))) + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#endif +#endif +#endif /* __ASM_KASAN_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index ac699246ae7e..8ca1930caa44 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -100,8 +100,20 @@ extern unsigned long pfn_base; extern unsigned long max_low_pfn; extern unsigned long min_low_pfn; -#define __pa(x) ((unsigned long)(x) - va_pa_offset) -#define __va(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) + +#ifdef CONFIG_DEBUG_VIRTUAL +extern phys_addr_t __virt_to_phys(unsigned long x); +extern phys_addr_t __phys_addr_symbol(unsigned long x); +#else +#define __virt_to_phys(x) __va_to_pa_nodebug(x) +#define __phys_addr_symbol(x) __va_to_pa_nodebug(x) +#endif /* CONFIG_DEBUG_VIRTUAL */ + +#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) +#define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __va(x) ((void *)__pa_to_va_nodebug((phys_addr_t)(x))) #define phys_to_pfn(phys) (PFN_DOWN(phys)) #define pfn_to_phys(pfn) (PFN_PHYS(pfn)) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index aefbfaa6a781..0234048b12bc 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -82,4 +82,8 @@ struct riscv_pmu { int irq; }; +#ifdef CONFIG_PERF_EVENTS +#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs +#endif + #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 74630989006d..b15f70a1fdfa 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -43,6 +43,13 @@ static inline int pud_bad(pud_t pud) return !pud_present(pud); } +#define pud_leaf pud_leaf +static inline int pud_leaf(pud_t pud) +{ + return pud_present(pud) && + (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; @@ -58,6 +65,11 @@ static inline unsigned long pud_page_vaddr(pud_t pud) return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); } +static inline struct page *pud_page(pud_t pud) +{ + return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); +} + #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ff0ed4f292e..e43041519edd 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -90,6 +90,31 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (VMALLOC_END) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. + */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -105,6 +130,13 @@ static inline int pmd_bad(pmd_t pmd) return !pmd_present(pmd); } +#define pmd_leaf pmd_leaf +static inline int pmd_leaf(pmd_t pmd) +{ + return pmd_present(pmd) && + (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; @@ -400,23 +432,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. - */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -#define vmemmap ((struct page *)VMEMMAP_START) - #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) diff --git a/arch/riscv/include/asm/sifive_l2_cache.h b/arch/riscv/include/asm/sifive_l2_cache.h deleted file mode 100644 index 04f6748fc50b..000000000000 --- a/arch/riscv/include/asm/sifive_l2_cache.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SiFive L2 Cache Controller header file - * - */ - -#ifndef _ASM_RISCV_SIFIVE_L2_CACHE_H -#define _ASM_RISCV_SIFIVE_L2_CACHE_H - -extern int register_sifive_l2_error_notifier(struct notifier_block *nb); -extern int unregister_sifive_l2_error_notifier(struct notifier_block *nb); - -#define SIFIVE_L2_ERR_TYPE_CE 0 -#define SIFIVE_L2_ERR_TYPE_UE 1 - -#endif /* _ASM_RISCV_SIFIVE_L2_CACHE_H */ diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 1b5d44585962..924af13f8555 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -11,8 +11,17 @@ #define __HAVE_ARCH_MEMSET extern asmlinkage void *memset(void *, int, size_t); +extern asmlinkage void *__memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY extern asmlinkage void *memcpy(void *, const void *, size_t); +extern asmlinkage void *__memcpy(void *, const void *, size_t); +/* For those files which don't want to check by kasan. */ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#endif #endif /* _ASM_RISCV_STRING_H */ diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h new file mode 100644 index 000000000000..ff9abc00d139 --- /dev/null +++ b/arch/riscv/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_RISCV_VMALLOC_H +#define _ASM_RISCV_VMALLOC_H + +#endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/include/uapi/asm/bpf_perf_event.h b/arch/riscv/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..6cb1c2823288 --- /dev/null +++ b/arch/riscv/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <asm/ptrace.h> + +typedef struct user_regs_struct bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index a1349ca64669..bad4d85b5e91 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -155,7 +155,7 @@ _save_context: REG_L x2, PT_SP(sp) .endm -#if !IS_ENABLED(CONFIG_PREEMPT) +#if !IS_ENABLED(CONFIG_PREEMPTION) .set resume_kernel, restore_all #endif @@ -246,6 +246,7 @@ check_syscall_nr: */ li t1, -1 beq a7, t1, ret_from_syscall_rejected + blt a7, t1, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR @@ -304,7 +305,7 @@ restore_all: sret #endif -#if IS_ENABLED(CONFIG_PREEMPT) +#if IS_ENABLED(CONFIG_PREEMPTION) resume_kernel: REG_L s0, TASK_TI_PREEMPT_COUNT(tp) bnez s0, restore_all diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b94d8db5ddcc..c40fdcdeb950 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = return_hooker; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 84a6f0a4b120..271860fc2c3f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -80,7 +80,9 @@ _start_kernel: #ifdef CONFIG_SMP li t0, CONFIG_NR_CPUS - bgeu a0, t0, .Lsecondary_park + blt a0, t0, .Lgood_cores + tail .Lsecondary_park +.Lgood_cores: #endif /* Pick one hart to run the main boot sequence */ @@ -119,6 +121,9 @@ clear_bss_done: sw zero, TASK_TI_CPU(tp) la sp, init_thread_union + THREAD_SIZE +#ifdef CONFIG_KASAN + call kasan_early_init +#endif /* Start the kernel */ call parse_dtb tail start_kernel @@ -209,11 +214,6 @@ relocate: tail smp_callin #endif -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park END(_start) #ifdef CONFIG_RISCV_M_MODE @@ -246,12 +246,12 @@ ENTRY(reset_regs) li t4, 0 li t5, 0 li t6, 0 - csrw sscratch, 0 + csrw CSR_SCRATCH, 0 #ifdef CONFIG_FPU csrr t0, CSR_MISA andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D) - bnez t0, .Lreset_regs_done + beqz t0, .Lreset_regs_done li t1, SR_FS csrs CSR_STATUS, t1 @@ -295,6 +295,13 @@ ENTRY(reset_regs) END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ +.section ".text", "ax",@progbits +.align 2 +.Lsecondary_park: + /* We lack SMP support or have too many harts, so park this hart */ + wfi + j .Lsecondary_park + __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 3f07a91d5afb..345c4f2eba13 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -23,11 +23,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) irq_enter(); switch (regs->cause & ~CAUSE_IRQ_FLAG) { - case IRQ_TIMER: + case RV_IRQ_TIMER: riscv_timer_interrupt(); break; #ifdef CONFIG_SMP - case IRQ_SOFT: + case RV_IRQ_SOFT: /* * We only use software interrupts to pass IPIs, so if a non-SMP * system gets one, then we don't know what to do. @@ -35,7 +35,7 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) riscv_software_interrupt(); break; #endif - case IRQ_EXT: + case RV_IRQ_EXT: handle_arch_irq(regs); break; default: diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 95a3031e5c7c..817cf7b0974c 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -99,8 +99,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -121,7 +121,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) /* User fork */ childregs->sp = usp; if (clone_flags & CLONE_SETTLS) - childregs->tp = childregs->a5; + childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ p->thread.ra = (unsigned long)ret_from_fork; } diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 4800cf703186..450492e1cb4e 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -9,8 +9,7 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__asm_copy_to_user); -EXPORT_SYMBOL(__asm_copy_from_user); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 365ff8420bfe..0a6d415b0a5a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include <asm/tlbflush.h> #include <asm/thread_info.h> +#include <asm/kasan.h> #include "head.h" @@ -74,12 +75,12 @@ void __init setup_arch(char **cmdline_p) swiotlb_init(1); #endif -#ifdef CONFIG_SMP - setup_smp(); +#ifdef CONFIG_KASAN + kasan_init(); #endif -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; +#ifdef CONFIG_SMP + setup_smp(); #endif riscv_fill_hwcap(); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 49a5852fd07d..33b16f4212f7 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -58,7 +58,8 @@ quiet_cmd_vdsold = VDSOLD $@ cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \ $(CROSS_COMPILE)objcopy \ - $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 12f42f96d46e..1e0193ded420 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ SECTIONS KPROBES_TEXT ENTRY_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) _etext = .; } diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index b4c477846e91..51ab716253fa 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,7 +7,8 @@ #include <asm/asm.h> /* void *memcpy(void *, const void *, size_t) */ -ENTRY(memcpy) +ENTRY(__memcpy) +WEAK(memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -104,4 +105,4 @@ ENTRY(memcpy) bltu a1, a3, 5b 6: ret -END(memcpy) +END(__memcpy) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 5a7386b47175..34c5360c6705 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,7 +8,8 @@ #include <asm/asm.h> /* void *memset(void *, int, size_t) */ -ENTRY(memset) +ENTRY(__memset) +WEAK(memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -109,4 +110,4 @@ ENTRY(memset) bltu t0, a3, 5b 6: ret -END(memset) +END(__memset) diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S index 15f9d54c7db6..ef90075c4b0a 100644 --- a/arch/riscv/lib/tishift.S +++ b/arch/riscv/lib/tishift.S @@ -4,34 +4,73 @@ */ #include <linux/linkage.h> +#include <asm-generic/export.h> -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) beqz a2, .L1 li a5,64 sub a5,a5,a2 - addi sp,sp,-16 sext.w a4,a5 blez a5, .L2 sext.w a2,a2 - sll a4,a1,a4 srl a0,a0,a2 - srl a1,a1,a2 + sll a4,a1,a4 + srl a2,a1,a2 or a0,a0,a4 - sd a1,8(sp) - sd a0,0(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 - ret + mv a1,a2 .L1: ret .L2: - negw a4,a4 - srl a1,a1,a4 - sd a1,0(sp) - sd zero,8(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 + negw a0,a4 + li a2,0 + srl a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) + +SYM_FUNC_START(__ashrti3) + beqz a2, .L3 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L4 + sext.w a2,a2 + srl a0,a0,a2 + sll a4,a1,a4 + sra a2,a1,a2 + or a0,a0,a4 + mv a1,a2 +.L3: + ret +.L4: + negw a0,a4 + srai a2,a1,0x3f + sra a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__ashlti3) + beqz a2, .L5 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L6 + sext.w a2,a2 + sll a1,a1,a2 + srl a4,a0,a4 + sll a2,a0,a2 + or a1,a1,a4 + mv a0,a2 +.L5: + ret +.L6: + negw a1,a4 + li a2,0 + sll a1,a0,a1 + mv a0,a2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index fecd65657a6f..f29d2ba2c0a6 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -1,4 +1,5 @@ #include <linux/linkage.h> +#include <asm-generic/export.h> #include <asm/asm.h> #include <asm/csr.h> @@ -66,6 +67,8 @@ ENTRY(__asm_copy_from_user) j 3b ENDPROC(__asm_copy_to_user) ENDPROC(__asm_copy_from_user) +EXPORT_SYMBOL(__asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_from_user) ENTRY(__clear_user) @@ -108,6 +111,7 @@ ENTRY(__clear_user) bltu a0, a3, 5b j 3b ENDPROC(__clear_user) +EXPORT_SYMBOL(__clear_user) .section .fixup,"ax" .balign 4 diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 3c8b33258457..50b7af58c566 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -10,9 +10,16 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o obj-y += cacheflush.o obj-y += context.o -obj-y += sifive_l2_cache.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o + +ifdef CONFIG_KASAN +KASAN_SANITIZE_kasan_init.o := n +KASAN_SANITIZE_init.o := n +endif + +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 8f1900686640..8930ab7278e6 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -22,6 +22,7 @@ void flush_icache_all(void) else on_each_cpu(ipi_remote_fence_i, NULL, 1); } +EXPORT_SYMBOL(flush_icache_all); /* * Performs an icache flush for the given MM context. RISC-V has no direct diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 69f6678db7f3..965a8cf4829c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -99,13 +99,13 @@ static void __init setup_initrd(void) pr_info("initrd not found or empty"); goto disable; } - if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) { pr_err("initrd extends beyond end of memory"); goto disable; } size = initrd_end - initrd_start; - memblock_reserve(__pa(initrd_start), size); + memblock_reserve(__pa_symbol(initrd_start), size); initrd_below_start_ok = 1; pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", @@ -124,8 +124,8 @@ void __init setup_bootmem(void) { struct memblock_region *reg; phys_addr_t mem_size = 0; - phys_addr_t vmlinux_end = __pa(&_end); - phys_addr_t vmlinux_start = __pa(&_start); + phys_addr_t vmlinux_end = __pa_symbol(&_end); + phys_addr_t vmlinux_start = __pa_symbol(&_start); /* Find the memory region containing the kernel */ for_each_memblock(memory, reg) { @@ -445,7 +445,7 @@ static void __init setup_vm_final(void) /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, - __pa(fixmap_pgd_next), + __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); /* Map all memory banks */ @@ -474,7 +474,7 @@ static void __init setup_vm_final(void) clear_fixmap(FIX_PMD); /* Move to swapper page table */ - csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE); + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); } #else diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c new file mode 100644 index 000000000000..f0cc86040587 --- /dev/null +++ b/arch/riscv/mm/kasan_init.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Andes Technology Corporation + +#include <linux/pfn.h> +#include <linux/init_task.h> +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <asm/tlbflush.h> +#include <asm/pgtable.h> +#include <asm/fixmap.h> + +extern pgd_t early_pg_dir[PTRS_PER_PGD]; +asmlinkage void __init kasan_early_init(void) +{ + uintptr_t i; + pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + + for (i = 0; i < PTRS_PER_PTE; ++i) + set_pte(kasan_early_shadow_pte + i, + mk_pte(virt_to_page(kasan_early_shadow_page), + PAGE_KERNEL)); + + for (i = 0; i < PTRS_PER_PMD; ++i) + set_pmd(kasan_early_shadow_pmd + i, + pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)), + __pgprot(_PAGE_TABLE))); + + for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; + i += PGDIR_SIZE, ++pgd) + set_pgd(pgd, + pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); + + /* init for swapper_pg_dir */ + pgd = pgd_offset_k(KASAN_SHADOW_START); + + for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; + i += PGDIR_SIZE, ++pgd) + set_pgd(pgd, + pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); + + flush_tlb_all(); +} + +static void __init populate(void *start, void *end) +{ + unsigned long i; + unsigned long vaddr = (unsigned long)start & PAGE_MASK; + unsigned long vend = PAGE_ALIGN((unsigned long)end); + unsigned long n_pages = (vend - vaddr) / PAGE_SIZE; + unsigned long n_pmds = + (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 : + n_pages / PTRS_PER_PTE; + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE); + pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); + + for (i = 0; i < n_pages; i++) { + phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + + set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); + } + + for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))), + __pgprot(_PAGE_TABLE))); + + for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE) + set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))), + __pgprot(_PAGE_TABLE))); + + flush_tlb_all(); + memset(start, 0, end - start); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + unsigned long i; + + kasan_populate_early_shadow((void *)KASAN_SHADOW_START, + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + + for_each_memblock(memory, reg) { + void *start = (void *)__va(reg->base); + void *end = (void *)__va(reg->base + reg->size); + + if (start >= end) + break; + + populate(kasan_mem_to_shadow(start), + kasan_mem_to_shadow(end)); + }; + + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_early_shadow_pte[i], + mk_pte(virt_to_page(kasan_early_shadow_page), + __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED))); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + init_task.kasan_depth = 0; +} diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c new file mode 100644 index 000000000000..e8e4dcd39fed --- /dev/null +++ b/arch/riscv/mm/physaddr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> +#include <linux/mmdebug.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/sections.h> + +phys_addr_t __virt_to_phys(unsigned long x) +{ + phys_addr_t y = x - PAGE_OFFSET; + + /* + * Boundary checking aginst the kernel linear mapping space. + */ + WARN(y >= KERN_VIRT_SIZE, + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, (void *)x); + + return __va_to_pa_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + unsigned long kernel_start = (unsigned long)PAGE_OFFSET; + unsigned long kernel_end = (unsigned long)_end; + + /* + * Boundary checking aginst the kernel image mapping. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < kernel_start || x > kernel_end); + + return __va_to_pa_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c deleted file mode 100644 index a9ffff3277c7..000000000000 --- a/arch/riscv/mm/sifive_l2_cache.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * SiFive L2 cache controller Driver - * - * Copyright (C) 2018-2019 SiFive, Inc. - * - */ -#include <linux/debugfs.h> -#include <linux/interrupt.h> -#include <linux/of_irq.h> -#include <linux/of_address.h> -#include <asm/sifive_l2_cache.h> - -#define SIFIVE_L2_DIRECCFIX_LOW 0x100 -#define SIFIVE_L2_DIRECCFIX_HIGH 0x104 -#define SIFIVE_L2_DIRECCFIX_COUNT 0x108 - -#define SIFIVE_L2_DATECCFIX_LOW 0x140 -#define SIFIVE_L2_DATECCFIX_HIGH 0x144 -#define SIFIVE_L2_DATECCFIX_COUNT 0x148 - -#define SIFIVE_L2_DATECCFAIL_LOW 0x160 -#define SIFIVE_L2_DATECCFAIL_HIGH 0x164 -#define SIFIVE_L2_DATECCFAIL_COUNT 0x168 - -#define SIFIVE_L2_CONFIG 0x00 -#define SIFIVE_L2_WAYENABLE 0x08 -#define SIFIVE_L2_ECCINJECTERR 0x40 - -#define SIFIVE_L2_MAX_ECCINTR 3 - -static void __iomem *l2_base; -static int g_irq[SIFIVE_L2_MAX_ECCINTR]; - -enum { - DIR_CORR = 0, - DATA_CORR, - DATA_UNCORR, -}; - -#ifdef CONFIG_DEBUG_FS -static struct dentry *sifive_test; - -static ssize_t l2_write(struct file *file, const char __user *data, - size_t count, loff_t *ppos) -{ - unsigned int val; - - if (kstrtouint_from_user(data, count, 0, &val)) - return -EINVAL; - if ((val >= 0 && val < 0xFF) || (val >= 0x10000 && val < 0x100FF)) - writel(val, l2_base + SIFIVE_L2_ECCINJECTERR); - else - return -EINVAL; - return count; -} - -static const struct file_operations l2_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = l2_write -}; - -static void setup_sifive_debug(void) -{ - sifive_test = debugfs_create_dir("sifive_l2_cache", NULL); - - debugfs_create_file("sifive_debug_inject_error", 0200, - sifive_test, NULL, &l2_fops); -} -#endif - -static void l2_config_read(void) -{ - u32 regval, val; - - regval = readl(l2_base + SIFIVE_L2_CONFIG); - val = regval & 0xFF; - pr_info("L2CACHE: No. of Banks in the cache: %d\n", val); - val = (regval & 0xFF00) >> 8; - pr_info("L2CACHE: No. of ways per bank: %d\n", val); - val = (regval & 0xFF0000) >> 16; - pr_info("L2CACHE: Sets per bank: %llu\n", (uint64_t)1 << val); - val = (regval & 0xFF000000) >> 24; - pr_info("L2CACHE: Bytes per cache block: %llu\n", (uint64_t)1 << val); - - regval = readl(l2_base + SIFIVE_L2_WAYENABLE); - pr_info("L2CACHE: Index of the largest way enabled: %d\n", regval); -} - -static const struct of_device_id sifive_l2_ids[] = { - { .compatible = "sifive,fu540-c000-ccache" }, - { /* end of table */ }, -}; - -static ATOMIC_NOTIFIER_HEAD(l2_err_chain); - -int register_sifive_l2_error_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&l2_err_chain, nb); -} -EXPORT_SYMBOL_GPL(register_sifive_l2_error_notifier); - -int unregister_sifive_l2_error_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&l2_err_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_sifive_l2_error_notifier); - -static irqreturn_t l2_int_handler(int irq, void *device) -{ - unsigned int add_h, add_l; - - if (irq == g_irq[DIR_CORR]) { - add_h = readl(l2_base + SIFIVE_L2_DIRECCFIX_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DIRECCFIX_LOW); - pr_err("L2CACHE: DirError @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DirError interrupt sig */ - readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, - "DirECCFix"); - } - if (irq == g_irq[DATA_CORR]) { - add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW); - pr_err("L2CACHE: DataError @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DataError interrupt sig */ - readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE, - "DatECCFix"); - } - if (irq == g_irq[DATA_UNCORR]) { - add_h = readl(l2_base + SIFIVE_L2_DATECCFAIL_HIGH); - add_l = readl(l2_base + SIFIVE_L2_DATECCFAIL_LOW); - pr_err("L2CACHE: DataFail @ 0x%08X.%08X\n", add_h, add_l); - /* Reading this register clears the DataFail interrupt sig */ - readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT); - atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE, - "DatECCFail"); - } - - return IRQ_HANDLED; -} - -static int __init sifive_l2_init(void) -{ - struct device_node *np; - struct resource res; - int i, rc; - - np = of_find_matching_node(NULL, sifive_l2_ids); - if (!np) - return -ENODEV; - - if (of_address_to_resource(np, 0, &res)) - return -ENODEV; - - l2_base = ioremap(res.start, resource_size(&res)); - if (!l2_base) - return -ENOMEM; - - for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) { - g_irq[i] = irq_of_parse_and_map(np, i); - rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL); - if (rc) { - pr_err("L2CACHE: Could not request IRQ %d\n", g_irq[i]); - return rc; - } - } - - l2_config_read(); - -#ifdef CONFIG_DEBUG_FS - setup_sifive_debug(); -#endif - return 0; -} -device_initcall(sifive_l2_init); diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 5451ef3845f2..483f4ad7f4dc 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) return false; } +static void mark_fp(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); +} + static void mark_call(struct rv_jit_context *ctx) { __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); @@ -456,6 +461,11 @@ static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); } +static u32 rv_auipc(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x17); +} + static bool is_12b_int(s64 val) { return -(1 << 11) <= val && val < (1 << 11); @@ -479,27 +489,7 @@ static bool is_32b_int(s64 val) static int is_12b_check(int off, int insn) { if (!is_12b_int(off)) { - pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", - insn, (int)off); - return -1; - } - return 0; -} - -static int is_13b_check(int off, int insn) -{ - if (!is_13b_int(off)) { - pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", - insn, (int)off); - return -1; - } - return 0; -} - -static int is_21b_check(int off, int insn) -{ - if (!is_21b_int(off)) { - pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", + pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n", insn, (int)off); return -1; } @@ -545,10 +535,13 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) emit(rv_addi(rd, rd, lower), ctx); } -static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx) +static int rv_offset(int insn, int off, struct rv_jit_context *ctx) { - int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to]; + int from, to; + off++; /* BPF branch is from PC+1, RV is from PC */ + from = (insn > 0) ? ctx->offset[insn - 1] : 0; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; return (to - from) << 2; } @@ -559,7 +552,7 @@ static int epilogue_offset(struct rv_jit_context *ctx) return (to - from) << 2; } -static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) +static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) { int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; @@ -596,8 +589,114 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); /* Set return value. */ - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); - emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); + if (!is_tail_call) + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, + is_tail_call ? 4 : 0), /* skip TCC init */ + ctx); +} + +/* return -1 or inverted cond */ +static int invert_bpf_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JGT: + return BPF_JLE; + case BPF_JLT: + return BPF_JGE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLE: + return BPF_JGT; + case BPF_JNE: + return BPF_JEQ; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, + struct rv_jit_context *ctx) +{ + switch (cond) { + case BPF_JEQ: + emit(rv_beq(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JGT: + emit(rv_bltu(rs, rd, rvoff >> 1), ctx); + return; + case BPF_JLT: + emit(rv_bltu(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JGE: + emit(rv_bgeu(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JLE: + emit(rv_bgeu(rs, rd, rvoff >> 1), ctx); + return; + case BPF_JNE: + emit(rv_bne(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JSGT: + emit(rv_blt(rs, rd, rvoff >> 1), ctx); + return; + case BPF_JSLT: + emit(rv_blt(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JSGE: + emit(rv_bge(rd, rs, rvoff >> 1), ctx); + return; + case BPF_JSLE: + emit(rv_bge(rs, rd, rvoff >> 1), ctx); + } +} + +static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff, + struct rv_jit_context *ctx) +{ + s64 upper, lower; + + if (is_13b_int(rvoff)) { + emit_bcc(cond, rd, rs, rvoff, ctx); + return; + } + + /* Adjust for jal */ + rvoff -= 4; + + /* Transform, e.g.: + * bne rd,rs,foo + * to + * beq rd,rs,<.L1> + * (auipc foo) + * jal(r) foo + * .L1 + */ + cond = invert_bpf_cond(cond); + if (is_21b_int(rvoff)) { + emit_bcc(cond, rd, rs, 8, ctx); + emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); + return; + } + + /* 32b No need for an additional rvoff adjustment, since we + * get that from the auipc at PC', where PC = PC' + 4. + */ + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + + emit_bcc(cond, rd, rs, 12, ctx); + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx); } static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) @@ -627,18 +726,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) return -1; emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; - if (is_13b_check(off, insn)) - return -1; - emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx); + emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); - /* if (--TCC < 0) + /* if (TCC-- < 0) * goto out; */ emit(rv_addi(RV_REG_T1, tcc, -1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; - if (is_13b_check(off, insn)) - return -1; - emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx); + emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); /* prog = array->ptrs[index]; * if (!prog) @@ -651,18 +746,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) return -1; emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; - if (is_13b_check(off, insn)) - return -1; - emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx); + emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); /* goto *(prog->bpf_func + 4); */ off = offsetof(struct bpf_prog, bpf_func); if (is_12b_check(off, insn)) return -1; emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx); - emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx); emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); - __build_epilogue(RV_REG_T3, ctx); + __build_epilogue(true, ctx); return 0; } @@ -687,13 +779,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, *rs = bpf_to_rv_reg(insn->src_reg, ctx); } -static int rv_offset_check(int *rvoff, s16 off, int insn, - struct rv_jit_context *ctx) -{ - *rvoff = rv_offset(insn + off, insn, ctx); - return is_13b_check(*rvoff, insn); -} - static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) { emit(rv_addi(RV_REG_T2, *rd, 0), ctx); @@ -726,13 +811,57 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) *rd = RV_REG_T2; } +static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, + struct rv_jit_context *ctx) +{ + s64 upper, lower; + + if (rvoff && is_21b_int(rvoff) && !force_jalr) { + emit(rv_jal(rd, rvoff >> 1), ctx); + return; + } + + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(rd, RV_REG_T1, lower), ctx); +} + +static bool is_signed_bpf_cond(u8 cond) +{ + return cond == BPF_JSGT || cond == BPF_JSLT || + cond == BPF_JSGE || cond == BPF_JSLE; +} + +static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) +{ + s64 off = 0; + u64 ip; + u8 rd; + + if (addr && ctx->insns) { + ip = (u64)(long)(ctx->insns + ctx->ninsns); + off = addr - ip; + if (!is_32b_int(off)) { + pr_err("bpf-jit: target call addr %pK is out of range\n", + (void *)addr); + return -ERANGE; + } + } + + emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); + rd = bpf_to_rv_reg(BPF_REG_0, ctx); + emit(rv_addi(rd, RV_REG_A0, 0), ctx); + return 0; +} + static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool extra_pass) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; + int s, e, rvoff, i = insn - ctx->prog->insnsi; struct bpf_prog_aux *aux = ctx->prog->aux; - int rvoff, i = insn - ctx->prog->insnsi; u8 rd = -1, rs = -1, code = insn->code; s16 off = insn->off; s32 imm = insn->imm; @@ -1000,214 +1129,110 @@ out_be: /* JUMP off */ case BPF_JMP | BPF_JA: - rvoff = rv_offset(i + off, i, ctx); - if (!is_21b_int(rvoff)) { - pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n", - i, rvoff); - return -1; - } - - emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); + rvoff = rv_offset(i, off, ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); break; /* IF (dst COND src) JUMP off */ case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP32 | BPF_JEQ | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_beq(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP32 | BPF_JGT | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bltu(rs, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP32 | BPF_JLT | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bltu(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP32 | BPF_JGE | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bgeu(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JLE | BPF_X: case BPF_JMP32 | BPF_JLE | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bgeu(rs, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JNE | BPF_X: case BPF_JMP32 | BPF_JNE | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bne(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP32 | BPF_JSGT | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_sext_32_rd_rs(&rd, &rs, ctx); - emit(rv_blt(rs, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP32 | BPF_JSLT | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_sext_32_rd_rs(&rd, &rs, ctx); - emit(rv_blt(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JSGE | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_sext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bge(rd, rs, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSLE | BPF_X: case BPF_JMP32 | BPF_JSLE | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_sext_32_rd_rs(&rd, &rs, ctx); - emit(rv_bge(rs, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - if (!is64) - emit_zext_32_rd_rs(&rd, &rs, ctx); - emit(rv_and(RV_REG_T1, rd, rs), ctx); - emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx); + rvoff = rv_offset(i, off, ctx); + if (!is64) { + s = ctx->ninsns; + if (is_signed_bpf_cond(BPF_OP(code))) + emit_sext_32_rd_rs(&rd, &rs, ctx); + else + emit_zext_32_rd_rs(&rd, &rs, ctx); + e = ctx->ninsns; + + /* Adjust for extra insns */ + rvoff -= (e - s) << 2; + } + + if (BPF_OP(code) == BPF_JSET) { + /* Adjust for and */ + rvoff -= 4; + emit(rv_and(RV_REG_T1, rd, rs), ctx); + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, + ctx); + } else { + emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); + } break; /* IF (dst COND imm) JUMP off */ case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP32 | BPF_JEQ | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP32 | BPF_JGT | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP32 | BPF_JLT | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP32 | BPF_JGE | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JLE | BPF_K: case BPF_JMP32 | BPF_JLE | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP32 | BPF_JNE | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP32 | BPF_JSGT | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_sext_32_rd(&rd, ctx); - emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSLT | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_sext_32_rd(&rd, ctx); - emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_sext_32_rd(&rd, ctx); - emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; - emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_sext_32_rd(&rd, ctx); - emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx); - break; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: - if (rv_offset_check(&rvoff, off, i, ctx)) - return -1; + rvoff = rv_offset(i, off, ctx); + s = ctx->ninsns; emit_imm(RV_REG_T1, imm, ctx); - if (!is64) - emit_zext_32_rd_t1(&rd, ctx); - emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); - emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx); + if (!is64) { + if (is_signed_bpf_cond(BPF_OP(code))) + emit_sext_32_rd(&rd, ctx); + else + emit_zext_32_rd_t1(&rd, ctx); + } + e = ctx->ninsns; + + /* Adjust for extra insns */ + rvoff -= (e - s) << 2; + + if (BPF_OP(code) == BPF_JSET) { + /* Adjust for and */ + rvoff -= 4; + emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, + ctx); + } else { + emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx); + } break; /* function call */ case BPF_JMP | BPF_CALL: { bool fixed; - int i, ret; + int ret; u64 addr; mark_call(ctx); @@ -1215,20 +1240,9 @@ out_be: &fixed); if (ret < 0) return ret; - if (fixed) { - emit_imm(RV_REG_T1, addr, ctx); - } else { - i = ctx->ninsns; - emit_imm(RV_REG_T1, addr, ctx); - for (i = ctx->ninsns - i; i < 8; i++) { - /* nop */ - emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0), - ctx); - } - } - emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx); - rd = bpf_to_rv_reg(BPF_REG_0, ctx); - emit(rv_addi(rd, RV_REG_A0, 0), ctx); + ret = emit_call(fixed, addr, ctx); + if (ret) + return ret; break; } /* tail call */ @@ -1243,9 +1257,7 @@ out_be: break; rvoff = epilogue_offset(ctx); - if (is_21b_check(rvoff, i)) - return -1; - emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); break; /* dst = imm64 */ @@ -1426,6 +1438,10 @@ static void build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + if (bpf_stack_adjust) + mark_fp(ctx); + if (seen_reg(RV_REG_RA, ctx)) stack_adjust += 8; stack_adjust += 8; /* RV_REG_FP */ @@ -1443,7 +1459,6 @@ static void build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; @@ -1502,10 +1517,10 @@ static void build_prologue(struct rv_jit_context *ctx) static void build_epilogue(struct rv_jit_context *ctx) { - __build_epilogue(RV_REG_RA, ctx); + __build_epilogue(false, ctx); } -static int build_body(struct rv_jit_context *ctx, bool extra_pass) +static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) { const struct bpf_prog *prog = ctx->prog; int i; @@ -1517,12 +1532,12 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass) ret = emit_insn(insn, ctx, extra_pass); if (ret > 0) { i++; - if (ctx->insns == NULL) - ctx->offset[i] = ctx->ninsns; + if (offset) + offset[i] = ctx->ninsns; continue; } - if (ctx->insns == NULL) - ctx->offset[i] = ctx->ninsns; + if (offset) + offset[i] = ctx->ninsns; if (ret) return ret; } @@ -1548,9 +1563,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; + int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; + unsigned int image_size = 0; struct rv_jit_context *ctx; - unsigned int image_size; if (!prog->jit_requested) return orig_prog; @@ -1587,33 +1603,59 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + for (i = 0; i < prog->len; i++) { + prev_ninsns += 32; + ctx->offset[i] = prev_ninsns; + } - /* First pass generates the ctx->offset, but does not emit an image. */ - if (build_body(ctx, extra_pass)) { - prog = orig_prog; - goto out_offset; + for (i = 0; i < 16; i++) { + pass++; + ctx->ninsns = 0; + if (build_body(ctx, extra_pass, ctx->offset)) { + prog = orig_prog; + goto out_offset; + } + build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; + build_epilogue(ctx); + + if (ctx->ninsns == prev_ninsns) { + if (jit_data->header) + break; + + image_size = sizeof(u32) * ctx->ninsns; + jit_data->header = + bpf_jit_binary_alloc(image_size, + &jit_data->image, + sizeof(u32), + bpf_fill_ill_insns); + if (!jit_data->header) { + prog = orig_prog; + goto out_offset; + } + + ctx->insns = (u32 *)jit_data->image; + /* Now, when the image is allocated, the image + * can potentially shrink more (auipc/jalr -> + * jal). + */ + } + prev_ninsns = ctx->ninsns; } - build_prologue(ctx); - ctx->epilogue_offset = ctx->ninsns; - build_epilogue(ctx); - /* Allocate image, now that we know the size. */ - image_size = sizeof(u32) * ctx->ninsns; - jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image, - sizeof(u32), - bpf_fill_ill_insns); - if (!jit_data->header) { + if (i == 16) { + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } - /* Second, real pass, that acutally emits the image. */ - ctx->insns = (u32 *)jit_data->image; skip_init_ctx: + pass++; ctx->ninsns = 0; build_prologue(ctx); - if (build_body(ctx, extra_pass)) { + if (build_body(ctx, extra_pass, NULL)) { bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; @@ -1621,7 +1663,7 @@ skip_init_ctx: build_epilogue(ctx); if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx->insns); + bpf_jit_dump(prog->len, image_size, pass, ctx->insns); prog->bpf_func = (void *)ctx->insns; prog->jited = 1; @@ -1641,3 +1683,16 @@ out: tmp : orig_prog); return prog; } + +void *bpf_jit_alloc_exec(unsigned long size) +{ + return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, + BPF_JIT_REGION_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} + +void bpf_jit_free_exec(void *addr) +{ + return vfree(addr); +} diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d4051e88e625..734996784d03 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if PREEMPT + def_bool y if PREEMPTTION config PGSTE def_bool y if KVM @@ -110,7 +110,7 @@ config S390 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS @@ -124,6 +124,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN + select HAVE_ARCH_KASAN_VMALLOC select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY @@ -162,13 +163,13 @@ config S390 select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP - select HAVE_MMU_GATHER_NO_GATHER + select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS - select HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ba8556bb0fb1..e0e3a465bbfd 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -69,7 +69,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack),y) +ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 45046630c56a..368fd372c875 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -30,13 +30,13 @@ extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; #ifdef CONFIG_HAVE_KERNEL_BZIP2 -#define HEAP_SIZE 0x400000 +#define BOOT_HEAP_SIZE 0x400000 #else -#define HEAP_SIZE 0x10000 +#define BOOT_HEAP_SIZE 0x10000 #endif static unsigned long free_mem_ptr = (unsigned long) _end; -static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; +static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" @@ -62,7 +62,7 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; #include "../../../../lib/decompress_unxz.c" #endif -#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE) +#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE) unsigned long mem_safe_offset(void) { diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 4b86a8d3c121..dae10961d072 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -329,7 +329,7 @@ ENTRY(startup_kdump) .quad .Lduct # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation + .quad 0x0000000000008000 # cr8: access registers translation .quad 0 # cr9: tracing off .quad 0 # cr10: tracing off .quad 0 # cr11: tracing off diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 24ef67eb1cef..357adad991d2 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -14,6 +14,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); +unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; unsigned long __bootdata(memory_end); @@ -229,6 +230,19 @@ void parse_boot_command_line(void) if (!strcmp(param, "vmalloc") && val) vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + if (!strcmp(param, "dfltcc")) { + if (!strcmp(val, "off")) + zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; + else if (!strcmp(val, "on")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL; + else if (!strcmp(val, "def_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY; + else if (!strcmp(val, "inf_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY; + else if (!strcmp(val, "always")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; + } + if (!strcmp(param, "noexec")) { rc = kstrtobool(val, &enabled); if (!rc && !enabled) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index ead0b2c9881d..1c23d84a9097 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -72,19 +72,12 @@ static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - int ret; sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len); - if (ret) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags & - CRYPTO_TFM_RES_MASK); - } - return ret; + return crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len); } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, @@ -182,18 +175,13 @@ static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int len) { struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm); - int ret; crypto_skcipher_clear_flags(sctx->fallback.skcipher, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(sctx->fallback.skcipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(sctx->fallback.skcipher, key, len); - crypto_skcipher_set_flags(tfm, - crypto_skcipher_get_flags(sctx->fallback.skcipher) & - CRYPTO_TFM_RES_MASK); - return ret; + return crypto_skcipher_setkey(sctx->fallback.skcipher, key, len); } static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx, @@ -389,17 +377,12 @@ static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int len) { struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm); - int ret; crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(xts_ctx->fallback, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len); - crypto_skcipher_set_flags(tfm, - crypto_skcipher_get_flags(xts_ctx->fallback) & - CRYPTO_TFM_RES_MASK); - return ret; + return crypto_skcipher_setkey(xts_ctx->fallback, key, len); } static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, @@ -414,10 +397,8 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return err; /* In fips mode only 128 bit or 256 bit keys are valid */ - if (fips_enabled && key_len != 32 && key_len != 64) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (fips_enabled && key_len != 32 && key_len != 64) return -EINVAL; - } /* Pick the correct function code based on the key length */ fc = (key_len == 32) ? CPACF_KM_XTS_128 : diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 423ee05887e6..fafecad20752 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -111,10 +111,8 @@ static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, { struct crc_ctx *mctx = crypto_shash_ctx(tfm); - if (newkeylen != sizeof(mctx->key)) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (newkeylen != sizeof(mctx->key)) return -EINVAL; - } mctx->key = le32_to_cpu(*(__le32 *)newkey); return 0; } @@ -124,10 +122,8 @@ static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, { struct crc_ctx *mctx = crypto_shash_ctx(tfm); - if (newkeylen != sizeof(mctx->key)) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (newkeylen != sizeof(mctx->key)) return -EINVAL; - } mctx->key = be32_to_cpu(*(__be32 *)newkey); return 0; } diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index a3e7400e031c..6b07a2f1ce8a 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -43,10 +43,8 @@ static int ghash_setkey(struct crypto_shash *tfm, { struct ghash_ctx *ctx = crypto_shash_ctx(tfm); - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; - } memcpy(ctx->key, key, GHASH_BLOCK_SIZE); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index c7119c617b6e..e2a85783f804 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -151,11 +151,7 @@ static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (rc) return rc; - if (__paes_set_key(ctx)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - return 0; + return __paes_set_key(ctx); } static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) @@ -254,11 +250,7 @@ static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (rc) return rc; - if (__cbc_paes_set_key(ctx)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - return 0; + return __cbc_paes_set_key(ctx); } static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) @@ -386,10 +378,9 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (rc) return rc; - if (__xts_paes_set_key(ctx)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } + rc = __xts_paes_set_key(ctx); + if (rc) + return rc; /* * xts_check_key verifies the key length is not odd and makes @@ -526,11 +517,7 @@ static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, if (rc) return rc; - if (__ctr_paes_set_key(ctx)) { - crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - return 0; + return __ctr_paes_set_key(ctx); } static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index c67b82dfa558..de61ce562052 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -21,29 +21,17 @@ extern atomic64_t s390_arch_random_counter; bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); -static inline bool arch_has_random(void) +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -static inline bool arch_has_random_seed(void) +static inline bool __must_check arch_get_random_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) - return true; return false; } -static inline bool arch_get_random_long(unsigned long *v) -{ - return false; -} - -static inline bool arch_get_random_int(unsigned int *v) -{ - return false; -} - -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { return s390_arch_random_generate((u8 *)v, sizeof(*v)); @@ -51,7 +39,7 @@ static inline bool arch_get_random_seed_long(unsigned long *v) return false; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { return s390_arch_random_generate((u8 *)v, sizeof(*v)); diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index a2b11ac00f60..7725f8006fdf 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -10,15 +10,14 @@ #define __EMIT_BUG(x) do { \ asm_inline volatile( \ - "0: j 0b+2\n" \ - "1:\n" \ + "0: mc 0,0\n" \ ".section .rodata.str,\"aMS\",@progbits,1\n" \ - "2: .asciz \""__FILE__"\"\n" \ + "1: .asciz \""__FILE__"\"\n" \ ".previous\n" \ ".section __bug_table,\"awM\",@progbits,%2\n" \ - "3: .long 1b-3b,2b-3b\n" \ + "2: .long 0b-2b,1b-2b\n" \ " .short %0,%1\n" \ - " .org 3b+%2\n" \ + " .org 2b+%2\n" \ ".previous\n" \ : : "i" (__LINE__), \ "i" (x), \ @@ -29,12 +28,11 @@ #define __EMIT_BUG(x) do { \ asm_inline volatile( \ - "0: j 0b+2\n" \ - "1:\n" \ + "0: mc 0,0\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ - "2: .long 1b-2b\n" \ + "1: .long 0b-1b\n" \ " .short %0\n" \ - " .org 2b+%1\n" \ + " .org 1b+%1\n" \ ".previous\n" \ : : "i" (x), \ "i" (sizeof(struct bug_entry))); \ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 63b46e30b2c3..9547cd5d6cdc 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -177,11 +177,7 @@ static inline void __user *compat_ptr(compat_uptr_t uptr) { return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); } - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} +#define compat_ptr(uptr) compat_ptr(uptr) #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 39f747d63758..dcb1bba4f406 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -10,7 +10,9 @@ #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 -#if __GNUC__ < 9 +#ifdef CONFIG_CC_IS_CLANG +#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i" +#elif __GNUC__ < 9 #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X" #else #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd" diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 02f4c21c57f6..11ecc4071a29 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -914,7 +914,6 @@ extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 3a06c264ea53..b05187ce5dbd 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -180,7 +180,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); -int clp_rescan_pci_devices_simple(void); +int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7b03037a8475..137a3920ca36 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -673,6 +673,7 @@ static inline int pud_none(pud_t pud) return pud_val(pud) == _REGION3_ENTRY_EMPTY; } +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) @@ -690,6 +691,7 @@ static inline unsigned long pud_pfn(pud_t pud) return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; } +#define pmd_leaf pmd_large static inline int pmd_large(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b5ea9e14c017..6ede29907fbf 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -130,11 +130,11 @@ static inline bool should_resched(int preempt_offset) #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() extern asmlinkage void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() -#endif /* CONFIG_PREEMPT */ +#endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6dc6c4fbc8e2..b241ddb67caf 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -27,7 +27,6 @@ #define MACHINE_FLAG_DIAG9C BIT(3) #define MACHINE_FLAG_ESOP BIT(4) #define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_DIAG44 BIT(6) #define MACHINE_FLAG_EDAT1 BIT(7) #define MACHINE_FLAG_EDAT2 BIT(8) #define MACHINE_FLAG_TOPOLOGY BIT(10) @@ -80,6 +79,13 @@ struct parmarea { char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ }; +extern unsigned int zlib_dfltcc_support; +#define ZLIB_DFLTCC_DISABLED 0 +#define ZLIB_DFLTCC_FULL 1 +#define ZLIB_DFLTCC_DEFLATE_ONLY 2 +#define ZLIB_DFLTCC_INFLATE_ONLY 3 +#define ZLIB_DFLTCC_FULL_DEBUG 4 + extern int noexec_disabled; extern int memory_end_set; extern unsigned long memory_end; @@ -94,7 +100,6 @@ extern unsigned long __swsusp_reset_dma; #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) #define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) #define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6da8885251d6..670f14a228e5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -194,9 +194,9 @@ static inline unsigned long long get_tod_clock_monotonic(void) { unsigned long long tod; - preempt_disable(); + preempt_disable_notrace(); tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; - preempt_enable(); + preempt_enable_notrace(); return tod; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index ef3c00b049ab..4093a2856929 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd) }; if (!is_prot_virt_guest()) - return -ENOTSUPP; + return -EOPNOTSUPP; /* * Sharing is page wise, if we encounter addresses that are * not page aligned, we assume something went wrong. If diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h new file mode 100644 index 000000000000..3ba3a6bdca25 --- /dev/null +++ b/arch/s390/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_S390_VMALLOC_H +#define _ASM_S390_VMALLOC_H + +#endif /* _ASM_S390_VMALLOC_H */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index d306fe04489a..2c122d8bab93 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -195,6 +195,8 @@ void die(struct pt_regs *regs, const char *str) regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT pr_cont("PREEMPT "); +#elif defined(CONFIG_PREEMPT_RT) + pr_cont("PREEMPT_RT "); #endif pr_cont("SMP "); if (debug_pagealloc_enabled()) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index db32a55daaec..cd241ee66eff 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -204,21 +204,6 @@ static __init void detect_diag9c(void) S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } -static __init void detect_diag44(void) -{ - int rc; - - diag_stat_inc(DIAG_STAT_X044); - asm volatile( - " diag 0,0,0x44\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; -} - static __init void detect_machine_facilities(void) { if (test_facility(8)) { @@ -331,7 +316,6 @@ void __init startup_init(void) setup_arch_string(); setup_boot_command_line(); detect_diag9c(); - detect_diag44(); detect_machine_facilities(); save_vector_registers(); setup_topology(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 270d1d145761..9205add8481d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -790,7 +790,7 @@ ENTRY(io_int_handler) .Lio_work: tm __PT_PSW+1(%r11),0x01 # returning to user ? jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # check for preemptive scheduling icm %r0,15,__LC_PREEMPT_COUNT jnz .Lio_restore # preemption is disabled diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index b2956d49b6ad..1d3927e01a5f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -45,6 +45,7 @@ void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); +void monitor_event_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db..f942341429b1 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,6 +26,12 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -35,13 +41,21 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c07fdcd73726..b095b1c78987 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1303,18 +1303,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "%s: " "overflows: sample %llu event %llu" @@ -1373,7 +1383,8 @@ static void aux_output_end(struct perf_output_handle *handle) te = aux_sdb_trailer(aux, aux->alert_mark); te->flags &= ~SDB_TE_ALERT_REQ_MASK; - debug_sprintf_event(sfdbg, 6, "%s: collect %#lx SDBs\n", __func__, i); + debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", + __func__, i, range_scan, aux->head); } /* @@ -1406,13 +1417,17 @@ static int aux_output_begin(struct perf_output_handle *handle, * SDBs between aux->head and aux->empty_mark are already ready * for new data. range_scan is num of SDBs not within them. */ + debug_sprintf_event(sfdbg, 6, + "%s: range %ld head %ld alert %ld empty %ld\n", + __func__, range, aux->head, aux->alert_mark, + aux->empty_mark); if (range > AUX_SDB_NUM_EMPTY(aux)) { range_scan = range - AUX_SDB_NUM_EMPTY(aux); idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; + te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | + SDB_TE_ALERT_REQ_MASK); te->overflow = 0; } /* Save the position of empty SDBs */ @@ -1431,15 +1446,11 @@ static int aux_output_begin(struct perf_output_handle *handle, cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); cpuhw->lsctl.dear = aux->sdb_index[head]; - debug_sprintf_event(sfdbg, 6, "%s: " - "head->alert_mark->empty_mark (num_alert, range)" - "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) " - "tear index %#lx, tear %#lx dear %#lx\n", __func__, + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld " + "index %ld tear %#lx dear %#lx\n", __func__, aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, - cpuhw->lsctl.tear, - cpuhw->lsctl.dear); + cpuhw->lsctl.tear, cpuhw->lsctl.dear); return 0; } @@ -1459,8 +1470,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, te = aux_sdb_trailer(aux, alert_index); do { orig_flags = te->flags; - orig_overflow = te->overflow; - *overflow = orig_overflow; + *overflow = orig_overflow = te->overflow; if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { /* * SDB is already set by hardware. @@ -1502,9 +1512,12 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { unsigned long long orig_overflow, orig_flags, new_flags; - unsigned long i, range_scan, idx; + unsigned long i, range_scan, idx, idx_old; struct hws_trailer_entry *te; + debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " + "empty %ld\n", __func__, range, aux->head, + aux->alert_mark, aux->empty_mark); if (range <= AUX_SDB_NUM_EMPTY(aux)) /* * No need to scan. All SDBs in range are marked as empty. @@ -1527,7 +1540,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, * indicator fall into this range, set it. */ range_scan = range - AUX_SDB_NUM_EMPTY(aux); - idx = aux->empty_mark + 1; + idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); do { @@ -1547,6 +1560,9 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, /* Update empty_mark to new position */ aux->empty_mark = aux->head + range - 1; + debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld " + "empty %ld\n", __func__, range_scan, idx_old, + idx - 1, aux->empty_mark); return true; } @@ -1560,7 +1576,6 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) unsigned long range = 0, size; unsigned long long overflow = 0; struct perf_output_handle *handle = &cpuhw->handle; - unsigned long num_sdb; aux = perf_get_aux(handle); if (WARN_ON_ONCE(!aux)) @@ -1568,8 +1583,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) /* Inform user space new data arrived */ size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__, + size >> PAGE_SHIFT); perf_aux_output_end(handle, size); - num_sdb = aux->sfb.num_sdb; while (!done) { /* Get an output handle */ @@ -1577,7 +1593,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) if (handle->size == 0) { pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", - num_sdb); + aux->sfb.num_sdb); debug_sprintf_event(sfdbg, 1, "%s: AUX buffer used up\n", __func__); @@ -1602,14 +1618,14 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) size = range << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " - "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "%s: head %#lx range %#lx " - "overflow %#llx\n", __func__, + "pages to overflow\n", aux->sfb.num_sdb); + debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld " + "overflow %lld\n", __func__, aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "%s: head %#lx alert %#lx " + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " "already full, try another\n", __func__, aux->head, aux->alert_mark); @@ -1617,11 +1633,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) } if (done) - debug_sprintf_event(sfdbg, 6, "%s: aux_reset_buffer " - "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n", - __func__, aux->head, aux->alert_mark, - aux->empty_mark, AUX_SDB_NUM_ALERT(aux), - range); + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " + "empty %ld\n", __func__, aux->head, + aux->alert_mark, aux->empty_mark); } /* @@ -1644,8 +1658,7 @@ static void aux_buffer_free(void *data) kfree(aux->sdb_index); kfree(aux); - debug_sprintf_event(sfdbg, 4, "%s: free " - "%lu SDBTs\n", __func__, num_sdbt); + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt); } static void aux_sdb_init(unsigned long sdb) @@ -1697,13 +1710,13 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, } /* Allocate aux_buffer struct for the event */ - aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); + aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL); if (!aux) goto no_aux; sfb = &aux->sfb; /* Allocate sdbt_index for fast reference */ - n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; + n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE); aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); if (!aux->sdbt_index) goto no_sdbt_index; @@ -1753,8 +1766,8 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, */ aux->empty_mark = sfb->num_sdb - 1; - debug_sprintf_event(sfdbg, 4, "%s: setup %lu SDBTs and %lu SDBs\n", - __func__, sfb->num_sdbt, sfb->num_sdb); + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__, + sfb->num_sdbt, sfb->num_sdb); return aux; diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 59dee9d3bebf..eee3a482195a 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -81,7 +81,7 @@ PGM_CHECK_DEFAULT /* 3c */ PGM_CHECK_DEFAULT /* 3d */ PGM_CHECK_DEFAULT /* 3e */ PGM_CHECK_DEFAULT /* 3f */ -PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ PGM_CHECK_DEFAULT /* 42 */ PGM_CHECK_DEFAULT /* 43 */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9cbf490fd162..b2c2f75860e8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -111,6 +111,8 @@ unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned int __bootdata_preserved(zlib_dfltcc_support); +EXPORT_SYMBOL(zlib_dfltcc_support); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -241,8 +243,6 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE)) - conswitchp = &dummy_con; } #ifdef CONFIG_CRASH_DUMP @@ -761,14 +761,6 @@ static void __init free_mem_detect_info(void) memblock_free(start, size); } -static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size) -{ - memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n", - start, start + size - 1); - memblock_add_range(&memblock.memory, start, size, 0, 0); - memblock_add_range(&memblock.physmem, start, size, 0, 0); -} - static const char * __init get_mem_info_source(void) { switch (mem_detect.info_source) { @@ -793,8 +785,10 @@ static void __init memblock_add_mem_detect_info(void) get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_block(i, &start, &end) + for_each_mem_detect_block(i, &start, &end) { + memblock_add(start, end - start); memblock_physmem_add(start, end - start); + } memblock_set_bottom_up(false); memblock_dump_all(); } @@ -1052,7 +1046,7 @@ static void __init log_component_list(void) if (!early_ipl_comp_list_addr) return; - if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2794cad9312e..a08bd2522dd9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -413,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) { - diag_stat_inc_norecursion(DIAG_STAT_X09C); - asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { - diag_stat_inc_norecursion(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); - } + if (!MACHINE_HAS_DIAG9C) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); } /* diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 3054e9c035a3..bd7bd3581a0f 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 433 common fspick sys_fspick sys_fspick 434 common pidfd_open sys_pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 sys_clone3 +437 common openat2 sys_openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 164c0282b41a..dc75588d7894 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -53,11 +53,6 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (fixup) regs->psw.addr = extable_fixup(fixup); else { - enum bug_trap_type btt; - - btt = report_bug(regs->psw.addr, regs); - if (btt == BUG_TRAP_TYPE_WARN) - return; die(regs, str); } } @@ -245,6 +240,27 @@ void space_switch_exception(struct pt_regs *regs) do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } +void monitor_event_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + if (user_mode(regs)) + return; + + switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { + case BUG_TRAP_TYPE_NONE: + fixup = s390_search_extables(regs->psw.addr); + if (fixup) + regs->psw.addr = extable_fixup(fixup); + break; + case BUG_TRAP_TYPE_WARN: + break; + case BUG_TRAP_TYPE_BUG: + die(regs, "monitor event"); + break; + } +} + void kernel_stack_overflow(struct pt_regs *regs) { bust_spinlocks(1); @@ -255,8 +271,23 @@ void kernel_stack_overflow(struct pt_regs *regs) } NOKPROBE_SYMBOL(kernel_stack_overflow); +static void test_monitor_call(void) +{ + int val = 1; + + asm volatile( + " mc 0,0\n" + "0: xgr %0,%0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); +} + void __init trap_init(void) { sort_extable(__start_dma_ex_table, __stop_dma_ex_table); local_mcck_enable(); + test_monitor_call(); } diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index da2d4d4c5b0e..707fd99f6734 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -36,10 +36,17 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp) return true; } -static inline bool is_task_pt_regs(struct unwind_state *state, - struct pt_regs *regs) +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) { - return task_pt_regs(state->task) == regs; + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; } bool unwind_next_frame(struct unwind_state *state) @@ -80,7 +87,7 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_err; regs = (struct pt_regs *) sp; - if (is_task_pt_regs(state, regs)) + if (is_final_pt_regs(state, regs)) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); sp = READ_ONCE_NOCHECK(regs->gprs[15]); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9e6bf3d54f0..8646c99217f2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2530,9 +2530,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); free_page((unsigned long)(vcpu->arch.sie_block)); - - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -2541,7 +2538,7 @@ static void kvm_free_vcpus(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_destroy(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -2703,39 +2700,6 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; } -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(vcpu); - vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | - KVM_SYNC_GPRS | - KVM_SYNC_ACRS | - KVM_SYNC_CRS | - KVM_SYNC_ARCH0 | - KVM_SYNC_PFAULT; - kvm_s390_set_prefix(vcpu, 0); - if (test_kvm_facility(vcpu->kvm, 64)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; - if (test_kvm_facility(vcpu->kvm, 82)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; - if (test_kvm_facility(vcpu->kvm, 133)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; - if (test_kvm_facility(vcpu->kvm, 156)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; - /* fprs can be synchronized via vrs, even if the guest has no vx. With - * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. - */ - if (MACHINE_HAS_VX) - vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; - else - vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; - - if (kvm_is_ucontrol(vcpu->kvm)) - return __kvm_ucontrol_vcpu_init(vcpu); - - return 0; -} - /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) { @@ -2962,7 +2926,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) { int rc = 0; @@ -3035,26 +2999,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return rc; } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu; - struct sie_page *sie_page; - int rc = -EINVAL; - if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) - goto out; - - rc = -ENOMEM; + return -EINVAL; + return 0; +} - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + struct sie_page *sie_page; + int rc; BUILD_BUG_ON(sizeof(struct sie_page) != 4096); sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); if (!sie_page) - goto out_free_cpu; + return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; @@ -3063,27 +3023,59 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->mso = 0; vcpu->arch.sie_block->msl = sclp.hamax; - vcpu->arch.sie_block->icpua = id; + vcpu->arch.sie_block->icpua = vcpu->vcpu_id; spin_lock_init(&vcpu->arch.local_int.lock); - vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin; + vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; if (vcpu->arch.sie_block->gd && sclp.has_gisaf) vcpu->arch.sie_block->gd |= GISA_FORMAT1; seqcount_init(&vcpu->arch.cputm_seqcount); - rc = kvm_vcpu_init(vcpu, kvm, id); + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS | + KVM_SYNC_ARCH0 | + KVM_SYNC_PFAULT; + kvm_s390_set_prefix(vcpu, 0); + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; + if (test_kvm_facility(vcpu->kvm, 133)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; + if (test_kvm_facility(vcpu->kvm, 156)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. + */ + if (MACHINE_HAS_VX) + vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; + + if (kvm_is_ucontrol(vcpu->kvm)) { + rc = __kvm_ucontrol_vcpu_init(vcpu); + if (rc) + goto out_free_sie_block; + } + + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); + trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); + + rc = kvm_s390_vcpu_setup(vcpu); if (rc) - goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, - vcpu->arch.sie_block); - trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); + goto out_ucontrol_uninit; + return 0; - return vcpu; +out_ucontrol_uninit: + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_remove(vcpu->arch.gmap); out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); -out_free_cpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(rc); + return rc; } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ce1e4bbe53aa..9b2dab5a69f9 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) void arch_spin_lock_wait(arch_spinlock_t *lp) { - /* Use classic spinlocks + niai if the steal time is >= 10% */ if (test_cpu_flag(CIF_DEDICATED_CPU)) arch_spin_lock_queued(lp); else diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bda7ac0ddd29..32b7a30b2485 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -238,7 +238,7 @@ static int test_unwind_irq(struct unwindme *u) { preempt_disable(); if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { - pr_info("Couldn't reqister external interrupt handler"); + pr_info("Couldn't register external interrupt handler"); return -1; } u->task = current; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f0ce22220565..ac44bd76db4b 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -292,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 460f25572940..06345616a646 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void) enum populate_mode { POPULATE_ONE2ONE, POPULATE_MAP, - POPULATE_ZERO_SHADOW + POPULATE_ZERO_SHADOW, + POPULATE_SHALLOW }; static void __init kasan_early_vmemmap_populate(unsigned long address, unsigned long end, @@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } + if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + P4D_SIZE) & P4D_MASK; + continue; + } + p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } + if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + PUD_SIZE) & PUD_MASK; + continue; + } + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, page = kasan_early_shadow_page; pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; break; + case POPULATE_SHALLOW: + /* should never happen */ + break; } } address += PAGE_SIZE; @@ -313,22 +329,50 @@ void __init kasan_early_init(void) init_mm.pgd = early_pg_dir; /* * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * | 1:1 ram mapping | /| 1/8 ram | - * +- end of ram ----+ / +----------------+ - * | ... gap ... |/ | kasan | - * +- shadow start --+ | zero | - * | 1/8 addr space | | page | - * +- shadow end -+ | mapping | - * | ... gap ... |\ | (untracked) | - * +- modules vaddr -+ \ +----------------+ - * | 2Gb | \| unmapped | allocated per module - * +-----------------+ +- shadow end ---+ + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | | + * | |/ | kasan | + * +- shadow start --+ | zero | + * | 1/8 addr space | | page | + * +- shadow end -+ | mapping | + * | ... gap ... |\ | (untracked) | + * +- vmalloc area -+ \ | | + * | vmalloc_size | \ | | + * +- modules vaddr -+ \ +----------------+ + * | 2Gb | \| unmapped | allocated per module + * +-----------------+ +- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | kasan | + * | |/ | zero | + * +- shadow start --+ | page | + * | 1/8 addr space | | mapping | + * +- shadow end -+ | (untracked) | + * | ... gap ... |\ | | + * +- vmalloc area -+ \ +- vmalloc area -+ + * | vmalloc_size | \ |shallow populate| + * +- modules vaddr -+ \ +- modules area -+ + * | 2Gb | \|shallow populate| + * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_mem_end = vmax - vmalloc_size - MODULES_LEN; + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_early_vmemmap_populate(__sha(untracked_mem_end), + __sha(vmax), POPULATE_SHALLOW); + } + /* populate kasan shadow for untracked memory */ kasan_early_vmemmap_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8e872951c07b..bc61ea18e88d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -939,5 +939,5 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(); + clp_rescan_pci_devices_simple(NULL); } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 4c613e569fe0..0d3d8f170ea4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -240,12 +240,14 @@ error: } /* - * Enable/Disable a given PCI function defined by its function handle. + * Enable/Disable a given PCI function and update its function handle if + * necessary */ -static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; + u32 fid = zdev->fid; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -256,7 +258,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_SET_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = *fh; + rrb->request.fh = zdev->fh; rrb->request.oc = command; rrb->request.ndas = nr_dma_as; @@ -269,12 +271,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - *fh = rrb->response.fh; - else { + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Set PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; + } + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + zdev->fh = rrb->response.fh; + } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && + rrb->response.fh == 0) { + /* Function is already in desired state - update handle */ + rc = clp_rescan_pci_devices_simple(&fid); } clp_free_block(rrb); return rc; @@ -282,18 +289,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { - u32 fh = zdev->fh; int rc; - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); if (rc) goto out; - zdev->fh = fh; if (zpci_use_mio(zdev)) { - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); - zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", + zdev->fid, zdev->fh, rc); if (rc) clp_disable_fh(zdev); } @@ -309,11 +315,8 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); + rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); - if (!rc) - zdev->fh = fh; - return rc; } @@ -370,10 +373,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; + u32 *fid = data; if (!entry->vendor_id) return; + if (fid && *fid != entry->fid) + return; + zdev = get_zdev_by_fid(entry->fid); if (!zdev) return; @@ -413,7 +420,10 @@ int clp_rescan_pci_devices(void) return rc; } -int clp_rescan_pci_devices_simple(void) +/* Rescan PCI functions and refresh function handles. If fid is non-NULL only + * refresh the handle of the function matching @fid + */ +int clp_rescan_pci_devices_simple(u32 *fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -422,7 +432,7 @@ int clp_rescan_pci_devices_simple(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, NULL, __clp_update); + rc = clp_list_pci(rrb, fid, __clp_update); clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a433ba01a317..215f17437a4f 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -13,6 +13,8 @@ #include <linux/stat.h> #include <linux/pci.h> +#include "../../../drivers/pci/pci.h" + #include <asm/sclp.h> #define zpci_attr(name, fmt, member) \ @@ -49,31 +51,50 @@ static DEVICE_ATTR_RO(mio_enabled); static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct kernfs_node *kn; struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); - int ret; - - if (!device_remove_file_self(dev, attr)) - return count; - + int ret = 0; + + /* Can't use device_remove_self() here as that would lead us to lock + * the pci_rescan_remove_lock while holding the device' kernfs lock. + * This would create a possible deadlock with disable_slot() which is + * not directly protected by the device' kernfs lock but takes it + * during the device removal which happens under + * pci_rescan_remove_lock. + * + * This is analogous to sdev_store_delete() in + * drivers/scsi/scsi_sysfs.c + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* device_remove_file() serializes concurrent calls ignoring all but + * the first + */ + device_remove_file(dev, attr); + + /* A concurrent call to recover_store() may slip between + * sysfs_break_active_protection() and the sysfs file removal. + * Once it unblocks from pci_lock_rescan_remove() the original pdev + * will already be removed. + */ pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); - ret = zpci_disable_device(zdev); - if (ret) - goto error; - - ret = zpci_enable_device(zdev); - if (ret) - goto error; - - pci_rescan_bus(zdev->bus); + if (pci_dev_is_added(pdev)) { + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + goto out; + + ret = zpci_enable_device(zdev); + if (ret) + goto out; + pci_rescan_bus(zdev->bus); + } +out: pci_unlock_rescan_remove(); - - return count; - -error: - pci_unlock_rescan_remove(); - return ret; + if (kn) + sysfs_unbreak_active_protection(kn); + return ret ? ret : count; } static DEVICE_ATTR_WO(recover); diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index 04a03433c720..c82157f46b18 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,3 +1,4 @@ purgatory +purgatory.chk purgatory.lds purgatory.ro diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bc0d7a0d0394..c57f8c40e992 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := head.o purgatory.o string.o sha256.o mem.o -targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro +targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE @@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE - $(call if_changed_rule,cc_o_c) +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare @@ -26,15 +28,22 @@ KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) -LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T +# Since we link purgatory with -r unresolved symbols are not checked, so we +# also link a purgatory.chk binary without -r to check for unresolved symbols. +PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib +LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T +LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS) $(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE $(call if_changed,ld) +$(obj)/purgatory.chk: $(obj)/purgatory FORCE + $(call if_changed,ld) + OBJCOPYFLAGS_purgatory.ro := -O elf64-s390 OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*' OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment' OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' -$(obj)/purgatory.ro: $(obj)/purgatory FORCE +$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE $(call if_changed,objcopy) $(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c new file mode 100644 index 000000000000..c98c22a72db7 --- /dev/null +++ b/arch/s390/purgatory/string.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __HAVE_ARCH_MEMCMP /* arch function */ +#include "../lib/string.c" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f356ee674d89..9ece111b0254 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -108,7 +108,7 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_LOCKBREAK def_bool y - depends on SMP && PREEMPT + depends on SMP && PREEMPTION config ARCH_SUSPEND_POSSIBLE def_bool n diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c index d964c4d6b139..77dad1e511b4 100644 --- a/arch/sh/boards/board-sh7785lcr.c +++ b/arch/sh/boards/board-sh7785lcr.c @@ -341,7 +341,7 @@ static void __init sh7785lcr_setup(char **cmdline_p) pm_power_off = sh7785lcr_power_off; /* sm501 DRAM configuration */ - sm501_reg = ioremap_nocache(SM107_REG_ADDR, SM501_DRAM_CONTROL); + sm501_reg = ioremap(SM107_REG_ADDR, SM501_DRAM_CONTROL); if (!sm501_reg) { printk(KERN_ERR "%s: ioremap error.\n", __func__); return; diff --git a/arch/sh/boards/mach-cayman/irq.c b/arch/sh/boards/mach-cayman/irq.c index 9108789fafef..3b6ea2d99013 100644 --- a/arch/sh/boards/mach-cayman/irq.c +++ b/arch/sh/boards/mach-cayman/irq.c @@ -137,7 +137,7 @@ void init_cayman_irq(void) { int i; - epld_virt = (unsigned long)ioremap_nocache(EPLD_BASE, 1024); + epld_virt = (unsigned long)ioremap(EPLD_BASE, 1024); if (!epld_virt) { printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n"); return; diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 4cec14700adc..8ef76e288da0 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -99,7 +99,7 @@ static int __init smsc_superio_setup(void) { unsigned char devid, devrev; - smsc_superio_virt = (unsigned long)ioremap_nocache(SMSC_SUPERIO_BASE, 1024); + smsc_superio_virt = (unsigned long)ioremap(SMSC_SUPERIO_BASE, 1024); if (!smsc_superio_virt) { panic("Unable to remap SMSC SuperIO\n"); } diff --git a/arch/sh/boards/mach-sdk7786/fpga.c b/arch/sh/boards/mach-sdk7786/fpga.c index 895576ff8376..a37e1e88c6b1 100644 --- a/arch/sh/boards/mach-sdk7786/fpga.c +++ b/arch/sh/boards/mach-sdk7786/fpga.c @@ -32,7 +32,7 @@ static void __iomem *sdk7786_fpga_probe(void) * is reserved. */ for (area = PA_AREA0; area < PA_AREA7; area += SZ_64M) { - base = ioremap_nocache(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE); + base = ioremap(area + FPGA_REGS_OFFSET, FPGA_REGS_SIZE); if (!base) { /* Failed to remap this area, move along. */ continue; diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index cf2fcccca812..24391b444b28 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -96,7 +96,7 @@ static int heartbeat_drv_probe(struct platform_device *pdev) return -ENOMEM; } - hd->base = ioremap_nocache(res->start, resource_size(res)); + hd->base = ioremap(res->start, resource_size(res)); if (unlikely(!hd->base)) { dev_err(&pdev->dev, "ioremap failed\n"); diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 49303fab187b..03225d27770b 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -115,12 +115,12 @@ static int __init sh5pci_init(void) return -EINVAL; } - pcicr_virt = (unsigned long)ioremap_nocache(SH5PCI_ICR_BASE, 1024); + pcicr_virt = (unsigned long)ioremap(SH5PCI_ICR_BASE, 1024); if (!pcicr_virt) { panic("Unable to remap PCICR\n"); } - PCI_IO_AREA = (unsigned long)ioremap_nocache(SH5PCI_IO_BASE, 0x10000); + PCI_IO_AREA = (unsigned long)ioremap(SH5PCI_IO_BASE, 0x10000); if (!PCI_IO_AREA) { panic("Unable to remap PCIIO\n"); } diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c index f6d148451dfc..f3dc3f25b3ff 100644 --- a/arch/sh/drivers/platform_early.c +++ b/arch/sh/drivers/platform_early.c @@ -325,9 +325,9 @@ int __init sh_early_platform_driver_probe(char *class_str, } /** - * sh_early_platform_cleanup - clean up early platform code + * early_platform_cleanup - clean up early platform code */ -static int __init sh_early_platform_cleanup(void) +void __init early_platform_cleanup(void) { struct platform_device *pd, *pd2; @@ -337,11 +337,4 @@ static int __init sh_early_platform_cleanup(void) list_del(&pd->dev.devres_head); memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head)); } - - return 0; } -/* - * This must happen once after all early devices are probed but before probing - * real platform devices. - */ -subsys_initcall(sh_early_platform_cleanup); diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 1495489225ac..39c9ead489e5 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -367,7 +367,6 @@ static inline void ioremap_fixed_init(void) { } static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } #endif -#define ioremap_nocache ioremap #define ioremap_uc ioremap /* diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h new file mode 100644 index 000000000000..716b77472646 --- /dev/null +++ b/arch/sh/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SH_VMALLOC_H +#define _ASM_SH_VMALLOC_H + +#endif /* _ASM_SH_VMALLOC_H */ diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7264.h b/arch/sh/include/cpu-sh2a/cpu/sh7264.h index d12c19186845..8a1338aaf10a 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7264.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7264.h @@ -112,12 +112,6 @@ enum { GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A, GPIO_FN_TCLKD, GPIO_FN_TCLKC, GPIO_FN_TCLKB, GPIO_FN_TCLKA, - /* SSU */ - GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD, - GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF, - GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD, - GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF, - /* SCIF */ GPIO_FN_SCK0, GPIO_FN_SCK1, GPIO_FN_SCK2, GPIO_FN_SCK3, GPIO_FN_RXD0, GPIO_FN_RXD1, GPIO_FN_RXD2, GPIO_FN_RXD3, diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..fece521c74b3 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, @@ -119,12 +126,6 @@ enum { GPIO_FN_TIOC0D, GPIO_FN_TIOC0C, GPIO_FN_TIOC0B, GPIO_FN_TIOC0A, GPIO_FN_TCLKD, GPIO_FN_TCLKC, GPIO_FN_TCLKB, GPIO_FN_TCLKA, - /* SSU */ - GPIO_FN_SCS0_PD, GPIO_FN_SSO0_PD, GPIO_FN_SSI0_PD, GPIO_FN_SSCK0_PD, - GPIO_FN_SCS0_PF, GPIO_FN_SSO0_PF, GPIO_FN_SSI0_PF, GPIO_FN_SSCK0_PF, - GPIO_FN_SCS1_PD, GPIO_FN_SSO1_PD, GPIO_FN_SSI1_PD, GPIO_FN_SSCK1_PD, - GPIO_FN_SCS1_PF, GPIO_FN_SSO1_PF, GPIO_FN_SSI1_PF, GPIO_FN_SSCK1_PF, - /* SCIF */ GPIO_FN_SCK0, GPIO_FN_RXD0, GPIO_FN_TXD0, GPIO_FN_SCK1, GPIO_FN_RXD1, GPIO_FN_TXD1, GPIO_FN_RTS1, GPIO_FN_CTS1, diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h index ef18a668456d..3da561453260 100644 --- a/arch/sh/include/uapi/asm/sockios.h +++ b/arch/sh/include/uapi/asm/sockios.h @@ -10,7 +10,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */ -#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD _IOR('s', 100, struct __kernel_old_timeval) /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct __kernel_old_timespec) /* Get stamp (timespec) */ #endif /* __ASM_SH_SOCKIOS_H */ diff --git a/arch/sh/kernel/cpu/irq/intc-sh5.c b/arch/sh/kernel/cpu/irq/intc-sh5.c index 744f903b4df3..1b3050facda8 100644 --- a/arch/sh/kernel/cpu/irq/intc-sh5.c +++ b/arch/sh/kernel/cpu/irq/intc-sh5.c @@ -124,7 +124,7 @@ void __init plat_irq_setup(void) unsigned long reg; int i; - intc_virt = (unsigned long)ioremap_nocache(INTC_BASE, 1024); + intc_virt = (unsigned long)ioremap(INTC_BASE, 1024); if (!intc_virt) { panic("Unable to remap INTC\n"); } diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c index ae44dc24c455..d0d5d81455ae 100644 --- a/arch/sh/kernel/cpu/sh2/smp-j2.c +++ b/arch/sh/kernel/cpu/sh2/smp-j2.c @@ -88,8 +88,8 @@ static void j2_start_cpu(unsigned int cpu, unsigned long entry_point) if (!np) return; if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return; - release = ioremap_nocache(regs[0], sizeof(u32)); - initpc = ioremap_nocache(regs[1], sizeof(u32)); + release = ioremap(regs[0], sizeof(u32)); + initpc = ioremap(regs[1], sizeof(u32)); __raw_writel(entry_point, initpc); __raw_writel(1, release); diff --git a/arch/sh/kernel/cpu/sh5/clock-sh5.c b/arch/sh/kernel/cpu/sh5/clock-sh5.c index 43763c26a752..dee6be2c2344 100644 --- a/arch/sh/kernel/cpu/sh5/clock-sh5.c +++ b/arch/sh/kernel/cpu/sh5/clock-sh5.c @@ -68,7 +68,7 @@ static struct sh_clk_ops *sh5_clk_ops[] = { void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { - cprc_base = (unsigned long)ioremap_nocache(CPRC_BASE, 1024); + cprc_base = (unsigned long)ioremap(CPRC_BASE, 1024); BUG_ON(!cprc_base); if (idx < ARRAY_SIZE(sh5_clk_ops)) diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index de68ffdfffbf..81c8b64b977f 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -86,7 +86,7 @@ andi r6, ~0xf0, r6; \ putcon r6, SR; -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # define preempt_stop() CLI() #else # define preempt_stop() @@ -884,7 +884,7 @@ ret_from_exception: /* Check softirqs */ -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION pta ret_from_syscall, tr0 blink tr0, ZERO diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index eeb25a4fa55f..d4811691b93c 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -28,7 +28,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, arch_sync_dma_for_device(virt_to_phys(ret), size, DMA_BIDIRECTIONAL); - ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size); + ret_nocache = (void __force *)ioremap(virt_to_phys(ret), size); if (!ret_nocache) { free_pages((unsigned long)ret, order); return NULL; diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index d31f66e82ce5..956a7a03b0c8 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -41,7 +41,7 @@ */ #include <asm/dwarf.h> -#if defined(CONFIG_PREEMPT) +#if defined(CONFIG_PREEMPTION) # define preempt_stop() cli ; TRACE_IRQS_OFF #else # define preempt_stop() @@ -84,7 +84,7 @@ ENTRY(ret_from_irq) get_current_thread_info r8, r0 bt resume_kernel ! Yes, it's from kernel, go back soon -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION bra resume_userspace nop ENTRY(resume_kernel) diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 6d61f8cf4c13..0d5f3c9d52f3 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -266,6 +266,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->pc = addr; + /* fallthrough */ case 'D': case 'k': atomic_set(&kgdb_cpu_doing_single_step, -1); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index d232cfa01877..67f5a3b44c2e 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -341,10 +341,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - /* Perform the machine specific initialisation */ if (likely(sh_mv.mv_setup)) sh_mv.mv_setup(cmdline_p); diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index b5ed26c4c005..c7a30fcd135f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open # 435 reserved for clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c index ec2b25302427..fb517b82a87b 100644 --- a/arch/sh/mm/alignment.c +++ b/arch/sh/mm/alignment.c @@ -152,13 +152,12 @@ static ssize_t alignment_proc_write(struct file *file, return count; } -static const struct file_operations alignment_proc_fops = { - .owner = THIS_MODULE, - .open = alignment_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = alignment_proc_write, +static const struct proc_ops alignment_proc_ops = { + .proc_open = alignment_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = alignment_proc_write, }; /* @@ -176,12 +175,12 @@ static int __init alignment_init(void) return -ENOMEM; res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir, - &alignment_proc_fops, &se_usermode); + &alignment_proc_ops, &se_usermode); if (!res) return -ENOMEM; res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir, - &alignment_proc_fops, &se_kernmode_warn); + &alignment_proc_ops, &se_kernmode_warn); if (!res) return -ENOMEM; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index dfdbaa50946e..d1b1ff2be17a 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb24cb1afc11..c1dd6dd642f4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -64,8 +64,7 @@ config SPARC64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_KRETPROBES select HAVE_KPROBES - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE if SMP select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE @@ -277,7 +276,7 @@ config US3_MC config GENERIC_LOCKBREAK bool default y - depends on SPARC64 && SMP && PREEMPT + depends on SPARC64 && SMP && PREEMPTION config NUMA bool "NUMA support" diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 0f5a501c95a9..e3d2138ff9e2 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -169,7 +169,6 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; switch (key_len) { case AES_KEYSIZE_128: @@ -188,7 +187,6 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, break; default: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 1700f863748c..aaa9714378e6 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -39,12 +39,9 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, { struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); const u32 *in_key = (const u32 *) _in_key; - u32 *flags = &tfm->crt_flags; - if (key_len != 16 && key_len != 24 && key_len != 32) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; - } ctx->key_len = key_len; diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 1299073285a3..4e9323229e71 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -33,10 +33,8 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, { u32 *mctx = crypto_shash_ctx(hash); - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(u32)) return -EINVAL; - } *(__le32 *)mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 30b1763580b1..40a267b3bd52 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -125,23 +125,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - #ifdef CONFIG_COMPAT static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index f4afa301954a..9bb27e5c22f1 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -406,7 +406,6 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) return (void __iomem *)offset; } -#define ioremap_nocache(X,Y) ioremap((X),(Y)) #define ioremap_uc(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) #define ioremap_wt(X,Y) ioremap((X),(Y)) diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 9d3e5cc95bbb..264e76ceccf6 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -16,12 +16,12 @@ extern struct kmem_cache *pgtable_cache; -static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +static inline void __p4d_populate(p4d_t *p4d, pud_t *pud) { - pgd_set(pgd, pud); + p4d_set(p4d, pud); } -#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) +#define p4d_populate(MM, P4D, PUD) __p4d_populate(P4D, PUD) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 6ae8016ef4ec..65494c3a420e 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -13,7 +13,7 @@ * the SpitFire page tables. */ -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #include <linux/compiler.h> #include <linux/const.h> #include <asm/types.h> @@ -683,6 +683,7 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +#define pmd_leaf pmd_large static inline unsigned long pmd_large(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); @@ -810,9 +811,9 @@ static inline int pmd_present(pmd_t pmd) #define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) -#define pgd_none(pgd) (!pgd_val(pgd)) +#define p4d_none(p4d) (!p4d_val(p4d)) -#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) +#define p4d_bad(p4d) (p4d_val(p4d) & ~PAGE_MASK) #ifdef CONFIG_TRANSPARENT_HUGEPAGE void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -859,14 +860,15 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pgd_page_vaddr(pgd) \ - ((unsigned long) __va(pgd_val(pgd))) -#define pgd_present(pgd) (pgd_val(pgd) != 0U) -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +#define p4d_page_vaddr(p4d) \ + ((unsigned long) __va(p4d_val(p4d))) +#define p4d_present(p4d) (p4d_val(p4d) != 0U) +#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) /* only used by the stubbed out hugetlb gup code, should never be called */ -#define pgd_page(pgd) NULL +#define p4d_page(p4d) NULL +#define pud_leaf pud_large static inline unsigned long pud_large(pud_t pud) { pte_t pte = __pte(pud_val(pud)); @@ -884,8 +886,8 @@ static inline unsigned long pud_pfn(pud_t pud) /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) -#define pgd_set(pgdp, pudp) \ - (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) +#define p4d_set(p4dp, pudp) \ + (p4d_val(*(p4dp)) = (__pa((unsigned long) (pudp)))) /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -896,8 +898,8 @@ static inline unsigned long pud_pfn(pud_t pud) /* Find an entry in the third-level page table.. */ #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -#define pud_offset(pgdp, address) \ - ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) +#define pud_offset(p4dp, address) \ + ((pud_t *) p4d_page_vaddr(*(p4dp)) + pud_index(address)) /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36..6820d357581c 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. + */ + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include <asm-generic/tlb.h> #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h new file mode 100644 index 000000000000..04b8ab9518b8 --- /dev/null +++ b/arch/sparc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SPARC_VMALLOC_H +#define _ASM_SPARC_VMALLOC_H + +#endif /* _ASM_SPARC_VMALLOC_H */ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 5b933a598a33..0ea1240d2ea1 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -17,19 +17,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/include/uapi/asm/statfs.h b/arch/sparc/include/uapi/asm/statfs.h deleted file mode 100644 index 20c8f5bd340e..000000000000 --- a/arch/sparc/include/uapi/asm/statfs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef ___ASM_SPARC_STATFS_H -#define ___ASM_SPARC_STATFS_H - -#include <asm-generic/statfs.h> - -#endif diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index a6292f8ed180..bd48575172c3 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -104,13 +104,12 @@ static ssize_t led_proc_write(struct file *file, const char __user *buffer, return count; } -static const struct file_operations led_proc_fops = { - .owner = THIS_MODULE, - .open = led_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = led_proc_write, +static const struct proc_ops led_proc_ops = { + .proc_open = led_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = led_proc_write, }; static struct proc_dir_entry *led; @@ -121,7 +120,7 @@ static int __init led_init(void) { timer_setup(&led_blink_timer, led_blink, 0); - led = proc_create("led", 0, NULL, &led_proc_fops); + led = proc_create("led", 0, NULL, &led_proc_ops); if (!led) return -ENOMEM; diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index ec244d1022ce..da8902295c8c 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -132,12 +132,13 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs->which_io, regs->phys_addr); } -/* "name:vendor:device@irq,addrlo" */ +/* "name@irq,addrlo" */ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) { const char *name = of_get_property(dp, "name", NULL); struct amba_prom_registers *regs; - unsigned int *intr, *device, *vendor, reg0; + unsigned int *intr; + unsigned int reg0; struct property *prop; int interrupt = 0; @@ -159,18 +160,7 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) else intr = prop->value; - prop = of_find_property(dp, "vendor", NULL); - if (!prop) - return; - vendor = prop->value; - prop = of_find_property(dp, "device", NULL); - if (!prop) - return; - device = prop->value; - - sprintf(tmp_buf, "%s:%d:%d@%x,%x", - name, *vendor, *device, - *intr, reg0); + sprintf(tmp_buf, "%s@%x,%x", name, *intr, reg0); } static void __init __build_path_component(struct device_node *dp, char *tmp_buf) diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 29aa34f11720..c5fd4b450d9b 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -310,7 +310,7 @@ kern_rtt_restore: retry to_kernel: -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION ldsw [%g6 + TI_PRE_COUNT], %l5 brnz %l5, kern_fpucheck ldx [%g6 + TI_FLAGS], %l5 diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index afe1592a6d08..5d1bcfce05d8 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -332,10 +332,6 @@ void __init setup_arch(char **cmdline_p) break; } -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - idprom_init(); load_mmu(); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index fd2182a5c32d..75e3992203b6 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -653,10 +653,6 @@ void __init setup_arch(char **cmdline_p) else pr_info("ARCH: SUN4U\n"); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - idprom_init(); if (!root_flags) diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index a237810aa9f4..2a734ecd0a40 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -299,6 +299,7 @@ static void flush_signal_insns(unsigned long address) unsigned long pstate, paddr; pte_t *ptep, pte; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; @@ -318,7 +319,10 @@ static void flush_signal_insns(unsigned long address) pgdp = pgd_offset(current->mm, address); if (pgd_none(*pgdp)) goto out_irqs_on; - pudp = pud_offset(pgdp, address); + p4dp = p4d_offset(pgdp, address); + if (p4d_none(*p4dp)) + goto out_irqs_on; + pudp = pud_offset(p4dp, address); if (pud_none(*pudp)) goto out_irqs_on; pmdp = pmd_offset(pudp, address); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9b4506373353..80f20b3808ee 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1621,6 +1621,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) static void __init pcpu_populate_pte(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -1633,7 +1634,17 @@ static void __init pcpu_populate_pte(unsigned long addr) pgd_populate(&init_mm, pgd, new); } - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 9f41a6f5a032..6b92fadb6ec7 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -548,34 +548,35 @@ out_unlock: return err; } -SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p) +SYSCALL_DEFINE1(sparc_adjtimex, struct __kernel_timex __user *, txc_p) { - struct timex txc; /* Local copy of parameter */ - struct __kernel_timex *kt = (void *)&txc; + struct __kernel_timex txc; + struct __kernel_old_timeval *tv = (void *)&txc.time; int ret; /* Copy the user data space into the kernel copy * structure. But bear in mind that the structures * may change */ - if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + if (copy_from_user(&txc, txc_p, sizeof(txc))) return -EFAULT; /* * override for sparc64 specific timeval type: tv_usec * is 32 bit wide instead of 64-bit in __kernel_timex */ - kt->time.tv_usec = txc.time.tv_usec; - ret = do_adjtimex(kt); - txc.time.tv_usec = kt->time.tv_usec; + txc.time.tv_usec = tv->tv_usec; + ret = do_adjtimex(&txc); + tv->tv_usec = txc.time.tv_usec; - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; + return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret; } -SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p) +SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock, + struct __kernel_timex __user *, txc_p) { - struct timex txc; /* Local copy of parameter */ - struct __kernel_timex *kt = (void *)&txc; + struct __kernel_timex txc; + struct __kernel_old_timeval *tv = (void *)&txc.time; int ret; if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) { @@ -590,18 +591,18 @@ SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex _ * structure. But bear in mind that the structures * may change */ - if (copy_from_user(&txc, txc_p, sizeof(struct timex))) + if (copy_from_user(&txc, txc_p, sizeof(txc))) return -EFAULT; /* * override for sparc64 specific timeval type: tv_usec * is 32 bit wide instead of 64-bit in __kernel_timex */ - kt->time.tv_usec = txc.time.tv_usec; - ret = do_clock_adjtime(which_clock, kt); - txc.time.tv_usec = kt->time.tv_usec; + txc.time.tv_usec = tv->tv_usec; + ret = do_clock_adjtime(which_clock, &txc); + tv->tv_usec = txc.time.tv_usec; - return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; + return copy_to_user(txc_p, &txc, sizeof(txc)) ? -EFAULT : ret; } SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 8c8cc7537fb2..f13615ecdecc 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -481,3 +481,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open # 435 reserved for clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 7ec79918b566..f99e99e58075 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,12 +171,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 2371fb6b97e4..8b7ddbd14b65 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -80,6 +80,7 @@ static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) static unsigned int get_user_insn(unsigned long tpc) { pgd_t *pgdp = pgd_offset(current->mm, tpc); + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep, pte; @@ -88,7 +89,10 @@ static unsigned int get_user_insn(unsigned long tpc) if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) goto out; - pudp = pud_offset(pgdp, tpc); + p4dp = p4d_offset(pgdp, tpc); + if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp))) + goto out; + pudp = pud_offset(p4dp, tpc); if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) goto out; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index f78793a06bbd..7b9fa861b67c 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -277,11 +277,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; if (sz >= PUD_SIZE) @@ -298,13 +300,17 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return NULL; if (is_hugetlb_pud(*pud)) @@ -449,7 +455,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -458,7 +464,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -481,8 +487,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); + pud = pud_offset(p4d, start); + p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } @@ -492,6 +498,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; addr &= PMD_MASK; @@ -511,10 +518,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, return; pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); - } while (pgd++, addr = next, addr != end); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); + } while (p4d++, addr = next, addr != end); } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index e6d91819da92..1cf0d666dea3 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -530,7 +530,8 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end) paddr = kaddr & mask; else { pgd_t *pgdp = pgd_offset_k(kaddr); - pud_t *pudp = pud_offset(pgdp, kaddr); + p4d_t *p4dp = p4d_offset(pgdp, kaddr); + pud_t *pudp = pud_offset(p4dp, kaddr); pmd_t *pmdp = pmd_offset(pudp, kaddr); pte_t *ptep = pte_offset_kernel(pmdp, kaddr); @@ -1653,6 +1654,7 @@ static unsigned long max_phys_bits = 40; bool kern_addr_valid(unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -1674,7 +1676,11 @@ bool kern_addr_valid(unsigned long addr) if (pgd_none(*pgd)) return 0; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return 0; + + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return 0; @@ -1800,6 +1806,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, while (vstart < vend) { unsigned long this_end, paddr = __pa(vstart); pgd_t *pgd = pgd_offset_k(vstart); + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -1814,7 +1821,20 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, alloc_bytes += PAGE_SIZE; pgd_populate(&init_mm, pgd, new); } - pud = pud_offset(pgd, vstart); + + p4d = p4d_offset(pgd, vstart); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, + PAGE_SIZE); + if (!new) + goto err_alloc; + alloc_bytes += PAGE_SIZE; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, vstart); if (pud_none(*pud)) { pmd_t *new; @@ -2612,13 +2632,18 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, for (; vstart < vend; vstart += PMD_SIZE) { pgd_t *pgd = vmemmap_pgd_populate(vstart, node); unsigned long pte; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, vstart, node); + p4d = vmemmap_p4d_populate(pgd, vstart, node); + if (!p4d) + return -ENOMEM; + + pud = vmemmap_pud_populate(p4d, vstart, node); if (!pud) return -ENOMEM; diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index 84cc8f7f83e9..c8eabb973b86 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -180,19 +180,19 @@ do { \ #define emit_loadptr(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(void *)); \ *prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) #define emit_load32(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u32)); \ *prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) #define emit_load16(BASE, STRUCT, FIELD, DEST) \ do { unsigned int _off = offsetof(STRUCT, FIELD); \ - BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16)); \ + BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u16)); \ *prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST); \ } while (0) @@ -202,7 +202,7 @@ do { unsigned int _off = offsetof(STRUCT, FIELD); \ } while (0) #define emit_load8(BASE, STRUCT, FIELD, DEST) \ -do { BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8)); \ +do { BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u8)); \ __emit_load8(BASE, STRUCT, FIELD, DEST); \ } while (0) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 2a6d04fcb3e9..0917f8443c28 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -14,6 +14,7 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE + select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS @@ -188,6 +189,8 @@ config SECCOMP config UML_TIME_TRAVEL_SUPPORT bool prompt "Support time-travel mode (e.g. for test execution)" + # inf-cpu mode is incompatible with the benchmarking + depends on !RAID6_PQ_BENCHMARK help Enable this option to support time travel inside the UML instance. diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 388096fb45a2..72d417055782 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -147,7 +147,7 @@ config UML_NET make use of UML networking. config UML_NET_ETHERTAP - bool "Ethertap transport" + bool "Ethertap transport (obsolete)" depends on UML_NET help The Ethertap User-Mode Linux network transport allows a single @@ -167,14 +167,13 @@ config UML_NET_ETHERTAP has examples of the UML command line to use to enable Ethertap networking. - If you'd like to set up an IP network with the host and/or the - outside world, say Y to this, the Daemon Transport and/or the - Slip Transport. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_TUNTAP - bool "TUN/TAP transport" + bool "TUN/TAP transport (obsolete)" depends on UML_NET help The UML TUN/TAP network transport allows a UML instance to exchange @@ -185,8 +184,13 @@ config UML_NET_TUNTAP To use this transport, your host kernel must have support for TUN/TAP devices, either built-in or as a module. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. + config UML_NET_SLIP - bool "SLIP transport" + bool "SLIP transport (obsolete)" depends on UML_NET help The slip User-Mode Linux network transport allows a running UML to @@ -201,16 +205,13 @@ config UML_NET_SLIP has examples of the UML command line to use to enable slip networking, and details of a few quirks with it. - The Ethertap Transport is preferred over slip because of its - limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_DAEMON - bool "Daemon transport" + bool "Daemon transport (obsolete)" depends on UML_NET help This User-Mode Linux network transport allows one or more running @@ -225,13 +226,10 @@ config UML_NET_DAEMON has examples of the UML command line to use to enable Daemon networking. - If you'd like to set up a network with other UMLs on a single host, - say Y. If you need a network between UMLs on multiple physical - hosts, choose the Multicast Transport. To set up a network with - the host and/or other IP machines, say Y to the Ethertap or Slip - transports. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_VECTOR bool "Vector I/O high performance network devices" @@ -245,7 +243,7 @@ config UML_NET_VECTOR drivers. config UML_NET_VDE - bool "VDE transport" + bool "VDE transport (obsolete)" depends on UML_NET help This User-Mode Linux network transport allows one or more running @@ -263,11 +261,13 @@ config UML_NET_VDE That site has a good overview of what VDE is and also examples of the UML command line to use to enable VDE networking. - If you need UML networking with VDE, - say Y. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_MCAST - bool "Multicast transport" + bool "Multicast transport (obsolete)" depends on UML_NET help This Multicast User-Mode Linux network transport allows multiple @@ -284,15 +284,13 @@ config UML_NET_MCAST has examples of the UML command line to use to enable Multicast networking, and notes about the security of this approach. - If you need UMLs on multiple physical hosts to communicate as if - they shared an Ethernet network, say Y. If you need to communicate - with other IP machines, make sure you select one of the other - transports (possibly in addition to Multicast; they're not - exclusive). If you don't need to network UMLs say N to each of - the transports. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_PCAP - bool "pcap transport" + bool "pcap transport (obsolete)" depends on UML_NET help The pcap transport makes a pcap packet stream on the host look @@ -304,11 +302,13 @@ config UML_NET_PCAP <http://user-mode-linux.sourceforge.net/old/networking.html> That site has examples of the UML command line to use to enable this option. - If you intend to use UML as a network monitor for the host, say - Y here. Otherwise, say N. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. config UML_NET_SLIRP - bool "SLiRP transport" + bool "SLiRP transport (obsolete)" depends on UML_NET help The SLiRP User-Mode Linux network transport allows a running UML @@ -328,9 +328,10 @@ config UML_NET_SLIRP that of a host behind a firewall that masquerades all network connections passing through it (but is less secure). - To use this you should first have slirp compiled somewhere - accessible on the host, and have read its documentation. If you - don't need UML networking, say N. + NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please + migrate to UML_NET_VECTOR. + + If unsure, say N. Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h index 72222bb036f5..4e51b85e2a23 100644 --- a/arch/um/drivers/chan_user.h +++ b/arch/um/drivers/chan_user.h @@ -11,7 +11,7 @@ struct chan_opts { void (*const announce)(char *dev_name, int dev); char *xterm_title; - const int raw; + int raw; }; struct chan_ops { diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h index 760c507dd5b6..103adac691ed 100644 --- a/arch/um/drivers/cow.h +++ b/arch/um/drivers/cow.h @@ -11,7 +11,7 @@ extern int init_cow_file(int fd, char *cow_file, char *backing_file, extern int file_reader(__u64 offset, char *buf, int len, void *arg); extern int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, __u32 *version_out, - char **backing_file_out, time_t *mtime_out, + char **backing_file_out, long long *mtime_out, unsigned long long *size_out, int *sectorsize_out, __u32 *align_out, int *bitmap_offset_out); diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 74b0c2686c95..29b46581ddd1 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -17,6 +17,7 @@ #define PATH_LEN_V1 256 +/* unsigned time_t works until year 2106 */ typedef __u32 time32_t; struct cow_header_v1 { @@ -197,7 +198,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, int sectorsize, int alignment, unsigned long long *size) { struct cow_header_v3 *header; - unsigned long modtime; + long long modtime; int err; err = cow_seek_file(fd, 0); @@ -276,7 +277,7 @@ int file_reader(__u64 offset, char *buf, int len, void *arg) int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, __u32 *version_out, char **backing_file_out, - time_t *mtime_out, unsigned long long *size_out, + long long *mtime_out, unsigned long long *size_out, int *sectorsize_out, __u32 *align_out, int *bitmap_offset_out) { @@ -363,7 +364,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, /* * this was used until Dec2005 - 64bits are needed to represent - * 2038+. I.e. we can safely do this truncating cast. + * 2106+. I.e. we can safely do this truncating cast. * * Additionally, we must use be32toh() instead of be64toh(), since * the program used to use the former (tested - I got mtime diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 0117489e9b30..b80a1d616e4e 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -752,10 +752,9 @@ static ssize_t mconsole_proc_write(struct file *file, return count; } -static const struct file_operations mconsole_proc_fops = { - .owner = THIS_MODULE, - .write = mconsole_proc_write, - .llseek = noop_llseek, +static const struct proc_ops mconsole_proc_ops = { + .proc_write = mconsole_proc_write, + .proc_lseek = noop_llseek, }; static int create_proc_mconsole(void) @@ -765,7 +764,7 @@ static int create_proc_mconsole(void) if (notify_socket == NULL) return 0; - ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_fops); + ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops); if (ent == NULL) { printk(KERN_INFO "create_proc_mconsole : proc_create failed\n"); return 0; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 327b728f7244..35ebeebfc1a8 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -247,7 +247,7 @@ static void uml_net_set_multicast_list(struct net_device *dev) return; } -static void uml_net_tx_timeout(struct net_device *dev) +static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { netif_trans_update(dev); netif_wake_queue(dev); diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index b213201b8a3b..26c5716fac0f 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -196,3 +196,11 @@ static int ssl_chan_setup(char *str) __setup("ssl", ssl_chan_setup); __channel_help(ssl_chan_setup, "ssl"); + +static int ssl_non_raw_setup(char *str) +{ + opts.raw = 0; + return 1; +} +__setup("ssl-non-raw", ssl_non_raw_setup); +__channel_help(ssl_non_raw_setup, "set serial lines to non-raw mode"); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 6627d7c30f37..247f95da057b 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -113,6 +113,7 @@ static const struct block_device_operations ubd_blops = { .open = ubd_open, .release = ubd_release, .ioctl = ubd_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, .getgeo = ubd_getgeo, }; @@ -561,7 +562,7 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) __u32 version; __u32 align; char *backing_file; - time_t mtime; + time64_t mtime; unsigned long long size; int sector_size; int bitmap_offset; @@ -600,9 +601,9 @@ static int read_cow_bitmap(int fd, void *buf, int offset, int len) return 0; } -static int backing_file_mismatch(char *file, __u64 size, time_t mtime) +static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) { - unsigned long modtime; + time64_t modtime; unsigned long long actual; int err; @@ -628,7 +629,7 @@ static int backing_file_mismatch(char *file, __u64 size, time_t mtime) return -EINVAL; } if (modtime != mtime) { - printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " + printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " "backing file\n", mtime, modtime); return -EINVAL; } @@ -671,7 +672,7 @@ static int open_ubd_file(char *file, struct openflags *openflags, int shared, unsigned long *bitmap_len_out, int *data_offset_out, int *create_cow_out) { - time_t mtime; + time64_t mtime; unsigned long long size; __u32 version, align; char *backing_file; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 92617e16829e..0ff86391f77d 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1332,7 +1332,7 @@ static void vector_net_set_multicast_list(struct net_device *dev) return; } -static void vector_net_tx_timeout(struct net_device *dev) +static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vector_private *vp = netdev_priv(dev); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 398006d27e40..db7d9d4e30d8 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -25,5 +25,4 @@ generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h generic-y += word-at-a-time.h -generic-y += xor.h generic-y += kprobes.h diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 7145ce699982..eca6c452a41b 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -82,8 +82,8 @@ __preinit_array_end = .; } .init_array : { - /* dummy - we call this ourselves */ __init_array_start = .; + *(.init_array) __init_array_end = .; } .fini_array : { diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 5aee0626e390..b4deb1bfbb68 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -25,11 +25,6 @@ static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index 81c647ef9c6c..adf91ef553ae 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -36,7 +36,7 @@ extern long subarch_ptrace(struct task_struct *child, long request, extern unsigned long getreg(struct task_struct *child, int regno); extern int putreg(struct task_struct *child, int regno, unsigned long value); -extern int arch_copy_tls(struct task_struct *new); +extern int arch_set_tls(struct task_struct *new, unsigned long tls); extern void clear_flushed_tls(struct task_struct *task); extern int syscall_trace_enter(struct pt_regs *regs); extern void syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h new file mode 100644 index 000000000000..9a7b9ed93733 --- /dev/null +++ b/arch/um/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UM_VMALLOC_H +#define _ASM_UM_VMALLOC_H + +#endif /* _ASM_UM_VMALLOC_H */ diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h new file mode 100644 index 000000000000..7a3208c47cfc --- /dev/null +++ b/arch/um/include/asm/xor.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm-generic/xor.h> +#include <shared/timer-internal.h> + +/* pick an arbitrary one - measuring isn't possible with inf-cpu */ +#define XOR_SELECT_TEMPLATE(x) \ + (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL) diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 506bcd1bca68..0f30204b6afa 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -150,7 +150,7 @@ extern int os_sync_file(int fd); extern int os_file_size(const char *file, unsigned long long *size_out); extern int os_pread_file(int fd, void *buf, int len, unsigned long long offset); extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long offset); -extern int os_file_modtime(const char *file, unsigned long *modtime); +extern int os_file_modtime(const char *file, long long *modtime); extern int os_pipe(int *fd, int stream, int close_on_exec); extern int os_set_fd_async(int fd); extern int os_clear_fd_async(int fd); diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index c69d69ee96be..f5001481010c 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -103,6 +103,7 @@ SECTIONS be empty, which isn't pretty. */ . = ALIGN(32 / 8); .preinit_array : { *(.preinit_array) } + .init_array : { *(.init_array) } .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 369fd844e195..43edc2aa57e4 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -55,20 +55,19 @@ static ssize_t exitcode_proc_write(struct file *file, return count; } -static const struct file_operations exitcode_proc_fops = { - .owner = THIS_MODULE, - .open = exitcode_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = exitcode_proc_write, +static const struct proc_ops exitcode_proc_ops = { + .proc_open = exitcode_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = exitcode_proc_write, }; static int make_proc_exitcode(void) { struct proc_dir_entry *ent; - ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops); + ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops); if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 263a8f069133..56a094182bf5 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -153,8 +153,8 @@ void fork_handler(void) userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); } -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct * p) +int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct * p, unsigned long tls) { void (*handler)(void); int kthread = current->flags & PF_KTHREAD; @@ -188,7 +188,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, * Set a new TLS for the child thread? */ if (clone_flags & CLONE_SETTLS) - ret = arch_copy_tls(p); + ret = arch_set_tls(p, tls); } return ret; @@ -348,13 +348,12 @@ static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations sysemu_proc_fops = { - .owner = THIS_MODULE, - .open = sysemu_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = sysemu_proc_write, +static const struct proc_ops sysemu_proc_ops = { + .proc_open = sysemu_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = sysemu_proc_write, }; int __init make_proc_sysemu(void) @@ -363,7 +362,7 @@ int __init make_proc_sysemu(void) if (!sysemu_supported) return 0; - ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops); + ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops); if (ent == NULL) { diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 5133e3afb96f..fbda10535dab 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -341,7 +341,7 @@ int os_file_size(const char *file, unsigned long long *size_out) return 0; } -int os_file_modtime(const char *file, unsigned long *modtime) +int os_file_modtime(const char *file, long long *modtime) { struct uml_stat buf; int err; diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h index 4b460e01acfa..3ca74e1cde7d 100644 --- a/arch/unicore32/include/asm/io.h +++ b/arch/unicore32/include/asm/io.h @@ -31,7 +31,6 @@ extern void __uc32_iounmap(volatile void __iomem *addr); * */ #define ioremap(cookie, size) __uc32_ioremap(cookie, size) -#define ioremap_nocache(cookie, size) __uc32_ioremap(cookie, size) #define iounmap(cookie) __uc32_iounmap(cookie) #define readb_relaxed readb diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 247a07ae2cdc..388c0c811c68 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -89,11 +89,6 @@ static inline void arch_unmap(struct mm_struct *mm, { } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h new file mode 100644 index 000000000000..054435818a14 --- /dev/null +++ b/arch/unicore32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UNICORE32_VMALLOC_H +#define _ASM_UNICORE32_VMALLOC_H + +#endif /* _ASM_UNICORE32_VMALLOC_H */ diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 95ae3b54df68..0c4242a5ee1d 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -270,8 +270,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; #endif #endif early_trap_init(); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..beea77046f9b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -93,10 +93,11 @@ config X86 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANTS_THP_SWAP if X86_64 - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLKEVT_I8253 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG @@ -119,11 +120,13 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI @@ -200,7 +203,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API @@ -439,8 +442,8 @@ config X86_MPPARSE (esp with 64bit cpus) with acpi support, MADT and DSDT will override it config GOLDFISH - def_bool y - depends on X86_GOLDFISH + def_bool y + depends on X86_GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" @@ -456,6 +459,7 @@ config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS + select PROC_CPU_RESCTRL if PROC_FS help Enable x86 CPU resource control support. @@ -477,7 +481,7 @@ config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" depends on SMP ---help--- - This option is needed for the systems that have more than 8 CPUs + This option is needed for the systems that have more than 8 CPUs. config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" @@ -561,9 +565,9 @@ config X86_UV # Please maintain the alphabetic order if and when there are additions config X86_GOLDFISH - bool "Goldfish (Virtual Platform)" - depends on X86_EXTENDED_PLATFORM - ---help--- + bool "Goldfish (Virtual Platform)" + depends on X86_EXTENDED_PLATFORM + ---help--- Enable support for the Goldfish virtual platform used primarily for Android development. Unless you are building for the Android Goldfish emulator say N here. @@ -806,9 +810,9 @@ config KVM_GUEST timing infrastructure such as time of day, and system time config ARCH_CPUIDLE_HALTPOLL - def_bool n - prompt "Disable host haltpoll when loading haltpoll driver" - help + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help If virtualized under KVM, disable host haltpoll. config PVH @@ -887,16 +891,16 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) config APB_TIMER - def_bool y if X86_INTEL_MID - prompt "Intel MID APB Timer Support" if X86_INTEL_MID - select DW_APB_TIMER - depends on X86_INTEL_MID && SFI - help - APB timer is the replacement for 8254, HPET on X86 MID platforms. - The APBT provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. APB timers are always running regardless of CPU - C states, they are used as per CPU clockevent device when possible. + def_bool y if X86_INTEL_MID + prompt "Intel MID APB Timer Support" if X86_INTEL_MID + select DW_APB_TIMER + depends on X86_INTEL_MID && SFI + help + APB timer is the replacement for 8254, HPET on X86 MID platforms. + The APBT provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. APB timers are always running regardless of CPU + C states, they are used as per CPU clockevent device when possible. # Mark as expert because too many people got it wrong. # The code disables itself when not needed. @@ -1035,8 +1039,8 @@ config SCHED_MC_PRIO If unsure say Y here. config UP_LATE_INIT - def_bool y - depends on !SMP && X86_LOCAL_APIC + def_bool y + depends on !SMP && X86_LOCAL_APIC config X86_UP_APIC bool "Local APIC support on uniprocessors" if !PCI_MSI @@ -1185,8 +1189,8 @@ config X86_LEGACY_VM86 If unsure, say N here. config VM86 - bool - default X86_LEGACY_VM86 + bool + default X86_LEGACY_VM86 config X86_16BIT bool "Enable support for 16-bit segments" if EXPERT @@ -1207,10 +1211,10 @@ config X86_ESPFIX64 depends on X86_16BIT && X86_64 config X86_VSYSCALL_EMULATION - bool "Enable vsyscall emulation" if EXPERT - default y - depends on X86_64 - ---help--- + bool "Enable vsyscall emulation" if EXPERT + default y + depends on X86_64 + ---help--- This enables emulation of the legacy vsyscall page. Disabling it is roughly equivalent to booting with vsyscall=none, except that it will also disable the helpful warning if a program @@ -1512,7 +1516,7 @@ config X86_CPA_STATISTICS bool "Enable statistic for Change Page Attribute" depends on DEBUG_FS ---help--- - Expose statistics about the Change Page Attribute mechanims, which + Expose statistics about the Change Page Attribute mechanism, which helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. @@ -1543,12 +1547,12 @@ config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT # Common NUMA Features config NUMA - bool "Numa Memory Allocation and Scheduler Support" + bool "NUMA Memory Allocation and Scheduler Support" depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) default y if X86_BIGSMP ---help--- - Enable NUMA (Non Uniform Memory Access) support. + Enable NUMA (Non-Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the local memory controller of the CPU and add some more @@ -1648,9 +1652,9 @@ config ARCH_PROC_KCORE_TEXT depends on X86_64 && PROC_KCORE config ILLEGAL_POINTER_VALUE - hex - default 0 if X86_32 - default 0xdead000000000000 if X86_64 + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 config X86_PMEM_LEGACY_DEVICE bool @@ -1885,34 +1889,6 @@ config X86_UMIP specific cases in protected and virtual-8086 modes. Emulated results are dummy. -config X86_INTEL_MPX - prompt "Intel MPX (Memory Protection Extensions)" - def_bool n - # Note: only available in 64-bit mode due to VMA flags shortage - depends on CPU_SUP_INTEL && X86_64 - select ARCH_USES_HIGH_VMA_FLAGS - ---help--- - MPX provides hardware features that can be used in - conjunction with compiler-instrumented code to check - memory references. It is designed to detect buffer - overflow or underflow bugs. - - This option enables running applications which are - instrumented or otherwise use MPX. It does not use MPX - itself inside the kernel or to protect the kernel - against bad memory references. - - Enabling this option will make the kernel larger: - ~8k of kernel text and 36 bytes of data on a 64-bit - defconfig. It adds a long to the 'mm_struct' which - will increase the kernel memory overhead of each - process and adds some branches to paths used during - exec() and munmap(). - - For details, see Documentation/x86/intel_mpx.rst - - If unsure, say N. - config X86_INTEL_MEMORY_PROTECTION_KEYS prompt "Intel Memory Protection Keys" def_bool y @@ -1991,11 +1967,12 @@ config EFI platforms. config EFI_STUB - bool "EFI stub support" - depends on EFI && !X86_USE_3DNOW - select RELOCATABLE - ---help--- - This kernel feature allows a bzImage to be loaded directly + bool "EFI stub support" + depends on EFI && !X86_USE_3DNOW + depends on $(cc-option,-mabi=ms) || X86_32 + select RELOCATABLE + ---help--- + This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. See Documentation/admin-guide/efi-stub.rst for more information. @@ -2955,9 +2932,6 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 -config X86_DEV_DMA_OPS - bool - source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index af9c967782f6..bc3a497c029c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -387,6 +387,14 @@ config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML +config IA32_FEAT_CTL + def_bool y + depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN + +config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT ---help--- diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c4eab8ed33a3..2e74690b028a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC config MCSAFE_TEST def_bool n -config X86_PTDUMP_CORE - def_bool n - -config X86_PTDUMP - tristate "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - select X86_PTDUMP_CORE - ---help--- - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - If in doubt, say "N" - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous @@ -90,7 +74,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 95410d6ee2ff..748b6d28a91d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -88,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index aa976adb7094..56aa5fa0a66b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -89,7 +89,7 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ +vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o @@ -103,7 +103,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ + $(READELF) -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 72b08fde6de6..287393d725f0 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -6,6 +6,8 @@ * * ----------------------------------------------------------------------- */ +#pragma GCC visibility push(hidden) + #include <linux/efi.h> #include <linux/pci.h> @@ -19,32 +21,18 @@ #include "eboot.h" static efi_system_table_t *sys_table; +extern const bool efi_is64; -static struct efi_config *efi_early; - -__pure const struct efi_config *__efi_early(void) +__pure efi_system_table_t *efi_system_table(void) { - return efi_early; + return sys_table; } -#define BOOT_SERVICES(bits) \ -static void setup_boot_services##bits(struct efi_config *c) \ -{ \ - efi_system_table_##bits##_t *table; \ - \ - table = (typeof(table))sys_table; \ - \ - c->runtime_services = table->runtime; \ - c->boot_services = table->boottime; \ - c->text_output = table->con_out; \ -} -BOOT_SERVICES(32); -BOOT_SERVICES(64); - -void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +__attribute_const__ bool efi_is_64bit(void) { - efi_call_proto(efi_simple_text_output_protocol, output_string, - efi_early->text_output, str); + if (IS_ENABLED(CONFIG_EFI_MIXED)) + return efi_is64; + return IS_ENABLED(CONFIG_X86_64); } static efi_status_t @@ -63,17 +51,17 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. */ - romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol, - romimage, pci); - romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci); + romimage = efi_table_attr(pci, romimage); + romsize = efi_table_attr(pci, romsize); if (!romimage || !romsize || romsize > SZ_16M) return EFI_INVALID_PARAMETER; size = romsize + sizeof(*rom); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&rom); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'rom'\n"); + efi_printk("Failed to allocate memory for 'rom'\n"); return status; } @@ -85,27 +73,24 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) rom->pcilen = pci->romsize; *__rom = rom; - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_VENDOR_ID, 1, - &rom->vendor); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &rom->vendor); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->vendor\n"); + efi_printk("Failed to read rom->vendor\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, - EfiPciIoWidthUint16, PCI_DEVICE_ID, 1, - &rom->devid); + status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &rom->devid); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->devid\n"); + efi_printk("Failed to read rom->devid\n"); goto free_struct; } - status = efi_call_proto(efi_pci_io_protocol, get_location, pci, - &rom->segment, &rom->bus, &rom->device, - &rom->function); + status = efi_call_proto(pci, get_location, &rom->segment, &rom->bus, + &rom->device, &rom->function); if (status != EFI_SUCCESS) goto free_struct; @@ -114,7 +99,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) return status; free_struct: - efi_call_early(free_pool, rom); + efi_bs_call(free_pool, rom); return status; } @@ -133,27 +118,24 @@ static void setup_efi_pci(struct boot_params *params) void **pci_handle = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; unsigned long size = 0; - unsigned long nr_pci; struct setup_data *data; + efi_handle_t h; int i; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &pci_proto, NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_call_early(allocate_pool, - EFI_LOADER_DATA, - size, (void **)&pci_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&pci_handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n"); + efi_printk("Failed to allocate memory for 'pci_handle'\n"); return; } - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); } if (status != EFI_SUCCESS) @@ -164,15 +146,12 @@ static void setup_efi_pci(struct boot_params *params) while (data && data->next) data = (struct setup_data *)(unsigned long)data->next; - nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_pci; i++) { + for_each_efi_handle(h, pci_handle, size, i) { efi_pci_io_protocol_t *pci = NULL; struct pci_setup_rom *rom; - status = efi_call_early(handle_protocol, - efi_is_64bit() ? ((u64 *)pci_handle)[i] - : ((u32 *)pci_handle)[i], - &pci_proto, (void **)&pci); + status = efi_bs_call(handle_protocol, h, &pci_proto, + (void **)&pci); if (status != EFI_SUCCESS || !pci) continue; @@ -189,7 +168,7 @@ static void setup_efi_pci(struct boot_params *params) } free_handle: - efi_call_early(free_pool, pci_handle); + efi_bs_call(free_pool, pci_handle); } static void retrieve_apple_device_properties(struct boot_params *boot_params) @@ -198,34 +177,34 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) struct setup_data *data, *new; efi_status_t status; u32 size = 0; - void *p; + apple_properties_protocol_t *p; - status = efi_call_early(locate_protocol, &guid, NULL, &p); + status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&p); if (status != EFI_SUCCESS) return; - if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) { - efi_printk(sys_table, "Unsupported properties proto version\n"); + if (efi_table_attr(p, version) != 0x10000) { + efi_printk("Unsupported properties proto version\n"); return; } - efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size); + efi_call_proto(p, get_all, NULL, &size); if (!size) return; do { - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size + sizeof(struct setup_data), &new); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + size + sizeof(struct setup_data), + (void **)&new); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'properties'\n"); + efi_printk("Failed to allocate memory for 'properties'\n"); return; } - status = efi_call_proto(apple_properties_protocol, get_all, p, - new->data, &size); + status = efi_call_proto(p, get_all, new->data, &size); if (status == EFI_BUFFER_TOO_SMALL) - efi_call_early(free_pool, new); + efi_bs_call(free_pool, new); } while (status == EFI_BUFFER_TOO_SMALL); new->type = SETUP_APPLE_PROPERTIES; @@ -247,7 +226,7 @@ static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor, sys_table); + efi_table_attr(efi_system_table(), fw_vendor); if (!memcmp(fw_vendor, apple, sizeof(apple))) { if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) @@ -265,17 +244,16 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) u32 width, height; void **uga_handle = NULL; efi_uga_draw_protocol_t *uga = NULL, *first_uga; - unsigned long nr_ugas; + efi_handle_t handle; int i; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)&uga_handle); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)&uga_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -283,24 +261,20 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) width = 0; first_uga = NULL; - nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32)); - for (i = 0; i < nr_ugas; i++) { + for_each_efi_handle(handle, uga_handle, size, i) { efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; u32 w, h, depth, refresh; void *pciio; - unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i] - : ((u32 *)uga_handle)[i]; - status = efi_call_early(handle_protocol, handle, - uga_proto, (void **)&uga); + status = efi_bs_call(handle_protocol, handle, uga_proto, + (void **)&uga); if (status != EFI_SUCCESS) continue; pciio = NULL; - efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); + efi_bs_call(handle_protocol, handle, &pciio_proto, &pciio); - status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga, - &w, &h, &depth, &refresh); + status = efi_call_proto(uga, get_mode, &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { width = w; height = h; @@ -336,7 +310,7 @@ setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size) si->rsvd_pos = 24; free_handle: - efi_call_early(free_pool, uga_handle); + efi_bs_call(free_pool, uga_handle); return status; } @@ -355,37 +329,38 @@ void setup_graphics(struct boot_params *boot_params) memset(si, 0, sizeof(*si)); size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &graphics_proto, NULL, &size, gop_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) - status = efi_setup_gop(NULL, si, &graphics_proto, size); + status = efi_setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; - status = efi_call_early(locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &uga_proto, NULL, &size, uga_handle); + status = efi_bs_call(locate_handle, EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); if (status == EFI_BUFFER_TOO_SMALL) setup_uga(si, &uga_proto, size); } } +void startup_32(struct boot_params *boot_params); + +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + /* * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). - * - * The caller is responsible for filling out ->code32_start in the - * returned boot_params. */ -struct boot_params *make_boot_params(struct efi_config *c) +efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg) { struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; efi_loaded_image_t *image; - void *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -393,31 +368,22 @@ struct boot_params *make_boot_params(struct efi_config *c) unsigned long ramdisk_addr; unsigned long ramdisk_size; - efi_early = c; - sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; + sys_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - return NULL; - - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); + return EFI_INVALID_PARAMETER; - status = efi_call_early(handle_protocol, handle, - &proto, (void *)&image); + status = efi_bs_call(handle_protocol, handle, &proto, (void *)&image); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); - return NULL; + efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); + return status; } - status = efi_low_alloc(sys_table, 0x4000, 1, - (unsigned long *)&boot_params); + status = efi_low_alloc(0x4000, 1, (unsigned long *)&boot_params); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate lowmem for boot params\n"); - return NULL; + efi_printk("Failed to allocate lowmem for boot params\n"); + return status; } memset(boot_params, 0x0, 0x4000); @@ -439,7 +405,7 @@ struct boot_params *make_boot_params(struct efi_config *c) hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ - cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); + cmdline_ptr = efi_convert_cmdline(image, &options_size); if (!cmdline_ptr) goto fail; @@ -457,15 +423,15 @@ struct boot_params *make_boot_params(struct efi_config *c) if (status != EFI_SUCCESS) goto fail2; - status = handle_cmdline_files(sys_table, image, + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", hdr->initrd_addr_max, &ramdisk_addr, &ramdisk_size); if (status != EFI_SUCCESS && hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { - efi_printk(sys_table, "Trying to load files to higher address\n"); - status = handle_cmdline_files(sys_table, image, + efi_printk("Trying to load files to higher address\n"); + status = handle_cmdline_files(image, (char *)(unsigned long)hdr->cmd_line_ptr, "initrd=", -1UL, &ramdisk_addr, &ramdisk_size); @@ -478,14 +444,17 @@ struct boot_params *make_boot_params(struct efi_config *c) boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32; boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; - return boot_params; + hdr->code32_start = (u32)(unsigned long)startup_32; + + efi_stub_entry(handle, sys_table, boot_params); + /* not reached */ fail2: - efi_free(sys_table, options_size, hdr->cmd_line_ptr); + efi_free(options_size, hdr->cmd_line_ptr); fail: - efi_free(sys_table, 0x4000, (unsigned long)boot_params); + efi_free(0x4000, (unsigned long)boot_params); - return NULL; + return status; } static void add_e820ext(struct boot_params *params, @@ -620,13 +589,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, sizeof(struct e820_entry) * nr_desc; if (*e820ext) { - efi_call_early(free_pool, *e820ext); + efi_bs_call(free_pool, *e820ext); *e820ext = NULL; *e820ext_size = 0; } - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - size, (void **)e820ext); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)e820ext); if (status == EFI_SUCCESS) *e820ext_size = size; @@ -650,7 +619,7 @@ static efi_status_t allocate_e820(struct boot_params *params, boot_map.key_ptr = NULL; boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table, &boot_map); + status = efi_get_memory_map(&boot_map); if (status != EFI_SUCCESS) return status; @@ -672,8 +641,7 @@ struct exit_boot_struct { struct efi_info *efi; }; -static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, - struct efi_boot_memmap *map, +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) { const char *signature; @@ -683,14 +651,14 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, : EFI32_LOADER_SIGNATURE; memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); - p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_systab = (unsigned long)efi_system_table(); p->efi->efi_memdesc_size = *map->desc_size; p->efi->efi_memdesc_version = *map->desc_ver; p->efi->efi_memmap = (unsigned long)*map->map; p->efi->efi_memmap_size = *map->map_size; #ifdef CONFIG_X86_64 - p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_systab_hi = (unsigned long)efi_system_table() >> 32; p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; #endif @@ -722,8 +690,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return status; /* Might as well exit boot services now */ - status = efi_exit_boot_services(sys_table, handle, &map, &priv, - exit_boot_func); + status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); if (status != EFI_SUCCESS) return status; @@ -741,33 +708,22 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) * On success we return a pointer to a boot_params structure, and NULL * on failure. */ -struct boot_params * -efi_main(struct efi_config *c, struct boot_params *boot_params) +struct boot_params *efi_main(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { struct desc_ptr *gdt = NULL; struct setup_header *hdr = &boot_params->hdr; efi_status_t status; struct desc_struct *desc; - void *handle; - efi_system_table_t *_table; unsigned long cmdline_paddr; - efi_early = c; - - _table = (efi_system_table_t *)(unsigned long)efi_early->table; - handle = (void *)(unsigned long)efi_early->image_handle; - - sys_table = _table; + sys_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; - if (efi_is_64bit()) - setup_boot_services64(efi_early); - else - setup_boot_services32(efi_early); - /* * make_boot_params() may have been called before efi_main(), in which * case this is the second time we parse the cmdline. This is ok, @@ -782,14 +738,14 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) * otherwise we ask the BIOS. */ if (boot_params->secure_boot == efi_secureboot_mode_unset) - boot_params->secure_boot = efi_get_secureboot(sys_table); + boot_params->secure_boot = efi_get_secureboot(); /* Ask the firmware to clear memory on unclean shutdown */ - efi_enable_reset_attack_mitigation(sys_table); + efi_enable_reset_attack_mitigation(); - efi_random_get_seed(sys_table); + efi_random_get_seed(); - efi_retrieve_tpm2_eventlog(sys_table); + efi_retrieve_tpm2_eventlog(); setup_graphics(boot_params); @@ -797,18 +753,17 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) setup_quirks(boot_params); - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - sizeof(*gdt), (void **)&gdt); + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*gdt), + (void **)&gdt); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n"); + efi_printk("Failed to allocate memory for 'gdt' structure\n"); goto fail; } gdt->size = 0x800; - status = efi_low_alloc(sys_table, gdt->size, 8, - (unsigned long *)&gdt->address); + status = efi_low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n"); + efi_printk("Failed to allocate memory for 'gdt'\n"); goto fail; } @@ -818,13 +773,13 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) */ if (hdr->pref_address != hdr->code32_start) { unsigned long bzimage_addr = hdr->code32_start; - status = efi_relocate_kernel(sys_table, &bzimage_addr, + status = efi_relocate_kernel(&bzimage_addr, hdr->init_size, hdr->init_size, hdr->pref_address, hdr->kernel_alignment, LOAD_PHYSICAL_ADDR); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); + efi_printk("efi_relocate_kernel() failed!\n"); goto fail; } @@ -834,7 +789,7 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { - efi_printk(sys_table, "exit_boot() failed!\n"); + efi_printk("exit_boot() failed!\n"); goto fail; } @@ -927,7 +882,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params) return boot_params; fail: - efi_printk(sys_table, "efi_main() failed!\n"); + efi_printk("efi_main() failed!\n"); - return NULL; + for (;;) + asm("hlt"); } diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 8297387c4676..99f35343d443 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -12,22 +12,20 @@ #define DESC_TYPE_CODE_DATA (1 << 0) -typedef struct { - u32 get_mode; - u32 set_mode; - u32 blt; -} efi_uga_draw_protocol_32_t; +typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; -typedef struct { - u64 get_mode; - u64 set_mode; - u64 blt; -} efi_uga_draw_protocol_64_t; - -typedef struct { - void *get_mode; - void *set_mode; - void *blt; -} efi_uga_draw_protocol_t; +union efi_uga_draw_protocol { + struct { + efi_status_t (__efiapi *get_mode)(efi_uga_draw_protocol_t *, + u32*, u32*, u32*, u32*); + void *set_mode; + void *blt; + }; + struct { + u32 get_mode; + u32 set_mode; + u32 blt; + } mixed_mode; +}; #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S deleted file mode 100644 index ed6c351d34ed..000000000000 --- a/arch/x86/boot/compressed/efi_stub_32.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub for IA32. - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. Note that this implementation is different from the one in - * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical - * mode at this point. - */ - -#include <linux/linkage.h> -#include <asm/page_types.h> - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -SYM_FUNC_START(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Because we haven't been relocated by this point we need to - * use relative addressing. - */ - call 1f -1: popl %edx - subl $1b, %edx - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %ecx - movl %ecx, saved_return_addr(%edx) - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr(%edx) - - /* - * 3. Call the physical function. - */ - call *%ecx - - /* - * 4. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. We need to calculate our address - * again because %ecx and %edx are not preserved across EFI function - * calls. - */ - call 1f -1: popl %edx - subl $1b, %edx - - movl efi_rt_function_ptr(%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - movl saved_return_addr(%edx), %ecx - pushl %ecx - ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S deleted file mode 100644 index 99494dff2113..000000000000 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ /dev/null @@ -1,5 +0,0 @@ -#include <asm/segment.h> -#include <asm/msr.h> -#include <asm/processor-flags.h> - -#include "../../platform/efi/efi_stub_64.S" diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 593913692d16..8fb7f6799c52 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -10,7 +10,7 @@ * needs to be able to service interrupts. * * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identify + * addresses into 32-bits because we're executing with an identity * mapped pagetable and haven't transitioned to 64-bit virtual addresses * yet. */ @@ -23,16 +23,13 @@ .code64 .text -SYM_FUNC_START(efi64_thunk) +SYM_FUNC_START(__efi64_thunk) push %rbp push %rbx - subq $8, %rsp - leaq efi_exit32(%rip), %rax - movl %eax, 4(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, (%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ + leaq 1f(%rip), %rbp + leaq efi_gdt64(%rip), %rbx + movl %ebx, 2(%rbx) /* Fixup the gdt base address */ movl %ds, %eax push %rax @@ -48,15 +45,10 @@ SYM_FUNC_START(efi64_thunk) movl %esi, 0x0(%rsp) movl %edx, 0x4(%rsp) movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) - sgdt save_gdt(%rip) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + sgdt 0x14(%rsp) /* * Switch to gdt with 32-bit segments. This is the firmware GDT @@ -71,9 +63,9 @@ SYM_FUNC_START(efi64_thunk) pushq %rax lretq -1: addq $32, %rsp - - lgdt save_gdt(%rip) +1: lgdt 0x14(%rsp) + addq $32, %rsp + movq %rdi, %rax pop %rbx movl %ebx, %ss @@ -85,26 +77,13 @@ SYM_FUNC_START(efi64_thunk) /* * Convert 32-bit status code into 64-bit. */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - addq $8, %rsp + roll $1, %eax + rorq $1, %rax + pop %rbx pop %rbp ret -SYM_FUNC_END(efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) +SYM_FUNC_END(__efi64_thunk) .code32 /* @@ -144,9 +123,7 @@ SYM_FUNC_START_LOCAL(efi_enter32) */ cli - movl 56(%esp), %eax - movl %eax, 2(%eax) - lgdtl (%eax) + lgdtl (%ebx) movl %cr4, %eax btsl $(X86_CR4_PAE_BIT), %eax @@ -163,9 +140,8 @@ SYM_FUNC_START_LOCAL(efi_enter32) xorl %eax, %eax lldt %ax - movl 60(%esp), %eax pushl $__KERNEL_CS - pushl %eax + pushl %ebp /* Enable paging */ movl %cr0, %eax @@ -181,13 +157,6 @@ SYM_DATA_START(efi32_boot_gdt) .quad 0 SYM_DATA_END(efi32_boot_gdt) -SYM_DATA_START_LOCAL(save_gdt) - .word 0 - .quad 0 -SYM_DATA_END(save_gdt) - -SYM_DATA_LOCAL(func_rt_ptr, .quad 0) - SYM_DATA_START(efi_gdt64) .word efi_gdt64_end - efi_gdt64 .long 0 /* Filled out by user */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f2dfd6d083ef..73f17d0544dd 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -145,67 +145,16 @@ SYM_FUNC_START(startup_32) SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB -/* - * We don't need the return address, so set up the stack so efi_main() can find - * its arguments. - */ -SYM_FUNC_START(efi_pe_entry) - add $0x4, %esp - - call 1f -1: popl %esi - subl $1b, %esi - - popl %ecx - movl %ecx, efi32_config(%esi) /* Handle */ - popl %ecx - movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ - - /* Relocate efi_config->call() */ - leal efi32_config(%esi), %eax - add %esi, 40(%eax) - pushl %eax - - call make_boot_params - cmpl $0, %eax - je fail - movl %esi, BP_code32_start(%eax) - popl %ecx - pushl %eax - pushl %ecx - jmp 2f /* Skip efi_config initialization */ -SYM_FUNC_END(efi_pe_entry) - SYM_FUNC_START(efi32_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp - popl %ecx - popl %edx - - call 1f -1: popl %esi - subl $1b, %esi - - movl %ecx, efi32_config(%esi) /* Handle */ - movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ - - /* Relocate efi_config->call() */ - leal efi32_config(%esi), %eax - add %esi, 40(%eax) - pushl %eax -2: call efi_main - cmpl $0, %eax movl %eax, %esi - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leal startup_32(%eax), %eax jmp *%eax SYM_FUNC_END(efi32_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -240,11 +189,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) /* push arguments for extract_kernel: */ pushl $z_output_len /* decompressed length, end of relocs */ - movl BP_init_size(%esi), %eax - subl $_end, %eax - movl %ebx, %ebp - subl %eax, %ebp - pushl %ebp /* output address */ + leal _end(%ebx), %eax + subl BP_init_size(%esi), %eax + pushl %eax /* output address */ pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax @@ -262,15 +209,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) jmp *%eax SYM_FUNC_END(.Lrelocated) -#ifdef CONFIG_EFI_STUB - .data -efi32_config: - .fill 5,8,0 - .long efi_call_phys - .long 0 - .byte 0 -#endif - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 58a512e33d8d..1f1f6c8139b3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -208,10 +208,12 @@ SYM_FUNC_START(startup_32) pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl efi32_config(%ebp), %ebx - cmp $0, %ebx + movl efi32_boot_args(%ebp), %edi + cmp $0, %edi jz 1f - leal handover_entry(%ebp), %eax + leal efi64_stub_entry(%ebp), %eax + movl %esi, %edx + movl efi32_boot_args+4(%ebp), %esi 1: #endif pushl %eax @@ -232,17 +234,19 @@ SYM_FUNC_START(efi32_stub_entry) popl %edx popl %esi - leal (BP_scratch+4)(%esi), %esp call 1f 1: pop %ebp subl $1b, %ebp - movl %ecx, efi32_config(%ebp) - movl %edx, efi32_config+8(%ebp) + movl %ecx, efi32_boot_args(%ebp) + movl %edx, efi32_boot_args+4(%ebp) sgdtl efi32_boot_gdt(%ebp) + movb $0, efi_is64(%ebp) - leal efi32_config(%ebp), %eax - movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 jmp startup_32 SYM_FUNC_END(efi32_stub_entry) @@ -445,70 +449,17 @@ trampoline_return: SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB - -/* The entry point for the PE/COFF executable is efi_pe_entry. */ -SYM_FUNC_START(efi_pe_entry) - movq %rcx, efi64_config(%rip) /* Handle */ - movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). - */ - addq %rbp, efi64_config+40(%rip) - - movq %rax, %rdi - call make_boot_params - cmpq $0,%rax - je fail - mov %rax, %rsi - leaq startup_32(%rip), %rax - movl %eax, BP_code32_start(%rsi) - jmp 2f /* Skip the relocation */ - -handover_entry: - call 1f -1: popq %rbp - subq $1b, %rbp - - /* - * Relocate efi_config->call(). - */ - movq efi_config(%rip), %rax - addq %rbp, 40(%rax) -2: - movq efi_config(%rip), %rdi + .org 0x390 +SYM_FUNC_START(efi64_stub_entry) +SYM_FUNC_START_ALIAS(efi_stub_entry) + and $~0xf, %rsp /* realign the stack */ call efi_main movq %rax,%rsi - cmpq $0,%rax - jne 2f -fail: - /* EFI init failed, so hang. */ - hlt - jmp fail -2: movl BP_code32_start(%esi), %eax leaq startup_64(%rax), %rax jmp *%rax -SYM_FUNC_END(efi_pe_entry) - - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - movq %rdi, efi64_config(%rip) /* Handle */ - movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - - movq %rdx, %rsi - jmp handover_entry SYM_FUNC_END(efi64_stub_entry) +SYM_FUNC_END_ALIAS(efi_stub_entry) #endif .text @@ -677,24 +628,11 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x0000000000000000 /* TS continued */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA_LOCAL(efi_config, .quad 0) - #ifdef CONFIG_EFI_MIXED -SYM_DATA_START(efi32_config) - .fill 5,8,0 - .quad efi64_thunk - .byte 0 -SYM_DATA_END(efi32_config) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0) +SYM_DATA(efi_is64, .byte 1) #endif -SYM_DATA_START(efi64_config) - .fill 5,8,0 - .quad efi_call - .byte 1 -SYM_DATA_END(efi64_config) -#endif /* CONFIG_EFI_STUB */ - /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 9caa10e82217..da0ccc5de538 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,6 +15,7 @@ #include "../include/asm/required-features.h" #include "../include/asm/disabled-features.h" #include "../include/asm/cpufeatures.h" +#include "../include/asm/vmxfeatures.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0149e41d42c2..3da1c37c6dd5 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -51,7 +51,10 @@ SECTIONS . = ALIGN(16); _end = .; - /DISCARD/ : { *(.note*) } + /DISCARD/ : { + *(.eh_frame) + *(.note*) + } /* * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore new file mode 100644 index 000000000000..30be0400a439 --- /dev/null +++ b/arch/x86/crypto/.gitignore @@ -0,0 +1 @@ +poly1305-x86_64-cryptogams.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 958440eae27e..b69e00bf20b8 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -73,6 +73,10 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o blake2s-x86_64-y := blake2s-core.o blake2s-glue.o +poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o +ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),) +targets += poly1305-x86_64-cryptogams.S +endif ifeq ($(avx_supported),yes) camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ @@ -101,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o -poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o ifeq ($(avx2_supported),yes) sha1-ssse3-y += sha1_avx2_x86_64_asm.o -poly1305-x86_64-y += poly1305-avx2-x86_64.o endif ifeq ($(sha1_ni_supported),yes) sha1-ssse3-y += sha1_ni_asm.o @@ -118,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o endif sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $< > $@ +$(obj)/%.S: $(src)/%.pl FORCE + $(call if_changed,perlasm) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 46d227122643..4623189000d8 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -144,10 +144,8 @@ static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key, { struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead); - if (keylen != AEGIS128_KEY_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != AEGIS128_KEY_SIZE) return -EINVAL; - } memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE); diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d28503f99f58..cad6e1bfa7d5 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1942,7 +1942,7 @@ SYM_FUNC_START(aesni_set_key) SYM_FUNC_END(aesni_set_key) /* - * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) */ SYM_FUNC_START(aesni_enc) FRAME_BEGIN @@ -2131,7 +2131,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4) SYM_FUNC_END(_aesni_enc4) /* - * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) */ SYM_FUNC_START(aesni_dec) FRAME_BEGIN @@ -2716,8 +2716,8 @@ SYM_FUNC_END(aesni_ctr_enc) pxor CTR, IV; /* - * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * bool enc, u8 *iv) + * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, bool enc, le128 *iv) */ SYM_FUNC_START(aesni_xts_crypt8) FRAME_BEGIN diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3e707e81afdb..bbbebbd35b5d 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -83,10 +83,8 @@ struct gcm_context_data { asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); -asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); -asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); +asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len); asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, @@ -106,8 +104,8 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, bool enc, u8 *iv); +asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, bool enc, le128 *iv); /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. @@ -318,14 +316,11 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); - u32 *flags = &tfm->crt_flags; int err; if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + key_len != AES_KEYSIZE_256) return -EINVAL; - } if (!crypto_simd_usable()) err = aes_expandkey(ctx, in_key, key_len); @@ -550,29 +545,24 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, } -static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) -{ - aesni_enc(ctx, out, in); -} - -static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc); } -static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); } -static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv); + aesni_xts_crypt8(ctx, dst, src, true, iv); } -static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv); + aesni_xts_crypt8(ctx, dst, src, false, iv); } static const struct common_glue_ctx aesni_enc_xts = { @@ -581,10 +571,10 @@ static const struct common_glue_ctx aesni_enc_xts = { .funcs = { { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) } + .fn_u = { .xts = aesni_xts_enc8 } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) } + .fn_u = { .xts = aesni_xts_enc } } } }; @@ -594,10 +584,10 @@ static const struct common_glue_ctx aesni_dec_xts = { .funcs = { { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) } + .fn_u = { .xts = aesni_xts_dec8 } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) } + .fn_u = { .xts = aesni_xts_dec } } } }; @@ -606,8 +596,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&aesni_enc_xts, req, - XTS_TWEAK_CAST(aesni_xts_tweak), + return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc, aes_ctx(ctx->raw_tweak_ctx), aes_ctx(ctx->raw_crypt_ctx), false); @@ -618,8 +607,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&aesni_dec_xts, req, - XTS_TWEAK_CAST(aesni_xts_tweak), + return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc, aes_ctx(ctx->raw_tweak_ctx), aes_ctx(ctx->raw_crypt_ctx), true); @@ -650,10 +638,9 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, { struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); - if (key_len < 4) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (key_len < 4) return -EINVAL; - } + /*Account for 4 byte nonce at the end.*/ key_len -= 4; diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index 1d9ff8a45e1f..06ef2d4a4701 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -64,10 +64,8 @@ static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, { struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); - if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) return -EINVAL; - } memcpy(tctx->key, key, keylen); tctx->keylen = keylen; diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index a4f00128ea55..ccda647422d6 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -19,20 +19,17 @@ #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 /* 32-way AVX2/AES-NI parallel cipher functions */ -asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, @@ -40,16 +37,16 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) } + .fn_u = { .ecb = camellia_ecb_enc_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) } + .fn_u = { .ecb = camellia_ecb_enc_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -59,16 +56,16 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) } + .fn_u = { .ctr = camellia_ctr_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) } + .fn_u = { .ctr = camellia_ctr_16way } }, { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -78,13 +75,13 @@ static const struct common_glue_ctx camellia_enc_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) } + .fn_u = { .xts = camellia_xts_enc_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } + .fn_u = { .xts = camellia_xts_enc_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } + .fn_u = { .xts = camellia_xts_enc } } } }; @@ -94,16 +91,16 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) } + .fn_u = { .ecb = camellia_ecb_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) } + .fn_u = { .ecb = camellia_ecb_dec_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -113,16 +110,16 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) } + .fn_u = { .cbc = camellia_cbc_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) } + .fn_u = { .cbc = camellia_cbc_dec_16way } }, { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -132,21 +129,20 @@ static const struct common_glue_ctx camellia_dec_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) } + .fn_u = { .xts = camellia_xts_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } + .fn_u = { .xts = camellia_xts_dec_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } + .fn_u = { .xts = camellia_xts_dec } } } }; static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, - &tfm->base.crt_flags); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen); } static int ecb_encrypt(struct skcipher_request *req) @@ -161,8 +157,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -180,8 +175,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_enc_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -190,8 +184,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_dec_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index f28d282779b8..4e5de6ef206e 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -18,41 +18,36 @@ #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 /* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_ctr_16way); -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_xts_enc_16way); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_xts_dec_16way); -void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(camellia_enc_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk); } EXPORT_SYMBOL_GPL(camellia_xts_enc); -void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(camellia_dec_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk); } EXPORT_SYMBOL_GPL(camellia_xts_dec); @@ -62,13 +57,13 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) } + .fn_u = { .ecb = camellia_ecb_enc_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -78,13 +73,13 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) } + .fn_u = { .ctr = camellia_ctr_16way } }, { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -94,10 +89,10 @@ static const struct common_glue_ctx camellia_enc_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } + .fn_u = { .xts = camellia_xts_enc_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } + .fn_u = { .xts = camellia_xts_enc } } } }; @@ -107,13 +102,13 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) } + .fn_u = { .ecb = camellia_ecb_dec_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -123,13 +118,13 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) } + .fn_u = { .cbc = camellia_cbc_dec_16way } }, { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -139,18 +134,17 @@ static const struct common_glue_ctx camellia_dec_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } + .fn_u = { .xts = camellia_xts_dec_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } + .fn_u = { .xts = camellia_xts_dec } } } }; static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, - &tfm->base.crt_flags); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen); } static int ecb_encrypt(struct skcipher_request *req) @@ -165,8 +159,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -183,7 +176,6 @@ int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 *flags = &tfm->base.crt_flags; int err; err = xts_verify_key(tfm, key, keylen); @@ -191,13 +183,12 @@ int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, return err; /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2); if (err) return err; /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); } EXPORT_SYMBOL_GPL(xts_camellia_setkey); @@ -206,8 +197,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_enc_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -216,8 +206,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_dec_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 7c62db56ffe1..242c056e5fa8 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -18,19 +18,17 @@ #include <asm/crypto/glue_helper.h> /* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) @@ -1229,12 +1227,10 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey) } int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { - if (key_len != 16 && key_len != 24 && key_len != 32) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; - } cctx->key_length = key_len; @@ -1257,8 +1253,7 @@ EXPORT_SYMBOL_GPL(__camellia_setkey); static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { - return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len, - &tfm->crt_flags); + return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len); } static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, @@ -1267,8 +1262,10 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, return camellia_setkey(&tfm->base, key, key_len); } -void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) +void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) { + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; u128 iv = *src; camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); @@ -1277,9 +1274,11 @@ void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) *dst = *src; @@ -1291,9 +1290,11 @@ void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(camellia_crypt_ctr); -void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[2]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) { dst[0] = src[0]; @@ -1315,10 +1316,10 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -1328,10 +1329,10 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -1341,10 +1342,10 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -1354,10 +1355,10 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -1373,8 +1374,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index a8a38fffb4a9..48e0f37796fa 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -20,20 +20,17 @@ #define CAST6_PARALLEL_BLOCKS 8 -asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, +asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__cast6_encrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt); } -static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__cast6_decrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt); } -static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -70,10 +67,10 @@ static const struct common_glue_ctx cast6_enc = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) } + .fn_u = { .ecb = cast6_ecb_enc_8way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } + .fn_u = { .ecb = __cast6_encrypt } } } }; @@ -83,10 +80,10 @@ static const struct common_glue_ctx cast6_ctr = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) } + .fn_u = { .ctr = cast6_ctr_8way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } + .fn_u = { .ctr = cast6_crypt_ctr } } } }; @@ -96,10 +93,10 @@ static const struct common_glue_ctx cast6_enc_xts = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) } + .fn_u = { .xts = cast6_xts_enc_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) } + .fn_u = { .xts = cast6_xts_enc } } } }; @@ -109,10 +106,10 @@ static const struct common_glue_ctx cast6_dec = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) } + .fn_u = { .ecb = cast6_ecb_dec_8way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } + .fn_u = { .ecb = __cast6_decrypt } } } }; @@ -122,10 +119,10 @@ static const struct common_glue_ctx cast6_dec_cbc = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) } + .fn_u = { .cbc = cast6_cbc_dec_8way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } + .fn_u = { .cbc = __cast6_decrypt } } } }; @@ -135,10 +132,10 @@ static const struct common_glue_ctx cast6_dec_xts = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) } + .fn_u = { .xts = cast6_xts_dec_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) } + .fn_u = { .xts = cast6_xts_dec } } } }; @@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -177,7 +173,6 @@ static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 *flags = &tfm->base.crt_flags; int err; err = xts_verify_key(tfm, key, keylen); @@ -185,13 +180,12 @@ static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, return err; /* first half of xts-key is for crypt */ - err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2); if (err) return err; /* second half of xts-key is for tweak */ - return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); + return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); } static int xts_encrypt(struct skcipher_request *req) @@ -199,8 +193,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&cast6_enc_xts, req, - XTS_TWEAK_CAST(__cast6_encrypt), + return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -209,8 +202,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&cast6_dec_xts, req, - XTS_TWEAK_CAST(__cast6_encrypt), + return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index cb4ab6645106..418bd88acac8 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -94,10 +94,8 @@ static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, { u32 *mctx = crypto_shash_ctx(hash); - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(u32)) return -EINVAL; - } *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index eefa0862f309..c20d1b8a82c3 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -91,10 +91,8 @@ static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, { u32 *mctx = crypto_shash_ctx(hash); - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != sizeof(u32)) return -EINVAL; - } *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 04d72a5a8ce9..a4b728518e28 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -57,10 +57,8 @@ static int ghash_setkey(struct crypto_shash *tfm, be128 *x = (be128 *)key; u64 a, b; - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; - } /* perform multiplication by 'x' in GF(2^128) */ a = be64_to_cpu(x->a); @@ -257,16 +255,11 @@ static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, { struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct crypto_ahash *child = &ctx->cryptd_tfm->base; - int err; crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); - err = crypto_ahash_setkey(child, key, keylen); - crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) - & CRYPTO_TFM_RES_MASK); - - return err; + return crypto_ahash_setkey(child, key, keylen); } static int ghash_async_init_tfm(struct crypto_tfm *tfm) diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d15b99397480..d3d91a0abf88 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, src -= num_blocks - 1; dst -= num_blocks - 1; - gctx->funcs[i].fn_u.cbc(ctx, dst, src); + gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst, + (const u8 *)src); nbytes -= func_bytes; if (nbytes < bsize) @@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, /* Process multi-block batch */ do { - gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); + gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst, + (const u8 *)src, + &ctrblk); src += num_blocks; dst += num_blocks; nbytes -= func_bytes; @@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, be128_to_le128(&ctrblk, (be128 *)walk.iv); memcpy(&tmp, walk.src.virt.addr, nbytes); - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp, + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp, + (const u8 *)&tmp, &ctrblk); memcpy(walk.dst.virt.addr, &tmp, nbytes); le128_to_be128((be128 *)walk.iv, &ctrblk); @@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, if (nbytes >= func_bytes) { do { - gctx->funcs[i].fn_u.xts(ctx, dst, src, + gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst, + (const u8 *)src, walk->iv); src += num_blocks; @@ -354,8 +359,8 @@ out: } EXPORT_SYMBOL_GPL(glue_xts_req_128bit); -void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, - common_glue_func_t fn) +void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src, + le128 *iv, common_glue_func_t fn) { le128 ivblk = *iv; @@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, gf128mul_x_ble(iv, &ivblk); /* CC <- T xor C */ - u128_xor(dst, src, (u128 *)&ivblk); + u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk); /* PP <- D(Key2,CC) */ - fn(ctx, (u8 *)dst, (u8 *)dst); + fn(ctx, dst, dst); /* P <- T xor PP */ - u128_xor(dst, dst, (u128 *)&ivblk); + u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk); } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one); diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S deleted file mode 100644 index d6063feda9da..000000000000 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ /dev/null @@ -1,390 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions - * - * Copyright (C) 2015 Martin Willi - */ - -#include <linux/linkage.h> - -.section .rodata.cst32.ANMASK, "aM", @progbits, 32 -.align 32 -ANMASK: .octa 0x0000000003ffffff0000000003ffffff - .octa 0x0000000003ffffff0000000003ffffff - -.section .rodata.cst32.ORMASK, "aM", @progbits, 32 -.align 32 -ORMASK: .octa 0x00000000010000000000000001000000 - .octa 0x00000000010000000000000001000000 - -.text - -#define h0 0x00(%rdi) -#define h1 0x04(%rdi) -#define h2 0x08(%rdi) -#define h3 0x0c(%rdi) -#define h4 0x10(%rdi) -#define r0 0x00(%rdx) -#define r1 0x04(%rdx) -#define r2 0x08(%rdx) -#define r3 0x0c(%rdx) -#define r4 0x10(%rdx) -#define u0 0x00(%r8) -#define u1 0x04(%r8) -#define u2 0x08(%r8) -#define u3 0x0c(%r8) -#define u4 0x10(%r8) -#define w0 0x14(%r8) -#define w1 0x18(%r8) -#define w2 0x1c(%r8) -#define w3 0x20(%r8) -#define w4 0x24(%r8) -#define y0 0x28(%r8) -#define y1 0x2c(%r8) -#define y2 0x30(%r8) -#define y3 0x34(%r8) -#define y4 0x38(%r8) -#define m %rsi -#define hc0 %ymm0 -#define hc1 %ymm1 -#define hc2 %ymm2 -#define hc3 %ymm3 -#define hc4 %ymm4 -#define hc0x %xmm0 -#define hc1x %xmm1 -#define hc2x %xmm2 -#define hc3x %xmm3 -#define hc4x %xmm4 -#define t1 %ymm5 -#define t2 %ymm6 -#define t1x %xmm5 -#define t2x %xmm6 -#define ruwy0 %ymm7 -#define ruwy1 %ymm8 -#define ruwy2 %ymm9 -#define ruwy3 %ymm10 -#define ruwy4 %ymm11 -#define ruwy0x %xmm7 -#define ruwy1x %xmm8 -#define ruwy2x %xmm9 -#define ruwy3x %xmm10 -#define ruwy4x %xmm11 -#define svxz1 %ymm12 -#define svxz2 %ymm13 -#define svxz3 %ymm14 -#define svxz4 %ymm15 -#define d0 %r9 -#define d1 %r10 -#define d2 %r11 -#define d3 %r12 -#define d4 %r13 - -SYM_FUNC_START(poly1305_4block_avx2) - # %rdi: Accumulator h[5] - # %rsi: 64 byte input block m - # %rdx: Poly1305 key r[5] - # %rcx: Quadblock count - # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5], - - # This four-block variant uses loop unrolled block processing. It - # requires 4 Poly1305 keys: r, r^2, r^3 and r^4: - # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r - - vzeroupper - push %rbx - push %r12 - push %r13 - - # combine r0,u0,w0,y0 - vmovd y0,ruwy0x - vmovd w0,t1x - vpunpcklqdq t1,ruwy0,ruwy0 - vmovd u0,t1x - vmovd r0,t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,ruwy0,ruwy0 - - # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5 - vmovd y1,ruwy1x - vmovd w1,t1x - vpunpcklqdq t1,ruwy1,ruwy1 - vmovd u1,t1x - vmovd r1,t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,ruwy1,ruwy1 - vpslld $2,ruwy1,svxz1 - vpaddd ruwy1,svxz1,svxz1 - - # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5 - vmovd y2,ruwy2x - vmovd w2,t1x - vpunpcklqdq t1,ruwy2,ruwy2 - vmovd u2,t1x - vmovd r2,t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,ruwy2,ruwy2 - vpslld $2,ruwy2,svxz2 - vpaddd ruwy2,svxz2,svxz2 - - # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5 - vmovd y3,ruwy3x - vmovd w3,t1x - vpunpcklqdq t1,ruwy3,ruwy3 - vmovd u3,t1x - vmovd r3,t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,ruwy3,ruwy3 - vpslld $2,ruwy3,svxz3 - vpaddd ruwy3,svxz3,svxz3 - - # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5 - vmovd y4,ruwy4x - vmovd w4,t1x - vpunpcklqdq t1,ruwy4,ruwy4 - vmovd u4,t1x - vmovd r4,t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,ruwy4,ruwy4 - vpslld $2,ruwy4,svxz4 - vpaddd ruwy4,svxz4,svxz4 - -.Ldoblock4: - # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff, - # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0] - vmovd 0x00(m),hc0x - vmovd 0x10(m),t1x - vpunpcklqdq t1,hc0,hc0 - vmovd 0x20(m),t1x - vmovd 0x30(m),t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,hc0,hc0 - vpand ANMASK(%rip),hc0,hc0 - vmovd h0,t1x - vpaddd t1,hc0,hc0 - # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff, - # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1] - vmovd 0x03(m),hc1x - vmovd 0x13(m),t1x - vpunpcklqdq t1,hc1,hc1 - vmovd 0x23(m),t1x - vmovd 0x33(m),t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,hc1,hc1 - vpsrld $2,hc1,hc1 - vpand ANMASK(%rip),hc1,hc1 - vmovd h1,t1x - vpaddd t1,hc1,hc1 - # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff, - # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2] - vmovd 0x06(m),hc2x - vmovd 0x16(m),t1x - vpunpcklqdq t1,hc2,hc2 - vmovd 0x26(m),t1x - vmovd 0x36(m),t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,hc2,hc2 - vpsrld $4,hc2,hc2 - vpand ANMASK(%rip),hc2,hc2 - vmovd h2,t1x - vpaddd t1,hc2,hc2 - # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff, - # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3] - vmovd 0x09(m),hc3x - vmovd 0x19(m),t1x - vpunpcklqdq t1,hc3,hc3 - vmovd 0x29(m),t1x - vmovd 0x39(m),t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,hc3,hc3 - vpsrld $6,hc3,hc3 - vpand ANMASK(%rip),hc3,hc3 - vmovd h3,t1x - vpaddd t1,hc3,hc3 - # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24), - # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4] - vmovd 0x0c(m),hc4x - vmovd 0x1c(m),t1x - vpunpcklqdq t1,hc4,hc4 - vmovd 0x2c(m),t1x - vmovd 0x3c(m),t2x - vpunpcklqdq t2,t1,t1 - vperm2i128 $0x20,t1,hc4,hc4 - vpsrld $8,hc4,hc4 - vpor ORMASK(%rip),hc4,hc4 - vmovd h4,t1x - vpaddd t1,hc4,hc4 - - # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ] - vpmuludq hc0,ruwy0,t1 - # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ] - vpmuludq hc1,svxz4,t2 - vpaddq t2,t1,t1 - # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ] - vpmuludq hc2,svxz3,t2 - vpaddq t2,t1,t1 - # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ] - vpmuludq hc3,svxz2,t2 - vpaddq t2,t1,t1 - # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ] - vpmuludq hc4,svxz1,t2 - vpaddq t2,t1,t1 - # d0 = t1[0] + t1[1] + t[2] + t[3] - vpermq $0xee,t1,t2 - vpaddq t2,t1,t1 - vpsrldq $8,t1,t2 - vpaddq t2,t1,t1 - vmovq t1x,d0 - - # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ] - vpmuludq hc0,ruwy1,t1 - # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ] - vpmuludq hc1,ruwy0,t2 - vpaddq t2,t1,t1 - # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ] - vpmuludq hc2,svxz4,t2 - vpaddq t2,t1,t1 - # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ] - vpmuludq hc3,svxz3,t2 - vpaddq t2,t1,t1 - # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ] - vpmuludq hc4,svxz2,t2 - vpaddq t2,t1,t1 - # d1 = t1[0] + t1[1] + t1[3] + t1[4] - vpermq $0xee,t1,t2 - vpaddq t2,t1,t1 - vpsrldq $8,t1,t2 - vpaddq t2,t1,t1 - vmovq t1x,d1 - - # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ] - vpmuludq hc0,ruwy2,t1 - # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ] - vpmuludq hc1,ruwy1,t2 - vpaddq t2,t1,t1 - # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ] - vpmuludq hc2,ruwy0,t2 - vpaddq t2,t1,t1 - # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ] - vpmuludq hc3,svxz4,t2 - vpaddq t2,t1,t1 - # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ] - vpmuludq hc4,svxz3,t2 - vpaddq t2,t1,t1 - # d2 = t1[0] + t1[1] + t1[2] + t1[3] - vpermq $0xee,t1,t2 - vpaddq t2,t1,t1 - vpsrldq $8,t1,t2 - vpaddq t2,t1,t1 - vmovq t1x,d2 - - # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ] - vpmuludq hc0,ruwy3,t1 - # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ] - vpmuludq hc1,ruwy2,t2 - vpaddq t2,t1,t1 - # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ] - vpmuludq hc2,ruwy1,t2 - vpaddq t2,t1,t1 - # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ] - vpmuludq hc3,ruwy0,t2 - vpaddq t2,t1,t1 - # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ] - vpmuludq hc4,svxz4,t2 - vpaddq t2,t1,t1 - # d3 = t1[0] + t1[1] + t1[2] + t1[3] - vpermq $0xee,t1,t2 - vpaddq t2,t1,t1 - vpsrldq $8,t1,t2 - vpaddq t2,t1,t1 - vmovq t1x,d3 - - # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ] - vpmuludq hc0,ruwy4,t1 - # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ] - vpmuludq hc1,ruwy3,t2 - vpaddq t2,t1,t1 - # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ] - vpmuludq hc2,ruwy2,t2 - vpaddq t2,t1,t1 - # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ] - vpmuludq hc3,ruwy1,t2 - vpaddq t2,t1,t1 - # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ] - vpmuludq hc4,ruwy0,t2 - vpaddq t2,t1,t1 - # d4 = t1[0] + t1[1] + t1[2] + t1[3] - vpermq $0xee,t1,t2 - vpaddq t2,t1,t1 - vpsrldq $8,t1,t2 - vpaddq t2,t1,t1 - vmovq t1x,d4 - - # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> - # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small - # amount. Careful: we must not assume the carry bits 'd0 >> 26', - # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit - # integers. It's true in a single-block implementation, but not here. - - # d1 += d0 >> 26 - mov d0,%rax - shr $26,%rax - add %rax,d1 - # h0 = d0 & 0x3ffffff - mov d0,%rbx - and $0x3ffffff,%ebx - - # d2 += d1 >> 26 - mov d1,%rax - shr $26,%rax - add %rax,d2 - # h1 = d1 & 0x3ffffff - mov d1,%rax - and $0x3ffffff,%eax - mov %eax,h1 - - # d3 += d2 >> 26 - mov d2,%rax - shr $26,%rax - add %rax,d3 - # h2 = d2 & 0x3ffffff - mov d2,%rax - and $0x3ffffff,%eax - mov %eax,h2 - - # d4 += d3 >> 26 - mov d3,%rax - shr $26,%rax - add %rax,d4 - # h3 = d3 & 0x3ffffff - mov d3,%rax - and $0x3ffffff,%eax - mov %eax,h3 - - # h0 += (d4 >> 26) * 5 - mov d4,%rax - shr $26,%rax - lea (%rax,%rax,4),%rax - add %rax,%rbx - # h4 = d4 & 0x3ffffff - mov d4,%rax - and $0x3ffffff,%eax - mov %eax,h4 - - # h1 += h0 >> 26 - mov %rbx,%rax - shr $26,%rax - add %eax,h1 - # h0 = h0 & 0x3ffffff - andl $0x3ffffff,%ebx - mov %ebx,h0 - - add $0x40,m - dec %rcx - jnz .Ldoblock4 - - vzeroupper - pop %r13 - pop %r12 - pop %rbx - ret -SYM_FUNC_END(poly1305_4block_avx2) diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S deleted file mode 100644 index d8ea29b96640..000000000000 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ /dev/null @@ -1,590 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions - * - * Copyright (C) 2015 Martin Willi - */ - -#include <linux/linkage.h> - -.section .rodata.cst16.ANMASK, "aM", @progbits, 16 -.align 16 -ANMASK: .octa 0x0000000003ffffff0000000003ffffff - -.section .rodata.cst16.ORMASK, "aM", @progbits, 16 -.align 16 -ORMASK: .octa 0x00000000010000000000000001000000 - -.text - -#define h0 0x00(%rdi) -#define h1 0x04(%rdi) -#define h2 0x08(%rdi) -#define h3 0x0c(%rdi) -#define h4 0x10(%rdi) -#define r0 0x00(%rdx) -#define r1 0x04(%rdx) -#define r2 0x08(%rdx) -#define r3 0x0c(%rdx) -#define r4 0x10(%rdx) -#define s1 0x00(%rsp) -#define s2 0x04(%rsp) -#define s3 0x08(%rsp) -#define s4 0x0c(%rsp) -#define m %rsi -#define h01 %xmm0 -#define h23 %xmm1 -#define h44 %xmm2 -#define t1 %xmm3 -#define t2 %xmm4 -#define t3 %xmm5 -#define t4 %xmm6 -#define mask %xmm7 -#define d0 %r8 -#define d1 %r9 -#define d2 %r10 -#define d3 %r11 -#define d4 %r12 - -SYM_FUNC_START(poly1305_block_sse2) - # %rdi: Accumulator h[5] - # %rsi: 16 byte input block m - # %rdx: Poly1305 key r[5] - # %rcx: Block count - - # This single block variant tries to improve performance by doing two - # multiplications in parallel using SSE instructions. There is quite - # some quardword packing involved, hence the speedup is marginal. - - push %rbx - push %r12 - sub $0x10,%rsp - - # s1..s4 = r1..r4 * 5 - mov r1,%eax - lea (%eax,%eax,4),%eax - mov %eax,s1 - mov r2,%eax - lea (%eax,%eax,4),%eax - mov %eax,s2 - mov r3,%eax - lea (%eax,%eax,4),%eax - mov %eax,s3 - mov r4,%eax - lea (%eax,%eax,4),%eax - mov %eax,s4 - - movdqa ANMASK(%rip),mask - -.Ldoblock: - # h01 = [0, h1, 0, h0] - # h23 = [0, h3, 0, h2] - # h44 = [0, h4, 0, h4] - movd h0,h01 - movd h1,t1 - movd h2,h23 - movd h3,t2 - movd h4,h44 - punpcklqdq t1,h01 - punpcklqdq t2,h23 - punpcklqdq h44,h44 - - # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ] - movd 0x00(m),t1 - movd 0x03(m),t2 - psrld $2,t2 - punpcklqdq t2,t1 - pand mask,t1 - paddd t1,h01 - # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ] - movd 0x06(m),t1 - movd 0x09(m),t2 - psrld $4,t1 - psrld $6,t2 - punpcklqdq t2,t1 - pand mask,t1 - paddd t1,h23 - # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ] - mov 0x0c(m),%eax - shr $8,%eax - or $0x01000000,%eax - movd %eax,t1 - pshufd $0xc4,t1,t1 - paddd t1,h44 - - # t1[0] = h0 * r0 + h2 * s3 - # t1[1] = h1 * s4 + h3 * s2 - movd r0,t1 - movd s4,t2 - punpcklqdq t2,t1 - pmuludq h01,t1 - movd s3,t2 - movd s2,t3 - punpcklqdq t3,t2 - pmuludq h23,t2 - paddq t2,t1 - # t2[0] = h0 * r1 + h2 * s4 - # t2[1] = h1 * r0 + h3 * s3 - movd r1,t2 - movd r0,t3 - punpcklqdq t3,t2 - pmuludq h01,t2 - movd s4,t3 - movd s3,t4 - punpcklqdq t4,t3 - pmuludq h23,t3 - paddq t3,t2 - # t3[0] = h4 * s1 - # t3[1] = h4 * s2 - movd s1,t3 - movd s2,t4 - punpcklqdq t4,t3 - pmuludq h44,t3 - # d0 = t1[0] + t1[1] + t3[0] - # d1 = t2[0] + t2[1] + t3[1] - movdqa t1,t4 - punpcklqdq t2,t4 - punpckhqdq t2,t1 - paddq t4,t1 - paddq t3,t1 - movq t1,d0 - psrldq $8,t1 - movq t1,d1 - - # t1[0] = h0 * r2 + h2 * r0 - # t1[1] = h1 * r1 + h3 * s4 - movd r2,t1 - movd r1,t2 - punpcklqdq t2,t1 - pmuludq h01,t1 - movd r0,t2 - movd s4,t3 - punpcklqdq t3,t2 - pmuludq h23,t2 - paddq t2,t1 - # t2[0] = h0 * r3 + h2 * r1 - # t2[1] = h1 * r2 + h3 * r0 - movd r3,t2 - movd r2,t3 - punpcklqdq t3,t2 - pmuludq h01,t2 - movd r1,t3 - movd r0,t4 - punpcklqdq t4,t3 - pmuludq h23,t3 - paddq t3,t2 - # t3[0] = h4 * s3 - # t3[1] = h4 * s4 - movd s3,t3 - movd s4,t4 - punpcklqdq t4,t3 - pmuludq h44,t3 - # d2 = t1[0] + t1[1] + t3[0] - # d3 = t2[0] + t2[1] + t3[1] - movdqa t1,t4 - punpcklqdq t2,t4 - punpckhqdq t2,t1 - paddq t4,t1 - paddq t3,t1 - movq t1,d2 - psrldq $8,t1 - movq t1,d3 - - # t1[0] = h0 * r4 + h2 * r2 - # t1[1] = h1 * r3 + h3 * r1 - movd r4,t1 - movd r3,t2 - punpcklqdq t2,t1 - pmuludq h01,t1 - movd r2,t2 - movd r1,t3 - punpcklqdq t3,t2 - pmuludq h23,t2 - paddq t2,t1 - # t3[0] = h4 * r0 - movd r0,t3 - pmuludq h44,t3 - # d4 = t1[0] + t1[1] + t3[0] - movdqa t1,t4 - psrldq $8,t4 - paddq t4,t1 - paddq t3,t1 - movq t1,d4 - - # d1 += d0 >> 26 - mov d0,%rax - shr $26,%rax - add %rax,d1 - # h0 = d0 & 0x3ffffff - mov d0,%rbx - and $0x3ffffff,%ebx - - # d2 += d1 >> 26 - mov d1,%rax - shr $26,%rax - add %rax,d2 - # h1 = d1 & 0x3ffffff - mov d1,%rax - and $0x3ffffff,%eax - mov %eax,h1 - - # d3 += d2 >> 26 - mov d2,%rax - shr $26,%rax - add %rax,d3 - # h2 = d2 & 0x3ffffff - mov d2,%rax - and $0x3ffffff,%eax - mov %eax,h2 - - # d4 += d3 >> 26 - mov d3,%rax - shr $26,%rax - add %rax,d4 - # h3 = d3 & 0x3ffffff - mov d3,%rax - and $0x3ffffff,%eax - mov %eax,h3 - - # h0 += (d4 >> 26) * 5 - mov d4,%rax - shr $26,%rax - lea (%rax,%rax,4),%rax - add %rax,%rbx - # h4 = d4 & 0x3ffffff - mov d4,%rax - and $0x3ffffff,%eax - mov %eax,h4 - - # h1 += h0 >> 26 - mov %rbx,%rax - shr $26,%rax - add %eax,h1 - # h0 = h0 & 0x3ffffff - andl $0x3ffffff,%ebx - mov %ebx,h0 - - add $0x10,m - dec %rcx - jnz .Ldoblock - - # Zeroing of key material - mov %rcx,0x00(%rsp) - mov %rcx,0x08(%rsp) - - add $0x10,%rsp - pop %r12 - pop %rbx - ret -SYM_FUNC_END(poly1305_block_sse2) - - -#define u0 0x00(%r8) -#define u1 0x04(%r8) -#define u2 0x08(%r8) -#define u3 0x0c(%r8) -#define u4 0x10(%r8) -#define hc0 %xmm0 -#define hc1 %xmm1 -#define hc2 %xmm2 -#define hc3 %xmm5 -#define hc4 %xmm6 -#define ru0 %xmm7 -#define ru1 %xmm8 -#define ru2 %xmm9 -#define ru3 %xmm10 -#define ru4 %xmm11 -#define sv1 %xmm12 -#define sv2 %xmm13 -#define sv3 %xmm14 -#define sv4 %xmm15 -#undef d0 -#define d0 %r13 - -SYM_FUNC_START(poly1305_2block_sse2) - # %rdi: Accumulator h[5] - # %rsi: 16 byte input block m - # %rdx: Poly1305 key r[5] - # %rcx: Doubleblock count - # %r8: Poly1305 derived key r^2 u[5] - - # This two-block variant further improves performance by using loop - # unrolled block processing. This is more straight forward and does - # less byte shuffling, but requires a second Poly1305 key r^2: - # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r - - push %rbx - push %r12 - push %r13 - - # combine r0,u0 - movd u0,ru0 - movd r0,t1 - punpcklqdq t1,ru0 - - # combine r1,u1 and s1=r1*5,v1=u1*5 - movd u1,ru1 - movd r1,t1 - punpcklqdq t1,ru1 - movdqa ru1,sv1 - pslld $2,sv1 - paddd ru1,sv1 - - # combine r2,u2 and s2=r2*5,v2=u2*5 - movd u2,ru2 - movd r2,t1 - punpcklqdq t1,ru2 - movdqa ru2,sv2 - pslld $2,sv2 - paddd ru2,sv2 - - # combine r3,u3 and s3=r3*5,v3=u3*5 - movd u3,ru3 - movd r3,t1 - punpcklqdq t1,ru3 - movdqa ru3,sv3 - pslld $2,sv3 - paddd ru3,sv3 - - # combine r4,u4 and s4=r4*5,v4=u4*5 - movd u4,ru4 - movd r4,t1 - punpcklqdq t1,ru4 - movdqa ru4,sv4 - pslld $2,sv4 - paddd ru4,sv4 - -.Ldoblock2: - # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ] - movd 0x00(m),hc0 - movd 0x10(m),t1 - punpcklqdq t1,hc0 - pand ANMASK(%rip),hc0 - movd h0,t1 - paddd t1,hc0 - # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ] - movd 0x03(m),hc1 - movd 0x13(m),t1 - punpcklqdq t1,hc1 - psrld $2,hc1 - pand ANMASK(%rip),hc1 - movd h1,t1 - paddd t1,hc1 - # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ] - movd 0x06(m),hc2 - movd 0x16(m),t1 - punpcklqdq t1,hc2 - psrld $4,hc2 - pand ANMASK(%rip),hc2 - movd h2,t1 - paddd t1,hc2 - # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ] - movd 0x09(m),hc3 - movd 0x19(m),t1 - punpcklqdq t1,hc3 - psrld $6,hc3 - pand ANMASK(%rip),hc3 - movd h3,t1 - paddd t1,hc3 - # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ] - movd 0x0c(m),hc4 - movd 0x1c(m),t1 - punpcklqdq t1,hc4 - psrld $8,hc4 - por ORMASK(%rip),hc4 - movd h4,t1 - paddd t1,hc4 - - # t1 = [ hc0[1] * r0, hc0[0] * u0 ] - movdqa ru0,t1 - pmuludq hc0,t1 - # t1 += [ hc1[1] * s4, hc1[0] * v4 ] - movdqa sv4,t2 - pmuludq hc1,t2 - paddq t2,t1 - # t1 += [ hc2[1] * s3, hc2[0] * v3 ] - movdqa sv3,t2 - pmuludq hc2,t2 - paddq t2,t1 - # t1 += [ hc3[1] * s2, hc3[0] * v2 ] - movdqa sv2,t2 - pmuludq hc3,t2 - paddq t2,t1 - # t1 += [ hc4[1] * s1, hc4[0] * v1 ] - movdqa sv1,t2 - pmuludq hc4,t2 - paddq t2,t1 - # d0 = t1[0] + t1[1] - movdqa t1,t2 - psrldq $8,t2 - paddq t2,t1 - movq t1,d0 - - # t1 = [ hc0[1] * r1, hc0[0] * u1 ] - movdqa ru1,t1 - pmuludq hc0,t1 - # t1 += [ hc1[1] * r0, hc1[0] * u0 ] - movdqa ru0,t2 - pmuludq hc1,t2 - paddq t2,t1 - # t1 += [ hc2[1] * s4, hc2[0] * v4 ] - movdqa sv4,t2 - pmuludq hc2,t2 - paddq t2,t1 - # t1 += [ hc3[1] * s3, hc3[0] * v3 ] - movdqa sv3,t2 - pmuludq hc3,t2 - paddq t2,t1 - # t1 += [ hc4[1] * s2, hc4[0] * v2 ] - movdqa sv2,t2 - pmuludq hc4,t2 - paddq t2,t1 - # d1 = t1[0] + t1[1] - movdqa t1,t2 - psrldq $8,t2 - paddq t2,t1 - movq t1,d1 - - # t1 = [ hc0[1] * r2, hc0[0] * u2 ] - movdqa ru2,t1 - pmuludq hc0,t1 - # t1 += [ hc1[1] * r1, hc1[0] * u1 ] - movdqa ru1,t2 - pmuludq hc1,t2 - paddq t2,t1 - # t1 += [ hc2[1] * r0, hc2[0] * u0 ] - movdqa ru0,t2 - pmuludq hc2,t2 - paddq t2,t1 - # t1 += [ hc3[1] * s4, hc3[0] * v4 ] - movdqa sv4,t2 - pmuludq hc3,t2 - paddq t2,t1 - # t1 += [ hc4[1] * s3, hc4[0] * v3 ] - movdqa sv3,t2 - pmuludq hc4,t2 - paddq t2,t1 - # d2 = t1[0] + t1[1] - movdqa t1,t2 - psrldq $8,t2 - paddq t2,t1 - movq t1,d2 - - # t1 = [ hc0[1] * r3, hc0[0] * u3 ] - movdqa ru3,t1 - pmuludq hc0,t1 - # t1 += [ hc1[1] * r2, hc1[0] * u2 ] - movdqa ru2,t2 - pmuludq hc1,t2 - paddq t2,t1 - # t1 += [ hc2[1] * r1, hc2[0] * u1 ] - movdqa ru1,t2 - pmuludq hc2,t2 - paddq t2,t1 - # t1 += [ hc3[1] * r0, hc3[0] * u0 ] - movdqa ru0,t2 - pmuludq hc3,t2 - paddq t2,t1 - # t1 += [ hc4[1] * s4, hc4[0] * v4 ] - movdqa sv4,t2 - pmuludq hc4,t2 - paddq t2,t1 - # d3 = t1[0] + t1[1] - movdqa t1,t2 - psrldq $8,t2 - paddq t2,t1 - movq t1,d3 - - # t1 = [ hc0[1] * r4, hc0[0] * u4 ] - movdqa ru4,t1 - pmuludq hc0,t1 - # t1 += [ hc1[1] * r3, hc1[0] * u3 ] - movdqa ru3,t2 - pmuludq hc1,t2 - paddq t2,t1 - # t1 += [ hc2[1] * r2, hc2[0] * u2 ] - movdqa ru2,t2 - pmuludq hc2,t2 - paddq t2,t1 - # t1 += [ hc3[1] * r1, hc3[0] * u1 ] - movdqa ru1,t2 - pmuludq hc3,t2 - paddq t2,t1 - # t1 += [ hc4[1] * r0, hc4[0] * u0 ] - movdqa ru0,t2 - pmuludq hc4,t2 - paddq t2,t1 - # d4 = t1[0] + t1[1] - movdqa t1,t2 - psrldq $8,t2 - paddq t2,t1 - movq t1,d4 - - # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> - # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small - # amount. Careful: we must not assume the carry bits 'd0 >> 26', - # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit - # integers. It's true in a single-block implementation, but not here. - - # d1 += d0 >> 26 - mov d0,%rax - shr $26,%rax - add %rax,d1 - # h0 = d0 & 0x3ffffff - mov d0,%rbx - and $0x3ffffff,%ebx - - # d2 += d1 >> 26 - mov d1,%rax - shr $26,%rax - add %rax,d2 - # h1 = d1 & 0x3ffffff - mov d1,%rax - and $0x3ffffff,%eax - mov %eax,h1 - - # d3 += d2 >> 26 - mov d2,%rax - shr $26,%rax - add %rax,d3 - # h2 = d2 & 0x3ffffff - mov d2,%rax - and $0x3ffffff,%eax - mov %eax,h2 - - # d4 += d3 >> 26 - mov d3,%rax - shr $26,%rax - add %rax,d4 - # h3 = d3 & 0x3ffffff - mov d3,%rax - and $0x3ffffff,%eax - mov %eax,h3 - - # h0 += (d4 >> 26) * 5 - mov d4,%rax - shr $26,%rax - lea (%rax,%rax,4),%rax - add %rax,%rbx - # h4 = d4 & 0x3ffffff - mov d4,%rax - and $0x3ffffff,%eax - mov %eax,h4 - - # h1 += h0 >> 26 - mov %rbx,%rax - shr $26,%rax - add %eax,h1 - # h0 = h0 & 0x3ffffff - andl $0x3ffffff,%ebx - mov %ebx,h0 - - add $0x20,m - dec %rcx - jnz .Ldoblock2 - - pop %r13 - pop %r12 - pop %rbx - ret -SYM_FUNC_END(poly1305_2block_sse2) diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl new file mode 100644 index 000000000000..7a6b5380a46f --- /dev/null +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -0,0 +1,4265 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. +# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. +# +# This code is taken from the OpenSSL project but the author, Andy Polyakov, +# has relicensed it under the licenses specified in the SPDX header above. +# The original headers, including the original license headers, are +# included below for completeness. +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements Poly1305 hash for x86_64. +# +# March 2015 +# +# Initial release. +# +# December 2016 +# +# Add AVX512F+VL+BW code path. +# +# November 2017 +# +# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be +# executed even on Knights Landing. Trigger for modification was +# observation that AVX512 code paths can negatively affect overall +# Skylake-X system performance. Since we are likely to suppress +# AVX512F capability flag [at least on Skylake-X], conversion serves +# as kind of "investment protection". Note that next *lake processor, +# Cannonlake, has AVX512IFMA code path to execute... +# +# Numbers are cycles per processed byte with poly1305_blocks alone, +# measured with rdtsc at fixed clock frequency. +# +# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512 +# P4 4.46/+120% - +# Core 2 2.41/+90% - +# Westmere 1.88/+120% - +# Sandy Bridge 1.39/+140% 1.10 +# Haswell 1.14/+175% 1.11 0.65 +# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35] +# Silvermont 2.83/+95% - +# Knights L 3.60/? 1.65 1.10 0.41(***) +# Goldmont 1.70/+180% - +# VIA Nano 1.82/+150% - +# Sledgehammer 1.38/+160% - +# Bulldozer 2.30/+130% 0.97 +# Ryzen 1.15/+200% 1.08 1.18 +# +# (*) improvement coefficients relative to clang are more modest and +# are ~50% on most processors, in both cases we are comparing to +# __int128 code; +# (**) SSE2 implementation was attempted, but among non-AVX processors +# it was faster than integer-only code only on older Intel P4 and +# Core processors, 50-30%, less newer processor is, but slower on +# contemporary ones, for example almost 2x slower on Atom, and as +# former are naturally disappearing, SSE2 is deemed unnecessary; +# (***) strangely enough performance seems to vary from core to core, +# listed result is best case; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); +$kernel=0; $kernel=1 if (!$flavour && !$output); + +if (!$kernel) { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or + die "can't locate x86_64-xlate.pl"; + + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; + *STDOUT=*OUT; + + if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); + } + + if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { + $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); + $avx += 1 if ($1==2.11 && $2>=8); + } + + if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + } + + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); + } +} else { + $avx = 4; # The kernel uses ifdefs for this. +} + +sub declare_function() { + my ($name, $align, $nargs) = @_; + if($kernel) { + $code .= ".align $align\n"; + $code .= "SYM_FUNC_START($name)\n"; + $code .= ".L$name:\n"; + } else { + $code .= ".globl $name\n"; + $code .= ".type $name,\@function,$nargs\n"; + $code .= ".align $align\n"; + $code .= "$name:\n"; + } +} + +sub end_function() { + my ($name) = @_; + if($kernel) { + $code .= "SYM_FUNC_END($name)\n"; + } else { + $code .= ".size $name,.-$name\n"; + } +} + +$code.=<<___ if $kernel; +#include <linux/linkage.h> +___ + +if ($avx) { +$code.=<<___ if $kernel; +.section .rodata +___ +$code.=<<___; +.align 64 +.Lconst: +.Lmask24: +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 +.L129: +.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 +.Lmask26: +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 +.Lpermd_avx2: +.long 2,2,2,3,2,0,2,1 +.Lpermd_avx512: +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 + +.L2_44_inp_permd: +.long 0,1,1,2,2,3,7,7 +.L2_44_inp_shift: +.quad 0,12,24,64 +.L2_44_mask: +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff +.L2_44_shift_rgt: +.quad 44,44,42,64 +.L2_44_shift_lft: +.quad 8,8,10,64 + +.align 64 +.Lx_mask44: +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.Lx_mask42: +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +___ +} +$code.=<<___ if (!$kernel); +.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" +.align 16 +___ + +my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); +my ($mac,$nonce)=($inp,$len); # *_emit arguments +my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13"); +my ($h0,$h1,$h2)=("%r14","%rbx","%r10"); + +sub poly1305_iteration { +# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 +# output: $h0-$h2 *= $r0-$r1 +$code.=<<___; + mulq $h0 # h0*r1 + mov %rax,$d2 + mov $r0,%rax + mov %rdx,$d3 + + mulq $h0 # h0*r0 + mov %rax,$h0 # future $h0 + mov $r0,%rax + mov %rdx,$d1 + + mulq $h1 # h1*r0 + add %rax,$d2 + mov $s1,%rax + adc %rdx,$d3 + + mulq $h1 # h1*s1 + mov $h2,$h1 # borrow $h1 + add %rax,$h0 + adc %rdx,$d1 + + imulq $s1,$h1 # h2*s1 + add $h1,$d2 + mov $d1,$h1 + adc \$0,$d3 + + imulq $r0,$h2 # h2*r0 + add $d2,$h1 + mov \$-4,%rax # mask value + adc $h2,$d3 + + and $d3,%rax # last reduction step + mov $d3,$h2 + shr \$2,$d3 + and \$3,$h2 + add $d3,%rax + add %rax,$h0 + adc \$0,$h1 + adc \$0,$h2 +___ +} + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^64 +# unsigned __int64 r[2]; # key value base 2^64 + +$code.=<<___; +.text +___ +$code.=<<___ if (!$kernel); +.extern OPENSSL_ia32cap_P + +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 +.globl poly1305_blocks_x86_64 +.hidden poly1305_blocks_x86_64 +.globl poly1305_emit_x86_64 +.hidden poly1305_emit_x86_64 +___ +&declare_function("poly1305_init_x86_64", 32, 3); +$code.=<<___; + xor %rax,%rax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + + cmp \$0,$inp + je .Lno_key +___ +$code.=<<___ if (!$kernel); + lea poly1305_blocks_x86_64(%rip),%r10 + lea poly1305_emit_x86_64(%rip),%r11 +___ +$code.=<<___ if (!$kernel && $avx); + mov OPENSSL_ia32cap_P+4(%rip),%r9 + lea poly1305_blocks_avx(%rip),%rax + lea poly1305_emit_avx(%rip),%rcx + bt \$`60-32`,%r9 # AVX? + cmovc %rax,%r10 + cmovc %rcx,%r11 +___ +$code.=<<___ if (!$kernel && $avx>1); + lea poly1305_blocks_avx2(%rip),%rax + bt \$`5+32`,%r9 # AVX2? + cmovc %rax,%r10 +___ +$code.=<<___ if (!$kernel && $avx>3); + mov \$`(1<<31|1<<21|1<<16)`,%rax + shr \$32,%r9 + and %rax,%r9 + cmp %rax,%r9 + je .Linit_base2_44 +___ +$code.=<<___; + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + and 8($inp),%rcx + mov %rax,24($ctx) + mov %rcx,32($ctx) +___ +$code.=<<___ if (!$kernel && $flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if (!$kernel && $flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax +.Lno_key: + ret +___ +&end_function("poly1305_init_x86_64"); + +&declare_function("poly1305_blocks_x86_64", 32, 4); +$code.=<<___; +.cfi_startproc +.Lblocks: + shr \$4,$len + jz .Lno_data # too short + + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + push $ctx +.cfi_push $ctx +.Lblocks_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2 + + mov $s1,$r1 + shr \$2,$s1 + mov $r1,%rax + add $r1,$s1 # s1 = r1 + (r1 >> 2) + jmp .Loop + +.align 32 +.Loop: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 +___ + + &poly1305_iteration(); + +$code.=<<___; + mov $r1,%rax + dec %r15 # len-=16 + jnz .Loop + + mov 0(%rsp),$ctx +.cfi_restore $ctx + + mov $h0,0($ctx) # store hash value + mov $h1,8($ctx) + mov $h2,16($ctx) + + mov 8(%rsp),%r15 +.cfi_restore %r15 + mov 16(%rsp),%r14 +.cfi_restore %r14 + mov 24(%rsp),%r13 +.cfi_restore %r13 + mov 32(%rsp),%r12 +.cfi_restore %r12 + mov 40(%rsp),%rbx +.cfi_restore %rbx + lea 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data: +.Lblocks_epilogue: + ret +.cfi_endproc +___ +&end_function("poly1305_blocks_x86_64"); + +&declare_function("poly1305_emit_x86_64", 32, 3); +$code.=<<___; +.Lemit: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +___ +&end_function("poly1305_emit_x86_64"); +if ($avx) { + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX\n"; +} + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int32 h[5]; # current hash value base 2^26 +# unsigned __int32 is_base2_26; +# unsigned __int64 r[2]; # key value base 2^64 +# unsigned __int64 pad; +# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9]; +# +# where r^n are base 2^26 digits of degrees of multiplier key. There are +# 5 digits, but last four are interleaved with multiples of 5, totalling +# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4. + +my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) = + map("%xmm$_",(0..15)); + +$code.=<<___; +.type __poly1305_block,\@abi-omnipotent +.align 32 +__poly1305_block: + push $ctx +___ + &poly1305_iteration(); +$code.=<<___; + pop $ctx + ret +.size __poly1305_block,.-__poly1305_block + +.type __poly1305_init_avx,\@abi-omnipotent +.align 32 +__poly1305_init_avx: + push %rbp + mov %rsp,%rbp + mov $r0,$h0 + mov $r1,$h1 + xor $h2,$h2 + + lea 48+64($ctx),$ctx # size optimization + + mov $r1,%rax + call __poly1305_block # r^2 + + mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26 + mov \$0x3ffffff,%edx + mov $h0,$d1 + and $h0#d,%eax + mov $r0,$d2 + and $r0#d,%edx + mov %eax,`16*0+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*0+4-64`($ctx) + shr \$26,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*1+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*1+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*2+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*2+4-64`($ctx) + shr \$26,$d2 + + mov $h1,%rax + mov $r1,%rdx + shl \$12,%rax + shl \$12,%rdx + or $d1,%rax + or $d2,%rdx + and \$0x3ffffff,%eax + and \$0x3ffffff,%edx + mov %eax,`16*3+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*3+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*4+0-64`($ctx) + mov $h1,$d1 + mov %edx,`16*4+4-64`($ctx) + mov $r1,$d2 + + mov \$0x3ffffff,%eax + mov \$0x3ffffff,%edx + shr \$14,$d1 + shr \$14,$d2 + and $d1#d,%eax + and $d2#d,%edx + mov %eax,`16*5+0-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov %edx,`16*5+4-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + mov %eax,`16*6+0-64`($ctx) + shr \$26,$d1 + mov %edx,`16*6+4-64`($ctx) + shr \$26,$d2 + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+0-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d2#d,`16*7+4-64`($ctx) + lea ($d2,$d2,4),$d2 # *5 + mov $d1#d,`16*8+0-64`($ctx) + mov $d2#d,`16*8+4-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^3 + + mov \$0x3ffffff,%eax # save r^3 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+12-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+12-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+12-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+12-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+12-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+12-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+12-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+12-64`($ctx) + + mov $r1,%rax + call __poly1305_block # r^4 + + mov \$0x3ffffff,%eax # save r^4 base 2^26 + mov $h0,$d1 + and $h0#d,%eax + shr \$26,$d1 + mov %eax,`16*0+8-64`($ctx) + + mov \$0x3ffffff,%edx + and $d1#d,%edx + mov %edx,`16*1+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*2+8-64`($ctx) + + mov $h1,%rax + shl \$12,%rax + or $d1,%rax + and \$0x3ffffff,%eax + mov %eax,`16*3+8-64`($ctx) + lea (%rax,%rax,4),%eax # *5 + mov $h1,$d1 + mov %eax,`16*4+8-64`($ctx) + + mov \$0x3ffffff,%edx + shr \$14,$d1 + and $d1#d,%edx + mov %edx,`16*5+8-64`($ctx) + lea (%rdx,%rdx,4),%edx # *5 + shr \$26,$d1 + mov %edx,`16*6+8-64`($ctx) + + mov $h2,%rax + shl \$24,%rax + or %rax,$d1 + mov $d1#d,`16*7+8-64`($ctx) + lea ($d1,$d1,4),$d1 # *5 + mov $d1#d,`16*8+8-64`($ctx) + + lea -48-64($ctx),$ctx # size [de-]optimization + pop %rbp + ret +.size __poly1305_init_avx,.-__poly1305_init_avx +___ + +&declare_function("poly1305_blocks_avx", 32, 4); +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx: + and \$-16,$len + jz .Lno_data_avx + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx + + test \$31,$len + jz .Leven_avx + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx_body: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + + call __poly1305_block + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + sub \$16,%r15 + jz .Lstore_base2_26_avx + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + jmp .Lproceed_avx + +.align 32 +.Lstore_base2_64_avx: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx + +.align 16 +.Lstore_base2_26_avx: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx: +.Lblocks_avx_epilogue: + ret +.cfi_endproc + +.align 32 +.Lbase2_64_avx: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx_body: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$31,$len + jz .Linit_avx + + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + +.Linit_avx: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,$H0 + vmovd %rdx#d,$H1 + vmovd $h0#d,$H2 + vmovd $h1#d,$H3 + vmovd $h2#d,$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx: + mov %r15,$len + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx_epilogue: + jmp .Ldo_avx +.cfi_endproc + +.align 32 +.Leven_avx: +.cfi_startproc + vmovd 4*0($ctx),$H0 # load hash value + vmovd 4*1($ctx),$H1 + vmovd 4*2($ctx),$H2 + vmovd 4*3($ctx),$H3 + vmovd 4*4($ctx),$H4 + +.Ldo_avx: +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + and \$-32,%rsp + sub \$-8,%rsp + lea -0x58(%rsp),%r11 + sub \$0x178,%rsp +___ +$code.=<<___ if ($win64); + lea -0xf8(%rsp),%r11 + sub \$0x218,%rsp + vmovdqa %xmm6,0x50(%r11) + vmovdqa %xmm7,0x60(%r11) + vmovdqa %xmm8,0x70(%r11) + vmovdqa %xmm9,0x80(%r11) + vmovdqa %xmm10,0x90(%r11) + vmovdqa %xmm11,0xa0(%r11) + vmovdqa %xmm12,0xb0(%r11) + vmovdqa %xmm13,0xc0(%r11) + vmovdqa %xmm14,0xd0(%r11) + vmovdqa %xmm15,0xe0(%r11) +.Ldo_avx_body: +___ +$code.=<<___; + sub \$64,$len + lea -32($inp),%rax + cmovc %rax,$inp + + vmovdqu `16*3`($ctx),$D4 # preload r0^2 + lea `16*3+64`($ctx),$ctx # size optimization + lea .Lconst(%rip),%rcx + + ################################################################ + # load input + vmovdqu 16*2($inp),$T0 + vmovdqu 16*3($inp),$T1 + vmovdqa 64(%rcx),$MASK # .Lmask26 + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + vpsrlq \$40,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + jbe .Lskip_loop_avx + + # expand and copy pre-calculated table to stack + vmovdqu `16*1-64`($ctx),$D1 + vmovdqu `16*2-64`($ctx),$D2 + vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434 + vpshufd \$0x44,$D4,$D0 # xx12 -> 1212 + vmovdqa $D3,-0x90(%r11) + vmovdqa $D0,0x00(%rsp) + vpshufd \$0xEE,$D1,$D4 + vmovdqu `16*3-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x80(%r11) + vmovdqa $D1,0x10(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqu `16*4-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x70(%r11) + vmovdqa $D2,0x20(%rsp) + vpshufd \$0xEE,$D0,$D4 + vmovdqu `16*5-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D4,-0x60(%r11) + vmovdqa $D0,0x30(%rsp) + vpshufd \$0xEE,$D1,$D3 + vmovdqu `16*6-64`($ctx),$D0 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D3,-0x50(%r11) + vmovdqa $D1,0x40(%rsp) + vpshufd \$0xEE,$D2,$D4 + vmovdqu `16*7-64`($ctx),$D1 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D4,-0x40(%r11) + vmovdqa $D2,0x50(%rsp) + vpshufd \$0xEE,$D0,$D3 + vmovdqu `16*8-64`($ctx),$D2 + vpshufd \$0x44,$D0,$D0 + vmovdqa $D3,-0x30(%r11) + vmovdqa $D0,0x60(%rsp) + vpshufd \$0xEE,$D1,$D4 + vpshufd \$0x44,$D1,$D1 + vmovdqa $D4,-0x20(%r11) + vmovdqa $D1,0x70(%rsp) + vpshufd \$0xEE,$D2,$D3 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpshufd \$0x44,$D2,$D2 + vmovdqa $D3,-0x10(%r11) + vmovdqa $D2,0x80(%rsp) + + jmp .Loop_avx + +.align 32 +.Loop_avx: + ################################################################ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + # \___________________/ + # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + # \___________________/ \____________________/ + # + # Note that we start with inp[2:3]*r^2. This is because it + # doesn't depend on reduction in previous iteration. + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # though note that $Tx and $Hx are "reversed" in this section, + # and $D4 is preloaded with r0^2... + + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vmovdqa $H2,0x20(%r11) # offload hash + vpmuludq $T2,$D4,$D2 # d3 = h2*r0 + vmovdqa 0x10(%rsp),$H2 # r1^2 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vmovdqa $H0,0x00(%r11) # + vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1 + vmovdqa $H1,0x10(%r11) # + vpmuludq $T3,$H2,$H1 # h3*r1 + vpaddq $H0,$D0,$D0 # d0 += h4*s1 + vpaddq $H1,$D4,$D4 # d4 += h3*r1 + vmovdqa $H3,0x30(%r11) # + vpmuludq $T2,$H2,$H0 # h2*r1 + vpmuludq $T1,$H2,$H1 # h1*r1 + vpaddq $H0,$D3,$D3 # d3 += h2*r1 + vmovdqa 0x30(%rsp),$H3 # r2^2 + vpaddq $H1,$D2,$D2 # d2 += h1*r1 + vmovdqa $H4,0x40(%r11) # + vpmuludq $T0,$H2,$H2 # h0*r1 + vpmuludq $T2,$H3,$H0 # h2*r2 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + + vmovdqa 0x40(%rsp),$H4 # s2^2 + vpaddq $H0,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H3,$H1 # h1*r2 + vpmuludq $T0,$H3,$H3 # h0*r2 + vpaddq $H1,$D3,$D3 # d3 += h1*r2 + vmovdqa 0x50(%rsp),$H2 # r3^2 + vpaddq $H3,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H4,$H0 # h4*s2 + vpmuludq $T3,$H4,$H4 # h3*s2 + vpaddq $H0,$D1,$D1 # d1 += h4*s2 + vmovdqa 0x60(%rsp),$H3 # s3^2 + vpaddq $H4,$D0,$D0 # d0 += h3*s2 + + vmovdqa 0x80(%rsp),$H4 # s4^2 + vpmuludq $T1,$H2,$H1 # h1*r3 + vpmuludq $T0,$H2,$H2 # h0*r3 + vpaddq $H1,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $T4,$H3,$H0 # h4*s3 + vpmuludq $T3,$H3,$H1 # h3*s3 + vpaddq $H0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*0($inp),$H0 # load input + vpaddq $H1,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H3,$H3 # h2*s3 + vpmuludq $T2,$H4,$T2 # h2*s4 + vpaddq $H3,$D0,$D0 # d0 += h2*s3 + + vmovdqu 16*1($inp),$H1 # + vpaddq $T2,$D1,$D1 # d1 += h2*s4 + vpmuludq $T3,$H4,$T3 # h3*s4 + vpmuludq $T4,$H4,$T4 # h4*s4 + vpsrldq \$6,$H0,$H2 # splat input + vpaddq $T3,$D2,$D2 # d2 += h3*s4 + vpaddq $T4,$D3,$D3 # d3 += h4*s4 + vpsrldq \$6,$H1,$H3 # + vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4 + vpmuludq $T1,$H4,$T0 # h1*s4 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpaddq $T4,$D4,$D4 # d4 += h0*r4 + vmovdqa -0x90(%r11),$T4 # r0^4 + vpaddq $T0,$D0,$D0 # d0 += h1*s4 + + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + #vpsrlq \$40,$H4,$H4 # 4 + vpsrldq \$`40/8`,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpand 0(%rcx),$H4,$H4 # .Lmask24 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpaddq 0x00(%r11),$H0,$H0 # add hash value + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + lea 16*2($inp),%rax + lea 16*4($inp),$inp + sub \$64,$len + cmovc %rax,$inp + + ################################################################ + # Now we accumulate (inp[0:1]+hash)*r^4 + ################################################################ + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vmovdqa -0x80(%r11),$T2 # r1^4 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T0,$D2,$D2 + vpaddq $T1,$D3,$D3 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpmuludq -0x70(%r11),$H4,$T0 # h4*s1 + vpaddq $T4,$D4,$D4 + + vpaddq $T0,$D0,$D0 # d0 += h4*s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vmovdqa -0x60(%r11),$T3 # r2^4 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpmuludq $H1,$T2,$T1 # h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T1,$D2,$D2 # d2 += h1*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + + vmovdqa -0x50(%r11),$T4 # s2^4 + vpmuludq $H2,$T3,$T0 # h2*r2 + vpmuludq $H1,$T3,$T1 # h1*r2 + vpaddq $T0,$D4,$D4 # d4 += h2*r2 + vpaddq $T1,$D3,$D3 # d3 += h1*r2 + vmovdqa -0x40(%r11),$T2 # r3^4 + vpmuludq $H0,$T3,$T3 # h0*r2 + vpmuludq $H4,$T4,$T0 # h4*s2 + vpaddq $T3,$D2,$D2 # d2 += h0*r2 + vpaddq $T0,$D1,$D1 # d1 += h4*s2 + vmovdqa -0x30(%r11),$T3 # s3^4 + vpmuludq $H3,$T4,$T4 # h3*s2 + vpmuludq $H1,$T2,$T1 # h1*r3 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + + vmovdqa -0x10(%r11),$T4 # s4^4 + vpaddq $T1,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T2,$T2 # h0*r3 + vpmuludq $H4,$T3,$T0 # h4*s3 + vpaddq $T2,$D3,$D3 # d3 += h0*r3 + vpaddq $T0,$D2,$D2 # d2 += h4*s3 + vmovdqu 16*2($inp),$T0 # load input + vpmuludq $H3,$T3,$T2 # h3*s3 + vpmuludq $H2,$T3,$T3 # h2*s3 + vpaddq $T2,$D1,$D1 # d1 += h3*s3 + vmovdqu 16*3($inp),$T1 # + vpaddq $T3,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H2,$T4,$H2 # h2*s4 + vpmuludq $H3,$T4,$H3 # h3*s4 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $H2,$D1,$D1 # d1 += h2*s4 + vpmuludq $H4,$T4,$H4 # h4*s4 + vpsrldq \$6,$T1,$T3 # + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 + vpmuludq -0x20(%r11),$H0,$H4 # h0*r4 + vpmuludq $H1,$T4,$H0 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + + #vpsrlq \$40,$T4,$T4 # 4 + vpsrldq \$`40/8`,$T4,$T4 # 4 + vpsrlq \$26,$T0,$T1 + vmovdqa 0x00(%rsp),$D4 # preload r0^2 + vpand $MASK,$T0,$T0 # 0 + vpsrlq \$4,$T3,$T2 + vpand $MASK,$T1,$T1 # 1 + vpand 0(%rcx),$T4,$T4 # .Lmask24 + vpsrlq \$30,$T3,$T3 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + ################################################################ + # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + # and P. Schwabe + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D0 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D0,$H0,$H0 + vpsllq \$2,$D0,$D0 + vpaddq $D0,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + ja .Loop_avx + +.Lskip_loop_avx: + ################################################################ + # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2 + add \$32,$len + jnz .Long_tail_avx + + vpaddq $H2,$T2,$T2 + vpaddq $H0,$T0,$T0 + vpaddq $H1,$T1,$T1 + vpaddq $H3,$T3,$T3 + vpaddq $H4,$T4,$T4 + +.Long_tail_avx: + vmovdqa $H2,0x20(%r11) + vmovdqa $H0,0x00(%r11) + vmovdqa $H1,0x10(%r11) + vmovdqa $H3,0x30(%r11) + vmovdqa $H4,0x40(%r11) + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + vpmuludq $T2,$D4,$D2 # d2 = h2*r0 + vpmuludq $T0,$D4,$D0 # d0 = h0*r0 + vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n + vpmuludq $T1,$D4,$D1 # d1 = h1*r0 + vpmuludq $T3,$D4,$D3 # d3 = h3*r0 + vpmuludq $T4,$D4,$D4 # d4 = h4*r0 + + vpmuludq $T3,$H2,$H0 # h3*r1 + vpaddq $H0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n + vpmuludq $T2,$H2,$H1 # h2*r1 + vpaddq $H1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n + vpmuludq $T1,$H2,$H0 # h1*r1 + vpaddq $H0,$D2,$D2 # d2 += h1*r1 + vpmuludq $T0,$H2,$H2 # h0*r1 + vpaddq $H2,$D1,$D1 # d1 += h0*r1 + vpmuludq $T4,$H3,$H3 # h4*s1 + vpaddq $H3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n + vpmuludq $T2,$H4,$H1 # h2*r2 + vpaddq $H1,$D4,$D4 # d4 += h2*r2 + vpmuludq $T1,$H4,$H0 # h1*r2 + vpaddq $H0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n + vpmuludq $T0,$H4,$H4 # h0*r2 + vpaddq $H4,$D2,$D2 # d2 += h0*r2 + vpmuludq $T4,$H2,$H1 # h4*s2 + vpaddq $H1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n + vpmuludq $T3,$H2,$H2 # h3*s2 + vpaddq $H2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $T1,$H3,$H0 # h1*r3 + vpaddq $H0,$D4,$D4 # d4 += h1*r3 + vpmuludq $T0,$H3,$H3 # h0*r3 + vpaddq $H3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n + vpmuludq $T4,$H4,$H1 # h4*s3 + vpaddq $H1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n + vpmuludq $T3,$H4,$H0 # h3*s3 + vpaddq $H0,$D1,$D1 # d1 += h3*s3 + vpmuludq $T2,$H4,$H4 # h2*s3 + vpaddq $H4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $T0,$H2,$H2 # h0*r4 + vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4 + vpmuludq $T4,$H3,$H1 # h4*s4 + vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4 + vpmuludq $T3,$H3,$H0 # h3*s4 + vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4 + vpmuludq $T2,$H3,$H1 # h2*s4 + vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4 + vpmuludq $T1,$H3,$H3 # h1*s4 + vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4 + + jz .Lshort_tail_avx + + vmovdqu 16*0($inp),$H0 # load input + vmovdqu 16*1($inp),$H1 + + vpsrldq \$6,$H0,$H2 # splat input + vpsrldq \$6,$H1,$H3 + vpunpckhqdq $H1,$H0,$H4 # 4 + vpunpcklqdq $H1,$H0,$H0 # 0:1 + vpunpcklqdq $H3,$H2,$H3 # 2:3 + + vpsrlq \$40,$H4,$H4 # 4 + vpsrlq \$26,$H0,$H1 + vpand $MASK,$H0,$H0 # 0 + vpsrlq \$4,$H3,$H2 + vpand $MASK,$H1,$H1 # 1 + vpsrlq \$30,$H3,$H3 + vpand $MASK,$H2,$H2 # 2 + vpand $MASK,$H3,$H3 # 3 + vpor 32(%rcx),$H4,$H4 # padbit, yes, always + + vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4 + vpaddq 0x00(%r11),$H0,$H0 + vpaddq 0x10(%r11),$H1,$H1 + vpaddq 0x20(%r11),$H2,$H2 + vpaddq 0x30(%r11),$H3,$H3 + vpaddq 0x40(%r11),$H4,$H4 + + ################################################################ + # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate + + vpmuludq $H0,$T4,$T0 # h0*r0 + vpaddq $T0,$D0,$D0 # d0 += h0*r0 + vpmuludq $H1,$T4,$T1 # h1*r0 + vpaddq $T1,$D1,$D1 # d1 += h1*r0 + vpmuludq $H2,$T4,$T0 # h2*r0 + vpaddq $T0,$D2,$D2 # d2 += h2*r0 + vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n + vpmuludq $H3,$T4,$T1 # h3*r0 + vpaddq $T1,$D3,$D3 # d3 += h3*r0 + vpmuludq $H4,$T4,$T4 # h4*r0 + vpaddq $T4,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T2,$T0 # h3*r1 + vpaddq $T0,$D4,$D4 # d4 += h3*r1 + vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1 + vpmuludq $H2,$T2,$T1 # h2*r1 + vpaddq $T1,$D3,$D3 # d3 += h2*r1 + vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2 + vpmuludq $H1,$T2,$T0 # h1*r1 + vpaddq $T0,$D2,$D2 # d2 += h1*r1 + vpmuludq $H0,$T2,$T2 # h0*r1 + vpaddq $T2,$D1,$D1 # d1 += h0*r1 + vpmuludq $H4,$T3,$T3 # h4*s1 + vpaddq $T3,$D0,$D0 # d0 += h4*s1 + + vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2 + vpmuludq $H2,$T4,$T1 # h2*r2 + vpaddq $T1,$D4,$D4 # d4 += h2*r2 + vpmuludq $H1,$T4,$T0 # h1*r2 + vpaddq $T0,$D3,$D3 # d3 += h1*r2 + vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3 + vpmuludq $H0,$T4,$T4 # h0*r2 + vpaddq $T4,$D2,$D2 # d2 += h0*r2 + vpmuludq $H4,$T2,$T1 # h4*s2 + vpaddq $T1,$D1,$D1 # d1 += h4*s2 + vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3 + vpmuludq $H3,$T2,$T2 # h3*s2 + vpaddq $T2,$D0,$D0 # d0 += h3*s2 + + vpmuludq $H1,$T3,$T0 # h1*r3 + vpaddq $T0,$D4,$D4 # d4 += h1*r3 + vpmuludq $H0,$T3,$T3 # h0*r3 + vpaddq $T3,$D3,$D3 # d3 += h0*r3 + vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4 + vpmuludq $H4,$T4,$T1 # h4*s3 + vpaddq $T1,$D2,$D2 # d2 += h4*s3 + vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4 + vpmuludq $H3,$T4,$T0 # h3*s3 + vpaddq $T0,$D1,$D1 # d1 += h3*s3 + vpmuludq $H2,$T4,$T4 # h2*s3 + vpaddq $T4,$D0,$D0 # d0 += h2*s3 + + vpmuludq $H0,$T2,$T2 # h0*r4 + vpaddq $T2,$D4,$D4 # d4 += h0*r4 + vpmuludq $H4,$T3,$T1 # h4*s4 + vpaddq $T1,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$T3,$T0 # h3*s4 + vpaddq $T0,$D2,$D2 # d2 += h3*s4 + vpmuludq $H2,$T3,$T1 # h2*s4 + vpaddq $T1,$D1,$D1 # d1 += h2*s4 + vpmuludq $H1,$T3,$T3 # h1*s4 + vpaddq $T3,$D0,$D0 # d0 += h1*s4 + +.Lshort_tail_avx: + ################################################################ + # horizontal addition + + vpsrldq \$8,$D4,$T4 + vpsrldq \$8,$D3,$T3 + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$D0,$T0 + vpsrldq \$8,$D2,$T2 + vpaddq $T3,$D3,$D3 + vpaddq $T4,$D4,$D4 + vpaddq $T0,$D0,$D0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$D2,$D2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D4,$H4 + vpand $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$H1 + vpand $MASK,$D1,$D1 + vpaddq $H1,$D2,$D2 # h1 -> h2 + + vpaddq $H4,$D0,$D0 + vpsllq \$2,$H4,$H4 + vpaddq $H4,$D0,$D0 # h4 -> h0 + + vpsrlq \$26,$D2,$H2 + vpand $MASK,$D2,$D2 + vpaddq $H2,$D3,$D3 # h2 -> h3 + + vpsrlq \$26,$D0,$H0 + vpand $MASK,$D0,$D0 + vpaddq $H0,$D1,$D1 # h0 -> h1 + + vpsrlq \$26,$D3,$H3 + vpand $MASK,$D3,$D3 + vpaddq $H3,$D4,$D4 # h3 -> h4 + + vmovd $D0,`4*0-48-64`($ctx) # save partially reduced + vmovd $D1,`4*1-48-64`($ctx) + vmovd $D2,`4*2-48-64`($ctx) + vmovd $D3,`4*3-48-64`($ctx) + vmovd $D4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa 0x50(%r11),%xmm6 + vmovdqa 0x60(%r11),%xmm7 + vmovdqa 0x70(%r11),%xmm8 + vmovdqa 0x80(%r11),%xmm9 + vmovdqa 0x90(%r11),%xmm10 + vmovdqa 0xa0(%r11),%xmm11 + vmovdqa 0xb0(%r11),%xmm12 + vmovdqa 0xc0(%r11),%xmm13 + vmovdqa 0xd0(%r11),%xmm14 + vmovdqa 0xe0(%r11),%xmm15 + lea 0xf8(%r11),%rsp +.Ldo_avx_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + ret +.cfi_endproc +___ +&end_function("poly1305_blocks_avx"); + +&declare_function("poly1305_emit_avx", 32, 3); +$code.=<<___; + cmpl \$0,20($ctx) # is_base2_26? + je .Lemit + + mov 0($ctx),%eax # load hash value base 2^26 + mov 4($ctx),%ecx + mov 8($ctx),%r8d + mov 12($ctx),%r11d + mov 16($ctx),%r10d + + shl \$26,%rcx # base 2^26 -> base 2^64 + mov %r8,%r9 + shl \$52,%r8 + add %rcx,%rax + shr \$12,%r9 + add %rax,%r8 # h0 + adc \$0,%r9 + + shl \$14,%r11 + mov %r10,%rax + shr \$24,%r10 + add %r11,%r9 + shl \$40,%rax + add %rax,%r9 # h1 + adc \$0,%r10 # h2 + + mov %r10,%rax # could be partially reduced, so reduce + mov %r10,%rcx + and \$3,%r10 + shr \$2,%rax + and \$-4,%rcx + add %rcx,%rax + add %rax,%r8 + adc \$0,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +___ +&end_function("poly1305_emit_avx"); + +if ($kernel) { + $code .= "#endif\n"; +} + +if ($avx>1) { + +if ($kernel) { + $code .= "#ifdef CONFIG_AS_AVX2\n"; +} + +my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = + map("%ymm$_",(0..15)); +my $S4=$MASK; + +sub poly1305_blocks_avxN { + my ($avx512) = @_; + my $suffix = $avx512 ? "_avx512" : ""; +$code.=<<___; +.cfi_startproc + mov 20($ctx),%r8d # is_base2_26 + cmp \$128,$len + jae .Lblocks_avx2$suffix + test %r8d,%r8d + jz .Lblocks + +.Lblocks_avx2$suffix: + and \$-16,$len + jz .Lno_data_avx2$suffix + + vzeroupper + + test %r8d,%r8d + jz .Lbase2_64_avx2$suffix + + test \$63,$len + jz .Leven_avx2$suffix + + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lblocks_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 0($ctx),$d1 # load hash value + mov 8($ctx),$d2 + mov 16($ctx),$h2#d + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + ################################# base 2^26 -> base 2^64 + mov $d1#d,$h0#d + and \$`-1*(1<<31)`,$d1 + mov $d2,$r1 # borrow $r1 + mov $d2#d,$h1#d + and \$`-1*(1<<31)`,$d2 + + shr \$6,$d1 + shl \$52,$r1 + add $d1,$h0 + shr \$12,$h1 + shr \$18,$d2 + add $r1,$h0 + adc $d2,$h1 + + mov $h2,$d1 + shl \$40,$d1 + shr \$24,$h2 + add $d1,$h1 + adc \$0,$h2 # can be partially reduced... + + mov \$-4,$d2 # ... so reduce + mov $h2,$d1 + and $h2,$d2 + shr \$2,$d1 + and \$3,$h2 + add $d2,$d1 # =*5 + add $d1,$h0 + adc \$0,$h1 + adc \$0,$h2 + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + +.Lbase2_26_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_26_pre_avx2$suffix + + test $padbit,$padbit # if $padbit is zero, + jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format + + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$r0 + mov $h1,$r1 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$r0 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $r0,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$r1 + and \$0x3ffffff,$h1 # h[3] + or $r1,$h2 # h[4] + + test %r15,%r15 + jz .Lstore_base2_26_avx2$suffix + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + jmp .Lproceed_avx2$suffix + +.align 32 +.Lstore_base2_64_avx2$suffix: + mov $h0,0($ctx) + mov $h1,8($ctx) + mov $h2,16($ctx) # note that is_base2_26 is zeroed + jmp .Ldone_avx2$suffix + +.align 16 +.Lstore_base2_26_avx2$suffix: + mov %rax#d,0($ctx) # store hash value base 2^26 + mov %rdx#d,4($ctx) + mov $h0#d,8($ctx) + mov $h1#d,12($ctx) + mov $h2#d,16($ctx) +.align 16 +.Ldone_avx2$suffix: + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lno_data_avx2$suffix: +.Lblocks_avx2_epilogue$suffix: + ret +.cfi_endproc + +.align 32 +.Lbase2_64_avx2$suffix: +.cfi_startproc + push %rbp +.cfi_push %rbp + mov %rsp,%rbp + push %rbx +.cfi_push %rbx + push %r12 +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 +.Lbase2_64_avx2_body$suffix: + + mov $len,%r15 # reassign $len + + mov 24($ctx),$r0 # load r + mov 32($ctx),$s1 + + mov 0($ctx),$h0 # load hash value + mov 8($ctx),$h1 + mov 16($ctx),$h2#d + + mov $s1,$r1 + mov $s1,%rax + shr \$2,$s1 + add $r1,$s1 # s1 = r1 + (r1 >> 2) + + test \$63,$len + jz .Linit_avx2$suffix + +.Lbase2_64_pre_avx2$suffix: + add 0($inp),$h0 # accumulate input + adc 8($inp),$h1 + lea 16($inp),$inp + adc $padbit,$h2 + sub \$16,%r15 + + call __poly1305_block + mov $r1,%rax + + test \$63,%r15 + jnz .Lbase2_64_pre_avx2$suffix + +.Linit_avx2$suffix: + ################################# base 2^64 -> base 2^26 + mov $h0,%rax + mov $h0,%rdx + shr \$52,$h0 + mov $h1,$d1 + mov $h1,$d2 + shr \$26,%rdx + and \$0x3ffffff,%rax # h[0] + shl \$12,$d1 + and \$0x3ffffff,%rdx # h[1] + shr \$14,$h1 + or $d1,$h0 + shl \$24,$h2 + and \$0x3ffffff,$h0 # h[2] + shr \$40,$d2 + and \$0x3ffffff,$h1 # h[3] + or $d2,$h2 # h[4] + + vmovd %rax#d,%x#$H0 + vmovd %rdx#d,%x#$H1 + vmovd $h0#d,%x#$H2 + vmovd $h1#d,%x#$H3 + vmovd $h2#d,%x#$H4 + movl \$1,20($ctx) # set is_base2_26 + + call __poly1305_init_avx + +.Lproceed_avx2$suffix: + mov %r15,$len # restore $len +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d + mov \$`(1<<31|1<<30|1<<16)`,%r11d +___ +$code.=<<___; + pop %r15 +.cfi_restore %r15 + pop %r14 +.cfi_restore %r14 + pop %r13 +.cfi_restore %r13 + pop %r12 +.cfi_restore %r12 + pop %rbx +.cfi_restore %rbx + pop %rbp +.cfi_restore %rbp +.Lbase2_64_avx2_epilogue$suffix: + jmp .Ldo_avx2$suffix +.cfi_endproc + +.align 32 +.Leven_avx2$suffix: +.cfi_startproc +___ +$code.=<<___ if (!$kernel); + mov OPENSSL_ia32cap_P+8(%rip),%r9d +___ +$code.=<<___; + vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 + vmovd 4*1($ctx),%x#$H1 + vmovd 4*2($ctx),%x#$H2 + vmovd 4*3($ctx),%x#$H3 + vmovd 4*4($ctx),%x#$H4 + +.Ldo_avx2$suffix: +___ +$code.=<<___ if (!$kernel && $avx>2); + cmp \$512,$len + jb .Lskip_avx512 + and %r11d,%r9d + test \$`1<<16`,%r9d # check for AVX512F + jnz .Lblocks_avx512 +.Lskip_avx512$suffix: +___ +$code.=<<___ if ($avx > 2 && $avx512 && $kernel); + cmp \$512,$len + jae .Lblocks_avx512 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx2_body$suffix: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),$T0 # .Lpermd_avx2 + + # expand and copy pre-calculated table to stack + vmovdqu `16*0-64`($ctx),%x#$T2 + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$T3 + vmovdqu `16*2-64`($ctx),%x#$T4 + vmovdqu `16*3-64`($ctx),%x#$D0 + vmovdqu `16*4-64`($ctx),%x#$D1 + vmovdqu `16*5-64`($ctx),%x#$D2 + lea 0x90(%rsp),%rax # size optimization + vmovdqu `16*6-64`($ctx),%x#$D3 + vpermd $T2,$T0,$T2 # 00003412 -> 14243444 + vmovdqu `16*7-64`($ctx),%x#$D4 + vpermd $T3,$T0,$T3 + vmovdqu `16*8-64`($ctx),%x#$MASK + vpermd $T4,$T0,$T4 + vmovdqa $T2,0x00(%rsp) + vpermd $D0,$T0,$D0 + vmovdqa $T3,0x20-0x90(%rax) + vpermd $D1,$T0,$D1 + vmovdqa $T4,0x40-0x90(%rax) + vpermd $D2,$T0,$D2 + vmovdqa $D0,0x60-0x90(%rax) + vpermd $D3,$T0,$D3 + vmovdqa $D1,0x80-0x90(%rax) + vpermd $D4,$T0,$D4 + vmovdqa $D2,0xa0-0x90(%rax) + vpermd $MASK,$T0,$MASK + vmovdqa $D3,0xc0-0x90(%rax) + vmovdqa $D4,0xe0-0x90(%rax) + vmovdqa $MASK,0x100-0x90(%rax) + vmovdqa 64(%rcx),$MASK # .Lmask26 + + ################################################################ + # load input + vmovdqu 16*0($inp),%x#$T0 + vmovdqu 16*1($inp),%x#$T1 + vinserti128 \$1,16*2($inp),$T0,$T0 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$64,$len + jz .Ltail_avx2$suffix + jmp .Loop_avx2$suffix + +.align 32 +.Loop_avx2$suffix: + ################################################################ + # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 + # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 + # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2 + # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1 + # \________/\__________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqa `32*0`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqa `32*1`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqa `32*3`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqa `32*6-0x90`(%rax),$T3 # s3^4 + vmovdqa `32*8-0x90`(%rax),$S4 # s4^4 + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4 + # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + vmovdqa `32*4-0x90`(%rax),$T1 # s2 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vmovdqu 16*0($inp),%x#$T0 # load input + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + vinserti128 \$1,16*2($inp),$T0,$T0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vmovdqu 16*1($inp),%x#$T1 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqa `32*5-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + vinserti128 \$1,16*3($inp),$T1,$T1 + lea 16*4($inp),$inp + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpsrldq \$6,$T0,$T2 # splat input + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpsrldq \$6,$T1,$T3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + vpunpckhqdq $T1,$T0,$T4 # 4 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpunpcklqdq $T3,$T2,$T3 # 2:3 + vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # lazy reduction (interleaved with tail of input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$4,$T3,$T2 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpand $MASK,$T2,$T2 # 2 + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$30,$T3,$T3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpand $MASK,$T0,$T0 # 0 + vpand $MASK,$T1,$T1 # 1 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + + sub \$64,$len + jnz .Loop_avx2$suffix + + .byte 0x66,0x90 +.Ltail_avx2$suffix: + ################################################################ + # while above multiplications were by r^4 in all lanes, in last + # iteration we multiply least significant lane by r^4 and most + # significant one by r, so copy of above except that references + # to the precomputed table are displaced by 4... + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + vmovdqu `32*0+4`(%rsp),$T0 # r0^4 + vpaddq $H1,$T1,$H1 + vmovdqu `32*1+4`(%rsp),$T1 # r1^4 + vpaddq $H3,$T3,$H3 + vmovdqu `32*3+4`(%rsp),$T2 # r2^4 + vpaddq $H4,$T4,$H4 + vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4 + vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4 + + vpmuludq $H2,$T0,$D2 # d2 = h2*r0 + vpmuludq $H2,$T1,$D3 # d3 = h2*r1 + vpmuludq $H2,$T2,$D4 # d4 = h2*r2 + vpmuludq $H2,$T3,$D0 # d0 = h2*s3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + + vpmuludq $H0,$T1,$T4 # h0*r1 + vpmuludq $H1,$T1,$H2 # h1*r1 + vpaddq $T4,$D1,$D1 # d1 += h0*r1 + vpaddq $H2,$D2,$D2 # d2 += h1*r1 + vpmuludq $H3,$T1,$T4 # h3*r1 + vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1 + vpaddq $T4,$D4,$D4 # d4 += h3*r1 + vpaddq $H2,$D0,$D0 # d0 += h4*s1 + + vpmuludq $H0,$T0,$T4 # h0*r0 + vpmuludq $H1,$T0,$H2 # h1*r0 + vpaddq $T4,$D0,$D0 # d0 += h0*r0 + vmovdqu `32*4+4-0x90`(%rax),$T1 # s2 + vpaddq $H2,$D1,$D1 # d1 += h1*r0 + vpmuludq $H3,$T0,$T4 # h3*r0 + vpmuludq $H4,$T0,$H2 # h4*r0 + vpaddq $T4,$D3,$D3 # d3 += h3*r0 + vpaddq $H2,$D4,$D4 # d4 += h4*r0 + + vpmuludq $H3,$T1,$T4 # h3*s2 + vpmuludq $H4,$T1,$H2 # h4*s2 + vpaddq $T4,$D0,$D0 # d0 += h3*s2 + vpaddq $H2,$D1,$D1 # d1 += h4*s2 + vmovdqu `32*5+4-0x90`(%rax),$H2 # r3 + vpmuludq $H1,$T2,$T4 # h1*r2 + vpmuludq $H0,$T2,$T2 # h0*r2 + vpaddq $T4,$D3,$D3 # d3 += h1*r2 + vpaddq $T2,$D2,$D2 # d2 += h0*r2 + + vpmuludq $H1,$H2,$T4 # h1*r3 + vpmuludq $H0,$H2,$H2 # h0*r3 + vpaddq $T4,$D4,$D4 # d4 += h1*r3 + vpaddq $H2,$D3,$D3 # d3 += h0*r3 + vpmuludq $H3,$T3,$T4 # h3*s3 + vpmuludq $H4,$T3,$H2 # h4*s3 + vpaddq $T4,$D1,$D1 # d1 += h3*s3 + vpaddq $H2,$D2,$D2 # d2 += h4*s3 + + vpmuludq $H3,$S4,$H3 # h3*s4 + vpmuludq $H4,$S4,$H4 # h4*s4 + vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 + vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 + vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4 + vpmuludq $H1,$S4,$H0 # h1*s4 + vmovdqa 64(%rcx),$MASK # .Lmask26 + vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 + vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 + + ################################################################ + # horizontal addition + + vpsrldq \$8,$D1,$T1 + vpsrldq \$8,$H2,$T2 + vpsrldq \$8,$H3,$T3 + vpsrldq \$8,$H4,$T4 + vpsrldq \$8,$H0,$T0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + + vpermq \$0x2,$H3,$T3 + vpermq \$0x2,$H4,$T4 + vpermq \$0x2,$H0,$T0 + vpermq \$0x2,$D1,$T1 + vpermq \$0x2,$H2,$T2 + vpaddq $T3,$H3,$H3 + vpaddq $T4,$H4,$H4 + vpaddq $T0,$H0,$H0 + vpaddq $T1,$D1,$D1 + vpaddq $T2,$H2,$H2 + + ################################################################ + # lazy reduction + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$D1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) +___ +$code.=<<___ if ($win64); + vmovdqa -0xb0(%r10),%xmm6 + vmovdqa -0xa0(%r10),%xmm7 + vmovdqa -0x90(%r10),%xmm8 + vmovdqa -0x80(%r10),%xmm9 + vmovdqa -0x70(%r10),%xmm10 + vmovdqa -0x60(%r10),%xmm11 + vmovdqa -0x50(%r10),%xmm12 + vmovdqa -0x40(%r10),%xmm13 + vmovdqa -0x30(%r10),%xmm14 + vmovdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx2_epilogue$suffix: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + vzeroupper + ret +.cfi_endproc +___ +if($avx > 2 && $avx512) { +my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); +my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); +my $PADBIT="%zmm30"; + +map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain +map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4)); +map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); +map(s/%y/%z/,($MASK)); + +$code.=<<___; +.cfi_startproc +.Lblocks_avx512: + mov \$15,%eax + kmovw %eax,%k2 +___ +$code.=<<___ if (!$win64); + lea 8(%rsp),%r10 +.cfi_def_cfa_register %r10 + sub \$0x128,%rsp +___ +$code.=<<___ if ($win64); + lea 8(%rsp),%r10 + sub \$0x1c8,%rsp + vmovdqa %xmm6,-0xb0(%r10) + vmovdqa %xmm7,-0xa0(%r10) + vmovdqa %xmm8,-0x90(%r10) + vmovdqa %xmm9,-0x80(%r10) + vmovdqa %xmm10,-0x70(%r10) + vmovdqa %xmm11,-0x60(%r10) + vmovdqa %xmm12,-0x50(%r10) + vmovdqa %xmm13,-0x40(%r10) + vmovdqa %xmm14,-0x30(%r10) + vmovdqa %xmm15,-0x20(%r10) +.Ldo_avx512_body: +___ +$code.=<<___; + lea .Lconst(%rip),%rcx + lea 48+64($ctx),$ctx # size optimization + vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2 + + # expand pre-calculated table + vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0} + and \$-512,%rsp + vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1} + mov \$0x20,%rax + vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1} + vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2} + vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2} + vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3} + vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3} + vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4} + vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4} + vpermd $D0,$T2,$R0 # 00003412 -> 14243444 + vpbroadcastq 64(%rcx),$MASK # .Lmask26 + vpermd $D1,$T2,$R1 + vpermd $T0,$T2,$S1 + vpermd $D2,$T2,$R2 + vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0 + vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304 + vpermd $T1,$T2,$S2 + vmovdqu64 $R1,0x00(%rsp,%rax){%k2} + vpsrlq \$32,$R1,$T1 + vpermd $D3,$T2,$R3 + vmovdqa64 $S1,0x40(%rsp){%k2} + vpermd $T3,$T2,$S3 + vpermd $D4,$T2,$R4 + vmovdqu64 $R2,0x40(%rsp,%rax){%k2} + vpermd $T4,$T2,$S4 + vmovdqa64 $S2,0x80(%rsp){%k2} + vmovdqu64 $R3,0x80(%rsp,%rax){%k2} + vmovdqa64 $S3,0xc0(%rsp){%k2} + vmovdqu64 $R4,0xc0(%rsp,%rax){%k2} + vmovdqa64 $S4,0x100(%rsp){%k2} + + ################################################################ + # calculate 5th through 8th powers of the key + # + # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1 + # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2 + # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3 + # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4 + # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0 + + vpmuludq $T0,$R0,$D0 # d0 = r0'*r0 + vpmuludq $T0,$R1,$D1 # d1 = r0'*r1 + vpmuludq $T0,$R2,$D2 # d2 = r0'*r2 + vpmuludq $T0,$R3,$D3 # d3 = r0'*r3 + vpmuludq $T0,$R4,$D4 # d4 = r0'*r4 + vpsrlq \$32,$R2,$T2 + + vpmuludq $T1,$S4,$M0 + vpmuludq $T1,$R0,$M1 + vpmuludq $T1,$R1,$M2 + vpmuludq $T1,$R2,$M3 + vpmuludq $T1,$R3,$M4 + vpsrlq \$32,$R3,$T3 + vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4 + vpaddq $M1,$D1,$D1 # d1 += r1'*r0 + vpaddq $M2,$D2,$D2 # d2 += r1'*r1 + vpaddq $M3,$D3,$D3 # d3 += r1'*r2 + vpaddq $M4,$D4,$D4 # d4 += r1'*r3 + + vpmuludq $T2,$S3,$M0 + vpmuludq $T2,$S4,$M1 + vpmuludq $T2,$R1,$M3 + vpmuludq $T2,$R2,$M4 + vpmuludq $T2,$R0,$M2 + vpsrlq \$32,$R4,$T4 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4 + vpaddq $M3,$D3,$D3 # d3 += r2'*r1 + vpaddq $M4,$D4,$D4 # d4 += r2'*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*r0 + + vpmuludq $T3,$S2,$M0 + vpmuludq $T3,$R0,$M3 + vpmuludq $T3,$R1,$M4 + vpmuludq $T3,$S3,$M1 + vpmuludq $T3,$S4,$M2 + vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2 + vpaddq $M3,$D3,$D3 # d3 += r3'*r0 + vpaddq $M4,$D4,$D4 # d4 += r3'*r1 + vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3 + vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4 + + vpmuludq $T4,$S4,$M3 + vpmuludq $T4,$R0,$M4 + vpmuludq $T4,$S1,$M0 + vpmuludq $T4,$S2,$M1 + vpmuludq $T4,$S3,$M2 + vpaddq $M3,$D3,$D3 # d3 += r2'*5*r4 + vpaddq $M4,$D4,$D4 # d4 += r2'*r0 + vpaddq $M0,$D0,$D0 # d0 += r2'*5*r1 + vpaddq $M1,$D1,$D1 # d1 += r2'*5*r2 + vpaddq $M2,$D2,$D2 # d2 += r2'*5*r3 + + ################################################################ + # load input + vmovdqu64 16*0($inp),%z#$T3 + vmovdqu64 16*4($inp),%z#$T4 + lea 16*8($inp),$inp + + ################################################################ + # lazy reduction + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D4,$M4 + vpandq $MASK,$D4,$D4 + + vpsrlq \$26,$D1,$M1 + vpandq $MASK,$D1,$D1 + vpaddq $M1,$D2,$D2 # d1 -> d2 + + vpaddq $M4,$D0,$D0 + vpsllq \$2,$M4,$M4 + vpaddq $M4,$D0,$D0 # d4 -> d0 + + vpsrlq \$26,$D2,$M2 + vpandq $MASK,$D2,$D2 + vpaddq $M2,$D3,$D3 # d2 -> d3 + + vpsrlq \$26,$D0,$M0 + vpandq $MASK,$D0,$D0 + vpaddq $M0,$D1,$D1 # d0 -> d1 + + vpsrlq \$26,$D3,$M3 + vpandq $MASK,$D3,$D3 + vpaddq $M3,$D4,$D4 # d3 -> d4 + + ################################################################ + # at this point we have 14243444 in $R0-$S4 and 05060708 in + # $D0-$D4, ... + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + # ... since input 64-bit lanes are ordered as 73625140, we could + # "vperm" it to 76543210 (here and in each loop iteration), *or* + # we could just flow along, hence the goal for $R0-$S4 is + # 1858286838784888 ... + + vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512: + mov \$0x7777,%eax + kmovw %eax,%k1 + + vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4--- + vpermd $R1,$M0,$R1 + vpermd $R2,$M0,$R2 + vpermd $R3,$M0,$R3 + vpermd $R4,$M0,$R4 + + vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888 + vpermd $D1,$M0,${R1}{%k1} + vpermd $D2,$M0,${R2}{%k1} + vpermd $D3,$M0,${R3}{%k1} + vpermd $D4,$M0,${R4}{%k1} + + vpslld \$2,$R1,$S1 # *5 + vpslld \$2,$R2,$S2 + vpslld \$2,$R3,$S3 + vpslld \$2,$R4,$S4 + vpaddd $R1,$S1,$S1 + vpaddd $R2,$S2,$S2 + vpaddd $R3,$S3,$S3 + vpaddd $R4,$S4,$S4 + + vpbroadcastq 32(%rcx),$PADBIT # .L129 + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + vporq $T3,$T2,$T2 + vpsrlq \$26,$T0,$T1 + vpsrlq \$14,$T4,$T3 + vpsrlq \$40,$T4,$T4 # 4 + vpandq $MASK,$T2,$T2 # 2 + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + vpaddq $H2,$T2,$H2 # accumulate input + sub \$192,$len + jbe .Ltail_avx512 + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: + ################################################################ + # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8 + # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7 + # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6 + # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5 + # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4 + # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3 + # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2 + # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1 + # \________/\___________/ + ################################################################ + #vpaddq $H2,$T2,$H2 # accumulate input + + # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + # + # however, as h2 is "chronologically" first one available pull + # corresponding operations up, so it's + # + # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4 + # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0 + # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1 + # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2 + # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpaddq $H0,$T0,$H0 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu64 16*0($inp),$T3 # load input + vmovdqu64 16*4($inp),$T4 + lea 16*8($inp),$inp + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vpunpcklqdq $T4,$T3,$T0 # transpose input + vpunpckhqdq $T4,$T3,$T4 + + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpaddq $M3,$D3,$D3 # d3 += h4*s4 + vpmuludq $H3,$S4,$M2 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$52,$T0,$T2 # splat input + vpsllq \$12,$T4,$T3 + + vpsrlq \$26,$D3,$H3 + vpandq $MASK,$D3,$D3 + vpaddq $H3,$D4,$H4 # h3 -> h4 + + vporq $T3,$T2,$T2 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpandq $MASK,$T2,$T2 # 2 + + vpsrlq \$26,$H4,$D4 + vpandq $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpandq $MASK,$H1,$H1 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpaddq $T2,$H2,$H2 # modulo-scheduled + vpsrlq \$26,$T0,$T1 + + vpsrlq \$26,$H2,$D2 + vpandq $MASK,$H2,$H2 + vpaddq $D2,$D3,$H3 # h2 -> h3 + + vpsrlq \$14,$T4,$T3 + + vpsrlq \$26,$H0,$D0 + vpandq $MASK,$H0,$H0 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$40,$T4,$T4 # 4 + + vpsrlq \$26,$H3,$D3 + vpandq $MASK,$H3,$H3 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpandq $MASK,$T0,$T0 # 0 + #vpandq $MASK,$T1,$T1 # 1 + #vpandq $MASK,$T3,$T3 # 3 + #vporq $PADBIT,$T4,$T4 # padbit, yes, always + + sub \$128,$len + ja .Loop_avx512 + +.Ltail_avx512: + ################################################################ + # while above multiplications were by r^8 in all lanes, in last + # iteration we multiply least significant lane by r^8 and most + # significant one by r, that's why table gets shifted... + + vpsrlq \$32,$R0,$R0 # 0105020603070408 + vpsrlq \$32,$R1,$R1 + vpsrlq \$32,$R2,$R2 + vpsrlq \$32,$S3,$S3 + vpsrlq \$32,$S4,$S4 + vpsrlq \$32,$R3,$R3 + vpsrlq \$32,$R4,$R4 + vpsrlq \$32,$S1,$S1 + vpsrlq \$32,$S2,$S2 + + ################################################################ + # load either next or last 64 byte of input + lea ($inp,$len),$inp + + #vpaddq $H2,$T2,$H2 # accumulate input + vpaddq $H0,$T0,$H0 + + vpmuludq $H2,$R1,$D3 # d3 = h2*r1 + vpmuludq $H2,$R2,$D4 # d4 = h2*r2 + vpmuludq $H2,$S3,$D0 # d0 = h2*s3 + vpandq $MASK,$T1,$T1 # 1 + vpmuludq $H2,$S4,$D1 # d1 = h2*s4 + vpandq $MASK,$T3,$T3 # 3 + vpmuludq $H2,$R0,$D2 # d2 = h2*r0 + vporq $PADBIT,$T4,$T4 # padbit, yes, always + vpaddq $H1,$T1,$H1 # accumulate input + vpaddq $H3,$T3,$H3 + vpaddq $H4,$T4,$H4 + + vmovdqu 16*0($inp),%x#$T0 + vpmuludq $H0,$R3,$M3 + vpmuludq $H0,$R4,$M4 + vpmuludq $H0,$R0,$M0 + vpmuludq $H0,$R1,$M1 + vpaddq $M3,$D3,$D3 # d3 += h0*r3 + vpaddq $M4,$D4,$D4 # d4 += h0*r4 + vpaddq $M0,$D0,$D0 # d0 += h0*r0 + vpaddq $M1,$D1,$D1 # d1 += h0*r1 + + vmovdqu 16*1($inp),%x#$T1 + vpmuludq $H1,$R2,$M3 + vpmuludq $H1,$R3,$M4 + vpmuludq $H1,$S4,$M0 + vpmuludq $H0,$R2,$M2 + vpaddq $M3,$D3,$D3 # d3 += h1*r2 + vpaddq $M4,$D4,$D4 # d4 += h1*r3 + vpaddq $M0,$D0,$D0 # d0 += h1*s4 + vpaddq $M2,$D2,$D2 # d2 += h0*r2 + + vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0 + vpmuludq $H3,$R0,$M3 + vpmuludq $H3,$R1,$M4 + vpmuludq $H1,$R0,$M1 + vpmuludq $H1,$R1,$M2 + vpaddq $M3,$D3,$D3 # d3 += h3*r0 + vpaddq $M4,$D4,$D4 # d4 += h3*r1 + vpaddq $M1,$D1,$D1 # d1 += h1*r0 + vpaddq $M2,$D2,$D2 # d2 += h1*r1 + + vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1 + vpmuludq $H4,$S4,$M3 + vpmuludq $H4,$R0,$M4 + vpmuludq $H3,$S2,$M0 + vpmuludq $H3,$S3,$M1 + vpmuludq $H3,$S4,$M2 + vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4 + vpaddq $M4,$D4,$D4 # d4 += h4*r0 + vpaddq $M0,$D0,$D0 # d0 += h3*s2 + vpaddq $M1,$D1,$D1 # d1 += h3*s3 + vpaddq $M2,$D2,$D2 # d2 += h3*s4 + + vpmuludq $H4,$S1,$M0 + vpmuludq $H4,$S2,$M1 + vpmuludq $H4,$S3,$M2 + vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 + vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 + vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 + + ################################################################ + # horizontal addition + + mov \$1,%eax + vpermq \$0xb1,$H3,$D3 + vpermq \$0xb1,$D4,$H4 + vpermq \$0xb1,$H0,$D0 + vpermq \$0xb1,$H1,$D1 + vpermq \$0xb1,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + kmovw %eax,%k3 + vpermq \$0x2,$H3,$D3 + vpermq \$0x2,$H4,$D4 + vpermq \$0x2,$H0,$D0 + vpermq \$0x2,$H1,$D1 + vpermq \$0x2,$H2,$D2 + vpaddq $D3,$H3,$H3 + vpaddq $D4,$H4,$H4 + vpaddq $D0,$H0,$H0 + vpaddq $D1,$H1,$H1 + vpaddq $D2,$H2,$H2 + + vextracti64x4 \$0x1,$H3,%y#$D3 + vextracti64x4 \$0x1,$H4,%y#$D4 + vextracti64x4 \$0x1,$H0,%y#$D0 + vextracti64x4 \$0x1,$H1,%y#$D1 + vextracti64x4 \$0x1,$H2,%y#$D2 + vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case + vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2 + vpaddq $D0,$H0,${H0}{%k3}{z} + vpaddq $D1,$H1,${H1}{%k3}{z} + vpaddq $D2,$H2,${H2}{%k3}{z} +___ +map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT)); +map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK)); +$code.=<<___; + ################################################################ + # lazy reduction (interleaved with input splat) + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpsrldq \$6,$T0,$T2 # splat input + vpsrldq \$6,$T1,$T3 + vpunpckhqdq $T1,$T0,$T4 # 4 + vpaddq $D3,$H4,$H4 # h3 -> h4 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpunpcklqdq $T3,$T2,$T2 # 2:3 + vpunpcklqdq $T1,$T0,$T0 # 0:1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H4,$D4 + vpand $MASK,$H4,$H4 + + vpsrlq \$26,$H1,$D1 + vpand $MASK,$H1,$H1 + vpsrlq \$30,$T2,$T3 + vpsrlq \$4,$T2,$T2 + vpaddq $D1,$H2,$H2 # h1 -> h2 + + vpaddq $D4,$H0,$H0 + vpsllq \$2,$D4,$D4 + vpsrlq \$26,$T0,$T1 + vpsrlq \$40,$T4,$T4 # 4 + vpaddq $D4,$H0,$H0 # h4 -> h0 + + vpsrlq \$26,$H2,$D2 + vpand $MASK,$H2,$H2 + vpand $MASK,$T2,$T2 # 2 + vpand $MASK,$T0,$T0 # 0 + vpaddq $D2,$H3,$H3 # h2 -> h3 + + vpsrlq \$26,$H0,$D0 + vpand $MASK,$H0,$H0 + vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2 + vpand $MASK,$T1,$T1 # 1 + vpaddq $D0,$H1,$H1 # h0 -> h1 + + vpsrlq \$26,$H3,$D3 + vpand $MASK,$H3,$H3 + vpand $MASK,$T3,$T3 # 3 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always + vpaddq $D3,$H4,$H4 # h3 -> h4 + + lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 + add \$64,$len + jnz .Ltail_avx2$suffix + + vpsubq $T2,$H2,$H2 # undo input accumulation + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced + vmovd %x#$H1,`4*1-48-64`($ctx) + vmovd %x#$H2,`4*2-48-64`($ctx) + vmovd %x#$H3,`4*3-48-64`($ctx) + vmovd %x#$H4,`4*4-48-64`($ctx) + vzeroall +___ +$code.=<<___ if ($win64); + movdqa -0xb0(%r10),%xmm6 + movdqa -0xa0(%r10),%xmm7 + movdqa -0x90(%r10),%xmm8 + movdqa -0x80(%r10),%xmm9 + movdqa -0x70(%r10),%xmm10 + movdqa -0x60(%r10),%xmm11 + movdqa -0x50(%r10),%xmm12 + movdqa -0x40(%r10),%xmm13 + movdqa -0x30(%r10),%xmm14 + movdqa -0x20(%r10),%xmm15 + lea -8(%r10),%rsp +.Ldo_avx512_epilogue: +___ +$code.=<<___ if (!$win64); + lea -8(%r10),%rsp +.cfi_def_cfa_register %rsp +___ +$code.=<<___; + ret +.cfi_endproc +___ + +} + +} + +&declare_function("poly1305_blocks_avx2", 32, 4); +poly1305_blocks_avxN(0); +&end_function("poly1305_blocks_avx2"); + +if($kernel) { + $code .= "#endif\n"; +} + +####################################################################### +if ($avx>2) { +# On entry we have input length divisible by 64. But since inner loop +# processes 128 bytes per iteration, cases when length is not divisible +# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this +# reason stack layout is kept identical to poly1305_blocks_avx2. If not +# for this tail, we wouldn't have to even allocate stack frame... + +if($kernel) { + $code .= "#ifdef CONFIG_AS_AVX512\n"; +} + +&declare_function("poly1305_blocks_avx512", 32, 4); +poly1305_blocks_avxN(1); +&end_function("poly1305_blocks_avx512"); + +if ($kernel) { + $code .= "#endif\n"; +} + +if (!$kernel && $avx>3) { +######################################################################## +# VPMADD52 version using 2^44 radix. +# +# One can argue that base 2^52 would be more natural. Well, even though +# some operations would be more natural, one has to recognize couple of +# things. Base 2^52 doesn't provide advantage over base 2^44 if you look +# at amount of multiply-n-accumulate operations. Secondly, it makes it +# impossible to pre-compute multiples of 5 [referred to as s[]/sN in +# reference implementations], which means that more such operations +# would have to be performed in inner loop, which in turn makes critical +# path longer. In other words, even though base 2^44 reduction might +# look less elegant, overall critical path is actually shorter... + +######################################################################## +# Layout of opaque area is following. +# +# unsigned __int64 h[3]; # current hash value base 2^44 +# unsigned __int64 s[2]; # key value*20 base 2^44 +# unsigned __int64 r[3]; # key value base 2^44 +# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4]; +# # r^n positions reflect +# # placement in register, not +# # memory, R[3] is R[1]*20 + +$code.=<<___; +.type poly1305_init_base2_44,\@function,3 +.align 32 +poly1305_init_base2_44: + xor %rax,%rax + mov %rax,0($ctx) # initialize hash value + mov %rax,8($ctx) + mov %rax,16($ctx) + +.Linit_base2_44: + lea poly1305_blocks_vpmadd52(%rip),%r10 + lea poly1305_emit_base2_44(%rip),%r11 + + mov \$0x0ffffffc0fffffff,%rax + mov \$0x0ffffffc0ffffffc,%rcx + and 0($inp),%rax + mov \$0x00000fffffffffff,%r8 + and 8($inp),%rcx + mov \$0x00000fffffffffff,%r9 + and %rax,%r8 + shrd \$44,%rcx,%rax + mov %r8,40($ctx) # r0 + and %r9,%rax + shr \$24,%rcx + mov %rax,48($ctx) # r1 + lea (%rax,%rax,4),%rax # *5 + mov %rcx,56($ctx) # r2 + shl \$2,%rax # magic <<2 + lea (%rcx,%rcx,4),%rcx # *5 + shl \$2,%rcx # magic <<2 + mov %rax,24($ctx) # s1 + mov %rcx,32($ctx) # s2 + movq \$-1,64($ctx) # write impossible value +___ +$code.=<<___ if ($flavour !~ /elf32/); + mov %r10,0(%rdx) + mov %r11,8(%rdx) +___ +$code.=<<___ if ($flavour =~ /elf32/); + mov %r10d,0(%rdx) + mov %r11d,4(%rdx) +___ +$code.=<<___; + mov \$1,%eax + ret +.size poly1305_init_base2_44,.-poly1305_init_base2_44 +___ +{ +my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17)); +my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21)); +my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52,\@function,4 +.align 32 +poly1305_blocks_vpmadd52: + shr \$4,$len + jz .Lno_data_vpmadd52 # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + # if powers of the key are not calculated yet, process up to 3 + # blocks with this single-block subroutine, otherwise ensure that + # length is divisible by 2 blocks and pass the rest down to next + # subroutine... + + mov \$3,%rax + mov \$1,%r10 + cmp \$4,$len # is input long + cmovae %r10,%rax + test %r8,%r8 # is power value impossible? + cmovns %r10,%rax + + and $len,%rax # is input of favourable length? + jz .Lblocks_vpmadd52_4x + + sub %rax,$len + mov \$7,%r10d + mov \$1,%r11d + kmovw %r10d,%k7 + lea .L2_44_inp_permd(%rip),%r10 + kmovw %r11d,%k1 + + vmovq $padbit,%x#$PAD + vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd + vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift + vpermq \$0xcf,$PAD,$PAD + vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask + + vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value + vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys + vmovdqu64 32($ctx),${r1r0s2}{%k7}{z} + vmovdqu64 24($ctx),${r0s2s1}{%k7}{z} + + vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt + vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft + + jmp .Loop_vpmadd52 + +.align 32 +.Loop_vpmadd52: + vmovdqu32 0($inp),%x#$T0 # load input as ----3210 + lea 16($inp),$inp + + vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110 + vpsrlvq $inp_shift,$T0,$T0 + vpandq $reduc_mask,$T0,$T0 + vporq $PAD,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo # accumulate input + + vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value + vpermq \$0b01010101,$Dlo,${H1}{%k7}{z} + vpermq \$0b10101010,$Dlo,${H2}{%k7}{z} + + vpxord $Dlo,$Dlo,$Dlo + vpxord $Dhi,$Dhi,$Dhi + + vpmadd52luq $r2r1r0,$H0,$Dlo + vpmadd52huq $r2r1r0,$H0,$Dhi + + vpmadd52luq $r1r0s2,$H1,$Dlo + vpmadd52huq $r1r0s2,$H1,$Dhi + + vpmadd52luq $r0s2s1,$H2,$Dlo + vpmadd52huq $r0s2s1,$H2,$Dhi + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword + vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword + vpandq $reduc_mask,$Dlo,$Dlo + + vpaddq $T0,$Dhi,$Dhi + + vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword + + vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-) + + vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word + vpandq $reduc_mask,$Dlo,$Dlo + + vpermq \$0b10010011,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + vpermq \$0b10010011,$Dlo,${T0}{%k1}{z} + + vpaddq $T0,$Dlo,$Dlo + vpsllq \$2,$T0,$T0 + + vpaddq $T0,$Dlo,$Dlo + + dec %rax # len-=16 + jnz .Loop_vpmadd52 + + vmovdqu64 $Dlo,0($ctx){%k7} # store hash value + + test $len,$len + jnz .Lblocks_vpmadd52_4x + +.Lno_data_vpmadd52: + ret +.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 +___ +} +{ +######################################################################## +# As implied by its name 4x subroutine processes 4 blocks in parallel +# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power +# and is handled in 256-bit %ymm registers. + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_4x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_4x: + shr \$4,$len + jz .Lno_data_vpmadd52_4x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + +.Lblocks_vpmadd52_4x: + vpbroadcastq $padbit,$PAD + + vmovdqa64 .Lx_mask44(%rip),$mask44 + mov \$5,%eax + vmovdqa64 .Lx_mask42(%rip),$mask42 + kmovw %eax,%k1 # used in 2x path + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Lblocks_vpmadd52_2x_do + +.Lblocks_vpmadd52_4x_do: + vpbroadcastq 64($ctx),$R0 # load 4th power of the key + vpbroadcastq 96($ctx),$R1 + vpbroadcastq 128($ctx),$R2 + vpbroadcastq 160($ctx),$S1 + +.Lblocks_vpmadd52_4x_key_loaded: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + test \$7,$len # is len 8*n? + jz .Lblocks_vpmadd52_8x + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 3-1-2-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$4,$len + jz .Ltail_vpmadd52_4x + jmp .Loop_vpmadd52_4x + ud2 + +.align 32 +.Linit_vpmadd52: + vmovq 24($ctx),%x#$S1 # load key + vmovq 56($ctx),%x#$H2 + vmovq 32($ctx),%x#$S2 + vmovq 40($ctx),%x#$R0 + vmovq 48($ctx),%x#$R1 + + vmovdqa $R0,$H0 + vmovdqa $R1,$H1 + vmovdqa $H2,$R2 + + mov \$2,%eax + +.Lmul_init_vpmadd52: + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + dec %eax + jz .Ldone_init_vpmadd52 + + vpunpcklqdq $R1,$H1,$R1 # 1,2 + vpbroadcastq %x#$H1,%x#$H1 # 2,2 + vpunpcklqdq $R2,$H2,$R2 + vpbroadcastq %x#$H2,%x#$H2 + vpunpcklqdq $R0,$H0,$R0 + vpbroadcastq %x#$H0,%x#$H0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R1,$S1,$S1 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S1,$S1 + vpsllq \$2,$S2,$S2 + + jmp .Lmul_init_vpmadd52 + ud2 + +.align 32 +.Ldone_init_vpmadd52: + vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4 + vinserti128 \$1,%x#$R2,$H2,$R2 + vinserti128 \$1,%x#$R0,$H0,$R0 + + vpermq \$0b11011000,$R1,$R1 # 1,3,2,4 + vpermq \$0b11011000,$R2,$R2 + vpermq \$0b11011000,$R0,$R0 + + vpsllq \$2,$R1,$S1 # S1 = R1*5*4 + vpaddq $R1,$S1,$S1 + vpsllq \$2,$S1,$S1 + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + + test \$3,$len # is length 4*n+2? + jnz .Ldone_init_vpmadd52_2x + + vmovdqu64 $R0,64($ctx) # save key powers + vpbroadcastq %x#$R0,$R0 # broadcast 4th power + vmovdqu64 $R1,96($ctx) + vpbroadcastq %x#$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpbroadcastq %x#$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpbroadcastq %x#$S1,$S1 + + jmp .Lblocks_vpmadd52_4x_key_loaded + ud2 + +.align 32 +.Ldone_init_vpmadd52_2x: + vmovdqu64 $R0,64($ctx) # save key powers + vpsrldq \$8,$R0,$R0 # 0-1-0-2 + vmovdqu64 $R1,96($ctx) + vpsrldq \$8,$R1,$R1 + vmovdqu64 $R2,128($ctx) + vpsrldq \$8,$R2,$R2 + vmovdqu64 $S1,160($ctx) + vpsrldq \$8,$S1,$S1 + jmp .Lblocks_vpmadd52_2x_key_loaded + ud2 + +.align 32 +.Lblocks_vpmadd52_2x_do: + vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers + vmovdqu64 160+8($ctx),${S1}{%k1}{z} + vmovdqu64 64+8($ctx),${R0}{%k1}{z} + vmovdqu64 96+8($ctx),${R1}{%k1}{z} + +.Lblocks_vpmadd52_2x_key_loaded: + vmovdqu64 16*0($inp),$T2 # load data + vpxorq $T3,$T3,$T3 + lea 16*2($inp),$inp + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as x-1-x-0 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + jmp .Ltail_vpmadd52_2x + ud2 + +.align 32 +.Loop_vpmadd52_4x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*2($inp),$T3 + lea 16*4($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$4,$len # len-=64 + jnz .Loop_vpmadd52_4x + +.Ltail_vpmadd52_4x: + vmovdqu64 128($ctx),$R2 # load all key powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + +.Ltail_vpmadd52_2x: + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + # at this point $len is + # either 4*n+2 or 0... + sub \$2,$len # len-=32 + ja .Lblocks_vpmadd52_4x_do + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_4x: + ret +.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x +___ +} +{ +######################################################################## +# As implied by its name 8x subroutine processes 8 blocks in parallel... +# This is intermediate version, as it's used only in cases when input +# length is either 8*n, 8*n+1 or 8*n+2... + +my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); +my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); +my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); +my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10)); + +$code.=<<___; +.type poly1305_blocks_vpmadd52_8x,\@function,4 +.align 32 +poly1305_blocks_vpmadd52_8x: + shr \$4,$len + jz .Lno_data_vpmadd52_8x # too short + + shl \$40,$padbit + mov 64($ctx),%r8 # peek on power of the key + + vmovdqa64 .Lx_mask44(%rip),$mask44 + vmovdqa64 .Lx_mask42(%rip),$mask42 + + test %r8,%r8 # is power value impossible? + js .Linit_vpmadd52 # if it is, then init R[4] + + vmovq 0($ctx),%x#$H0 # load current hash value + vmovq 8($ctx),%x#$H1 + vmovq 16($ctx),%x#$H2 + +.Lblocks_vpmadd52_8x: + ################################################################ + # fist we calculate more key powers + + vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers + vmovdqu64 160($ctx),$S1 + vmovdqu64 64($ctx),$R0 + vmovdqu64 96($ctx),$R1 + + vpsllq \$2,$R2,$S2 # S2 = R2*5*4 + vpaddq $R2,$S2,$S2 + vpsllq \$2,$S2,$S2 + + vpbroadcastq %x#$R2,$RR2 # broadcast 4th power + vpbroadcastq %x#$R0,$RR0 + vpbroadcastq %x#$R1,$RR1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $RR2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $RR2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $RR2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $RR2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $RR2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $RR2,$R0,$D2hi + + vpmadd52luq $RR0,$R0,$D0lo + vpmadd52huq $RR0,$R0,$D0hi + vpmadd52luq $RR0,$R1,$D1lo + vpmadd52huq $RR0,$R1,$D1hi + vpmadd52luq $RR0,$R2,$D2lo + vpmadd52huq $RR0,$R2,$D2hi + + vpmadd52luq $RR1,$S2,$D0lo + vpmadd52huq $RR1,$S2,$D0hi + vpmadd52luq $RR1,$R0,$D1lo + vpmadd52huq $RR1,$R0,$D1hi + vpmadd52luq $RR1,$R1,$D2lo + vpmadd52huq $RR1,$R1,$D2hi + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$RR0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$RR1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$RR2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$RR0,$RR0 + + vpsrlq \$44,$RR0,$tmp # additional step + vpandq $mask44,$RR0,$RR0 + + vpaddq $tmp,$RR1,$RR1 + + ################################################################ + # At this point Rx holds 1324 powers, RRx - 5768, and the goal + # is 15263748, which reflects how data is loaded... + + vpunpcklqdq $R2,$RR2,$T2 # 3748 + vpunpckhqdq $R2,$RR2,$R2 # 1526 + vpunpcklqdq $R0,$RR0,$T0 + vpunpckhqdq $R0,$RR0,$R0 + vpunpcklqdq $R1,$RR1,$T1 + vpunpckhqdq $R1,$RR1,$R1 +___ +######## switch to %zmm +map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); +map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2); + +$code.=<<___; + vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748 + vshufi64x2 \$0x44,$R0,$T0,$RR0 + vshufi64x2 \$0x44,$R1,$T1,$RR1 + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + + vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4 + vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4 + vpaddq $RR2,$SS2,$SS2 + vpaddq $RR1,$SS1,$SS1 + vpsllq \$2,$SS2,$SS2 + vpsllq \$2,$SS1,$SS1 + + vpbroadcastq $padbit,$PAD + vpbroadcastq %x#$mask44,$mask44 + vpbroadcastq %x#$mask42,$mask42 + + vpbroadcastq %x#$SS1,$S1 # broadcast 8th power + vpbroadcastq %x#$SS2,$S2 + vpbroadcastq %x#$RR0,$R0 + vpbroadcastq %x#$RR1,$R1 + vpbroadcastq %x#$RR2,$R2 + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + + # at this point 64-bit lanes are ordered as 73625140 + + vpsrlq \$24,$T3,$T2 # splat the data + vporq $PAD,$T2,$T2 + vpaddq $T2,$H2,$H2 # accumulate input + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + sub \$8,$len + jz .Ltail_vpmadd52_8x + jmp .Loop_vpmadd52_8x + +.align 32 +.Loop_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$S1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$S1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$S2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$S2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$R0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$R0,$D2hi + + vmovdqu64 16*0($inp),$T2 # load data + vmovdqu64 16*4($inp),$T3 + lea 16*8($inp),$inp + vpmadd52luq $H0,$R0,$D0lo + vpmadd52huq $H0,$R0,$D0hi + vpmadd52luq $H0,$R1,$D1lo + vpmadd52huq $H0,$R1,$D1hi + vpmadd52luq $H0,$R2,$D2lo + vpmadd52huq $H0,$R2,$D2hi + + vpunpcklqdq $T3,$T2,$T1 # transpose data + vpunpckhqdq $T3,$T2,$T3 + vpmadd52luq $H1,$S2,$D0lo + vpmadd52huq $H1,$S2,$D0hi + vpmadd52luq $H1,$R0,$D1lo + vpmadd52huq $H1,$R0,$D1hi + vpmadd52luq $H1,$R1,$D2lo + vpmadd52huq $H1,$R1,$D2hi + + ################################################################ + # partial reduction (interleaved with data splat) + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpsrlq \$24,$T3,$T2 + vporq $PAD,$T2,$T2 + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpandq $mask44,$T1,$T0 + vpsrlq \$44,$T1,$T1 + vpsllq \$20,$T3,$T3 + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vporq $T3,$T1,$T1 + vpandq $mask44,$T1,$T1 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + sub \$8,$len # len-=128 + jnz .Loop_vpmadd52_8x + +.Ltail_vpmadd52_8x: + #vpaddq $T2,$H2,$H2 # accumulate input + vpaddq $T0,$H0,$H0 + vpaddq $T1,$H1,$H1 + + vpxorq $D0lo,$D0lo,$D0lo + vpmadd52luq $H2,$SS1,$D0lo + vpxorq $D0hi,$D0hi,$D0hi + vpmadd52huq $H2,$SS1,$D0hi + vpxorq $D1lo,$D1lo,$D1lo + vpmadd52luq $H2,$SS2,$D1lo + vpxorq $D1hi,$D1hi,$D1hi + vpmadd52huq $H2,$SS2,$D1hi + vpxorq $D2lo,$D2lo,$D2lo + vpmadd52luq $H2,$RR0,$D2lo + vpxorq $D2hi,$D2hi,$D2hi + vpmadd52huq $H2,$RR0,$D2hi + + vpmadd52luq $H0,$RR0,$D0lo + vpmadd52huq $H0,$RR0,$D0hi + vpmadd52luq $H0,$RR1,$D1lo + vpmadd52huq $H0,$RR1,$D1hi + vpmadd52luq $H0,$RR2,$D2lo + vpmadd52huq $H0,$RR2,$D2hi + + vpmadd52luq $H1,$SS2,$D0lo + vpmadd52huq $H1,$SS2,$D0hi + vpmadd52luq $H1,$RR0,$D1lo + vpmadd52huq $H1,$RR0,$D1hi + vpmadd52luq $H1,$RR1,$D2lo + vpmadd52huq $H1,$RR1,$D2hi + + ################################################################ + # horizontal addition + + mov \$1,%eax + kmovw %eax,%k1 + vpsrldq \$8,$D0lo,$T0 + vpsrldq \$8,$D0hi,$H0 + vpsrldq \$8,$D1lo,$T1 + vpsrldq \$8,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpsrldq \$8,$D2lo,$T2 + vpsrldq \$8,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vpermq \$0x2,$D0lo,$T0 + vpermq \$0x2,$D0hi,$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vpermq \$0x2,$D1lo,$T1 + vpermq \$0x2,$D1hi,$H1 + vpaddq $T0,$D0lo,$D0lo + vpaddq $H0,$D0hi,$D0hi + vpermq \$0x2,$D2lo,$T2 + vpermq \$0x2,$D2hi,$H2 + vpaddq $T1,$D1lo,$D1lo + vpaddq $H1,$D1hi,$D1hi + vextracti64x4 \$1,$D0lo,%y#$T0 + vextracti64x4 \$1,$D0hi,%y#$H0 + vpaddq $T2,$D2lo,$D2lo + vpaddq $H2,$D2hi,$D2hi + + vextracti64x4 \$1,$D1lo,%y#$T1 + vextracti64x4 \$1,$D1hi,%y#$H1 + vextracti64x4 \$1,$D2lo,%y#$T2 + vextracti64x4 \$1,$D2hi,%y#$H2 +___ +######## switch back to %ymm +map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); +map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); +map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); + +$code.=<<___; + vpaddq $T0,$D0lo,${D0lo}{%k1}{z} + vpaddq $H0,$D0hi,${D0hi}{%k1}{z} + vpaddq $T1,$D1lo,${D1lo}{%k1}{z} + vpaddq $H1,$D1hi,${D1hi}{%k1}{z} + vpaddq $T2,$D2lo,${D2lo}{%k1}{z} + vpaddq $H2,$D2hi,${D2hi}{%k1}{z} + + ################################################################ + # partial reduction + vpsrlq \$44,$D0lo,$tmp + vpsllq \$8,$D0hi,$D0hi + vpandq $mask44,$D0lo,$H0 + vpaddq $tmp,$D0hi,$D0hi + + vpaddq $D0hi,$D1lo,$D1lo + + vpsrlq \$44,$D1lo,$tmp + vpsllq \$8,$D1hi,$D1hi + vpandq $mask44,$D1lo,$H1 + vpaddq $tmp,$D1hi,$D1hi + + vpaddq $D1hi,$D2lo,$D2lo + + vpsrlq \$42,$D2lo,$tmp + vpsllq \$10,$D2hi,$D2hi + vpandq $mask42,$D2lo,$H2 + vpaddq $tmp,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + vpsllq \$2,$D2hi,$D2hi + + vpaddq $D2hi,$H0,$H0 + + vpsrlq \$44,$H0,$tmp # additional step + vpandq $mask44,$H0,$H0 + + vpaddq $tmp,$H1,$H1 + + ################################################################ + + vmovq %x#$H0,0($ctx) + vmovq %x#$H1,8($ctx) + vmovq %x#$H2,16($ctx) + vzeroall + +.Lno_data_vpmadd52_8x: + ret +.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x +___ +} +$code.=<<___; +.type poly1305_emit_base2_44,\@function,3 +.align 32 +poly1305_emit_base2_44: + mov 0($ctx),%r8 # load hash value + mov 8($ctx),%r9 + mov 16($ctx),%r10 + + mov %r9,%rax + shr \$20,%r9 + shl \$44,%rax + mov %r10,%rcx + shr \$40,%r10 + shl \$24,%rcx + + add %rax,%r8 + adc %rcx,%r9 + adc \$0,%r10 + + mov %r8,%rax + add \$5,%r8 # compare to modulus + mov %r9,%rcx + adc \$0,%r9 + adc \$0,%r10 + shr \$2,%r10 # did 130-bit value overflow? + cmovnz %r8,%rax + cmovnz %r9,%rcx + + add 0($nonce),%rax # accumulate nonce + adc 8($nonce),%rcx + mov %rax,0($mac) # write result + mov %rcx,8($mac) + + ret +.size poly1305_emit_base2_44,.-poly1305_emit_base2_44 +___ +} } } +} + +if (!$kernel) +{ # chacha20-poly1305 helpers +my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order +$code.=<<___; +.globl xor128_encrypt_n_pad +.type xor128_encrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_encrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_enc + nop +.Loop_enc_xmm: + movdqu ($inp,$otp),%xmm0 + pxor ($otp),%xmm0 + movdqu %xmm0,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_enc_xmm + + and \$15,%r10 # len % 16 + jz .Ldone_enc + +.Ltail_enc: + mov \$16,$len + sub %r10,$len + xor %eax,%eax +.Loop_enc_byte: + mov ($inp,$otp),%al + xor ($otp),%al + mov %al,($out,$otp) + mov %al,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_enc_byte + + xor %eax,%eax +.Loop_enc_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_enc_pad + +.Ldone_enc: + mov $otp,%rax + ret +.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad + +.globl xor128_decrypt_n_pad +.type xor128_decrypt_n_pad,\@abi-omnipotent +.align 16 +xor128_decrypt_n_pad: + sub $otp,$inp + sub $otp,$out + mov $len,%r10 # put len aside + shr \$4,$len # len / 16 + jz .Ltail_dec + nop +.Loop_dec_xmm: + movdqu ($inp,$otp),%xmm0 + movdqa ($otp),%xmm1 + pxor %xmm0,%xmm1 + movdqu %xmm1,($out,$otp) + movdqa %xmm0,($otp) + lea 16($otp),$otp + dec $len + jnz .Loop_dec_xmm + + pxor %xmm1,%xmm1 + and \$15,%r10 # len % 16 + jz .Ldone_dec + +.Ltail_dec: + mov \$16,$len + sub %r10,$len + xor %eax,%eax + xor %r11,%r11 +.Loop_dec_byte: + mov ($inp,$otp),%r11b + mov ($otp),%al + xor %r11b,%al + mov %al,($out,$otp) + mov %r11b,($otp) + lea 1($otp),$otp + dec %r10 + jnz .Loop_dec_byte + + xor %eax,%eax +.Loop_dec_pad: + mov %al,($otp) + lea 1($otp),$otp + dec $len + jnz .Loop_dec_pad + +.Ldone_dec: + mov $otp,%rax + ret +.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad +___ +} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lcommon_seh_tail + + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R14 + + jmp .Lcommon_seh_tail +.size se_handler,.-se_handler + +.type avx_handler,\@abi-omnipotent +.align 16 +avx_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<prologue label + jb .Lcommon_seh_tail + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 208($context),%rax # pull context->R11 + + lea 0x50(%rax),%rsi + lea 0xf8(%rax),%rax + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size avx_handler,.-avx_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 + + .rva .LSEH_begin_poly1305_blocks_x86_64 + .rva .LSEH_end_poly1305_blocks_x86_64 + .rva .LSEH_info_poly1305_blocks_x86_64 + + .rva .LSEH_begin_poly1305_emit_x86_64 + .rva .LSEH_end_poly1305_emit_x86_64 + .rva .LSEH_info_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_poly1305_blocks_avx + .rva .Lbase2_64_avx + .rva .LSEH_info_poly1305_blocks_avx_1 + + .rva .Lbase2_64_avx + .rva .Leven_avx + .rva .LSEH_info_poly1305_blocks_avx_2 + + .rva .Leven_avx + .rva .LSEH_end_poly1305_blocks_avx + .rva .LSEH_info_poly1305_blocks_avx_3 + + .rva .LSEH_begin_poly1305_emit_avx + .rva .LSEH_end_poly1305_emit_avx + .rva .LSEH_info_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_poly1305_blocks_avx2 + .rva .Lbase2_64_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_1 + + .rva .Lbase2_64_avx2 + .rva .Leven_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_2 + + .rva .Leven_avx2 + .rva .LSEH_end_poly1305_blocks_avx2 + .rva .LSEH_info_poly1305_blocks_avx2_3 +___ +$code.=<<___ if ($avx>2); + .rva .LSEH_begin_poly1305_blocks_avx512 + .rva .LSEH_end_poly1305_blocks_avx512 + .rva .LSEH_info_poly1305_blocks_avx512 +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_poly1305_init_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 + +.LSEH_info_poly1305_blocks_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_body,.Lblocks_epilogue + +.LSEH_info_poly1305_emit_x86_64: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64 +___ +$code.=<<___ if ($avx); +.LSEH_info_poly1305_blocks_avx_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[] + +.LSEH_info_poly1305_emit_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx +___ +$code.=<<___ if ($avx>1); +.LSEH_info_poly1305_blocks_avx2_1: + .byte 9,0,0,0 + .rva se_handler + .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[] + +.LSEH_info_poly1305_blocks_avx2_3: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[] +___ +$code.=<<___ if ($avx>2); +.LSEH_info_poly1305_blocks_avx512: + .byte 9,0,0,0 + .rva avx_handler + .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[] +___ +} + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach (split('\n',$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + s/%r([a-z]+)#d/%e$1/g; + s/%r([0-9]+)#d/%r$1d/g; + s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; + + if ($kernel) { + s/(^\.type.*),[0-9]+$/\1/; + s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/; + next if /^\.cfi.*/; + } + + print $_,"\n"; +} +close STDOUT; diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 0cc4537e6617..79bb58737d52 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -1,8 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Poly1305 authenticator algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <crypto/algapi.h> @@ -13,108 +11,166 @@ #include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/module.h> +#include <asm/intel-family.h> #include <asm/simd.h> -asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, - const u32 *r, unsigned int blocks); -asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, - unsigned int blocks, const u32 *u); -asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, - unsigned int blocks, const u32 *u); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); +asmlinkage void poly1305_init_x86_64(void *ctx, + const u8 key[POLY1305_KEY_SIZE]); +asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, + const size_t len, const u32 padbit); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); + +struct poly1305_arch_internal { + union { + struct { + u32 h[5]; + u32 is_base2_26; + }; + u64 hs[3]; + }; + u64 r[2]; + u64 pad; + struct { u32 r2, r1, r4, r3; } rn[9]; +}; -static void poly1305_simd_mult(u32 *a, const u32 *b) +/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit + * the unfortunate situation of using AVX and then having to go back to scalar + * -- because the user is silly and has called the update function from two + * separate contexts -- then we need to convert back to the original base before + * proceeding. It is possible to reason that the initial reduction below is + * sufficient given the implementation invariants. However, for an avoidance of + * doubt and because this is not performance critical, we do the full reduction + * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py + */ +static void convert_to_base2_64(void *ctx) { - u8 m[POLY1305_BLOCK_SIZE]; - - memset(m, 0, sizeof(m)); - /* The poly1305 block function adds a hi-bit to the accumulator which - * we don't need for key multiplication; compensate for it. */ - a[4] -= 1 << 24; - poly1305_block_sse2(a, m, b, 1); + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->hs[2] = state->h[4] >> 24; +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); + state->hs[2] &= 3; + state->hs[0] += cy; + state->hs[1] += (cy = ULT(state->hs[0], cy)); + state->hs[2] += ULT(state->hs[1], cy); +#undef ULT + state->is_base2_26 = 0; } -static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) +static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) { - unsigned int datalen; - - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; - } - if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_core_blocks(&dctx->h, dctx->r, src, - srclen / POLY1305_BLOCK_SIZE, 1); - srclen %= POLY1305_BLOCK_SIZE; - } - return srclen; + poly1305_init_x86_64(ctx, key); } -static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) +static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, + const u32 padbit) { - unsigned int blocks, datalen; - - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; + struct poly1305_arch_internal *state = ctx; + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || + PAGE_SIZE % POLY1305_BLOCK_SIZE); + + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || + (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || + !crypto_simd_usable()) { + convert_to_base2_64(ctx); + poly1305_blocks_x86_64(ctx, inp, len, padbit); + return; } - if (IS_ENABLED(CONFIG_AS_AVX2) && - static_branch_likely(&poly1305_use_avx2) && - srclen >= POLY1305_BLOCK_SIZE * 4) { - if (unlikely(dctx->rset < 4)) { - if (dctx->rset < 2) { - dctx->r[1] = dctx->r[0]; - poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); - } - dctx->r[2] = dctx->r[1]; - poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); - dctx->r[3] = dctx->r[2]; - poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); - dctx->rset = 4; - } - blocks = srclen / (POLY1305_BLOCK_SIZE * 4); - poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, - dctx->r[1].r); - src += POLY1305_BLOCK_SIZE * 4 * blocks; - srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; + for (;;) { + const size_t bytes = min_t(size_t, len, PAGE_SIZE); + + kernel_fpu_begin(); + if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) + poly1305_blocks_avx512(ctx, inp, bytes, padbit); + else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2)) + poly1305_blocks_avx2(ctx, inp, bytes, padbit); + else + poly1305_blocks_avx(ctx, inp, bytes, padbit); + kernel_fpu_end(); + len -= bytes; + if (!len) + break; + inp += bytes; } +} - if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { - if (unlikely(dctx->rset < 2)) { - dctx->r[1] = dctx->r[0]; - poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); - dctx->rset = 2; - } - blocks = srclen / (POLY1305_BLOCK_SIZE * 2); - poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, - blocks, dctx->r[1].r); - src += POLY1305_BLOCK_SIZE * 2 * blocks; - srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; - } - if (srclen >= POLY1305_BLOCK_SIZE) { - poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); - srclen -= POLY1305_BLOCK_SIZE; - } - return srclen; +static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]) +{ + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx)) + poly1305_emit_x86_64(ctx, mac, nonce); + else + poly1305_emit_avx(ctx, mac, nonce); } -void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) { - poly1305_init_generic(desc, key); + poly1305_simd_init(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(&key[16]); + dctx->s[1] = get_unaligned_le32(&key[20]); + dctx->s[2] = get_unaligned_le32(&key[24]); + dctx->s[3] = get_unaligned_le32(&key[28]); + dctx->buflen = 0; + dctx->sset = true; } EXPORT_SYMBOL(poly1305_init_arch); +static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, + const u8 *inp, unsigned int len) +{ + unsigned int acc = 0; + if (unlikely(!dctx->sset)) { + if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) { + poly1305_simd_init(&dctx->h, inp); + inp += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + acc += POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(&inp[0]); + dctx->s[1] = get_unaligned_le32(&inp[4]); + dctx->s[2] = get_unaligned_le32(&inp[8]); + dctx->s[3] = get_unaligned_le32(&inp[12]); + inp += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + acc += POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + } + return acc; +} + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, unsigned int srclen) { - unsigned int bytes; + unsigned int bytes, used; if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); @@ -124,31 +180,19 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, dctx->buflen += bytes; if (dctx->buflen == POLY1305_BLOCK_SIZE) { - if (static_branch_likely(&poly1305_use_simd) && - likely(crypto_simd_usable())) { - kernel_fpu_begin(); - poly1305_simd_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE); - kernel_fpu_end(); - } else { - poly1305_scalar_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE); - } + if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE))) + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); dctx->buflen = 0; } } if (likely(srclen >= POLY1305_BLOCK_SIZE)) { - if (static_branch_likely(&poly1305_use_simd) && - likely(crypto_simd_usable())) { - kernel_fpu_begin(); - bytes = poly1305_simd_blocks(dctx, src, srclen); - kernel_fpu_end(); - } else { - bytes = poly1305_scalar_blocks(dctx, src, srclen); - } - src += srclen - bytes; - srclen = bytes; + bytes = round_down(srclen, POLY1305_BLOCK_SIZE); + srclen -= bytes; + used = crypto_poly1305_setdctxkey(dctx, src, bytes); + if (likely(bytes - used)) + poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1); + src += bytes; } if (unlikely(srclen)) { @@ -158,9 +202,17 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, } EXPORT_SYMBOL(poly1305_update_arch); -void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) { - poly1305_final_generic(desc, digest); + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_simd_emit(&dctx->h, dst, dctx->s); + *dctx = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final_arch); @@ -168,38 +220,34 @@ static int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - poly1305_core_init(&dctx->h); - dctx->buflen = 0; - dctx->rset = 0; - dctx->sset = false; - + *dctx = (struct poly1305_desc_ctx){}; return 0; } -static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) +static int crypto_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - if (unlikely(!dctx->sset)) - return -ENOKEY; - - poly1305_final_generic(dctx, dst); + poly1305_update_arch(dctx, src, srclen); return 0; } -static int poly1305_simd_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) +static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - poly1305_update_arch(dctx, src, srclen); + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); return 0; } static struct shash_alg alg = { .digestsize = POLY1305_DIGEST_SIZE, .init = crypto_poly1305_init, - .update = poly1305_simd_update, + .update = crypto_poly1305_update, .final = crypto_poly1305_final, .descsize = sizeof(struct poly1305_desc_ctx), .base = { @@ -213,17 +261,19 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { - if (!boot_cpu_has(X86_FEATURE_XMM2)) - return 0; - - static_branch_enable(&poly1305_use_simd); - - if (IS_ENABLED(CONFIG_AS_AVX2) && - boot_cpu_has(X86_FEATURE_AVX) && + if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx); + if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx2); - + if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X) + static_branch_enable(&poly1305_use_avx512); return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; } @@ -237,7 +287,7 @@ module_init(poly1305_simd_mod_init); module_exit(poly1305_simd_mod_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); MODULE_DESCRIPTION("Poly1305 authenticator"); MODULE_ALIAS_CRYPTO("poly1305"); MODULE_ALIAS_CRYPTO("poly1305-simd"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 13fd8d3d2da0..f973ace44ad3 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -19,18 +19,16 @@ #define SERPENT_AVX2_PARALLEL_BLOCKS 16 /* 16-way AVX2 parallel cipher functions */ -asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); +asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src, +asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -44,13 +42,13 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) } + .fn_u = { .ecb = serpent_ecb_enc_16way } }, { .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } + .fn_u = { .ecb = serpent_ecb_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -60,13 +58,13 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = 16, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) } + .fn_u = { .ctr = serpent_ctr_16way } }, { .num_blocks = 8, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } + .fn_u = { .ctr = serpent_ctr_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } + .fn_u = { .ctr = __serpent_crypt_ctr } } } }; @@ -76,13 +74,13 @@ static const struct common_glue_ctx serpent_enc_xts = { .funcs = { { .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) } + .fn_u = { .xts = serpent_xts_enc_16way } }, { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } + .fn_u = { .xts = serpent_xts_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } + .fn_u = { .xts = serpent_xts_enc } } } }; @@ -92,13 +90,13 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) } + .fn_u = { .ecb = serpent_ecb_dec_16way } }, { .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } + .fn_u = { .ecb = serpent_ecb_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -108,13 +106,13 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = 16, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) } + .fn_u = { .cbc = serpent_cbc_dec_16way } }, { .num_blocks = 8, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } + .fn_u = { .cbc = serpent_cbc_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -124,13 +122,13 @@ static const struct common_glue_ctx serpent_dec_xts = { .funcs = { { .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) } + .fn_u = { .xts = serpent_xts_dec_16way } }, { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } + .fn_u = { .xts = serpent_xts_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } + .fn_u = { .xts = serpent_xts_dec } } } }; @@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_enc_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, false); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_dec_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, true); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7d3dca38a5a2..7806d1cbe854 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -20,33 +20,35 @@ #include <asm/crypto/serpent-avx.h> /* 8-way parallel cipher functions */ -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx); -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); -void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); -void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__serpent_encrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt); } EXPORT_SYMBOL_GPL(serpent_xts_enc); -void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__serpent_decrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt); } EXPORT_SYMBOL_GPL(serpent_xts_dec); @@ -102,10 +102,10 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } + .fn_u = { .ecb = serpent_ecb_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -115,10 +115,10 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } + .fn_u = { .ctr = serpent_ctr_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } + .fn_u = { .ctr = __serpent_crypt_ctr } } } }; @@ -128,10 +128,10 @@ static const struct common_glue_ctx serpent_enc_xts = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } + .fn_u = { .xts = serpent_xts_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } + .fn_u = { .xts = serpent_xts_enc } } } }; @@ -141,10 +141,10 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } + .fn_u = { .ecb = serpent_ecb_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -154,10 +154,10 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } + .fn_u = { .cbc = serpent_cbc_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -167,10 +167,10 @@ static const struct common_glue_ctx serpent_dec_xts = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } + .fn_u = { .xts = serpent_xts_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } + .fn_u = { .xts = serpent_xts_dec } } } }; @@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_enc_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, false); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_dec_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, true); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 5fdf1931d069..4fed8d26b91a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) { u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; unsigned int j; for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) @@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) u128_xor(dst, src, (u128 *)&ctrblk); } -static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, +static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; unsigned int i; for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { @@ -79,10 +85,10 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } + .fn_u = { .ecb = serpent_enc_blk_xway } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -92,10 +98,10 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } + .fn_u = { .ctr = serpent_crypt_ctr_xway } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } + .fn_u = { .ctr = serpent_crypt_ctr } } } }; @@ -105,10 +111,10 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } + .fn_u = { .ecb = serpent_dec_blk_xway } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -118,10 +124,10 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } + .fn_u = { .cbc = serpent_decrypt_cbc_xway } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 6decc85ef7b7..1e594d60afa5 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -62,11 +62,11 @@ *Visit http://software.intel.com/en-us/articles/ *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ * - *Updates 20-byte SHA-1 record in 'hash' for even number of - *'num_blocks' consecutive 64-byte blocks + *Updates 20-byte SHA-1 record at start of 'state', from 'input', for + *even number of 'blocks' consecutive 64-byte blocks. * *extern "C" void sha1_transform_avx2( - * int *hash, const char* input, size_t num_blocks ); + * struct sha1_state *state, const u8* input, int blocks ); */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 5d03c1173690..12e2d19d7402 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -457,9 +457,13 @@ W_PRECALC_SSSE3 movdqu \a,\b .endm -/* SSSE3 optimized implementation: - * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); +/* + * SSSE3 optimized implementation: + * + * extern "C" void sha1_transform_ssse3(struct sha1_state *state, + * const u8 *data, int blocks); + * + * Note that struct sha1_state is assumed to begin with u32 state[5]. */ SHA1_VECTOR_ASM sha1_transform_ssse3 @@ -545,8 +549,8 @@ W_PRECALC_AVX /* AVX optimized implementation: - * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); + * extern "C" void sha1_transform_avx(struct sha1_state *state, + * const u8 *data, int blocks); */ SHA1_VECTOR_ASM sha1_transform_avx diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 639d4c2fd6a8..d70b40ad594c 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -27,11 +27,8 @@ #include <crypto/sha1_base.h> #include <asm/simd.h> -typedef void (sha1_transform_fn)(u32 *digest, const char *data, - unsigned int rounds); - static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha1_transform_fn *sha1_xform) + unsigned int len, sha1_block_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -39,48 +36,47 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); - /* make sure casting to sha1_block_fn() is safe */ + /* + * Make sure struct sha1_state begins directly with the SHA1 + * 160-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); kernel_fpu_begin(); - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); kernel_fpu_end(); return 0; } static int sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) + unsigned int len, u8 *out, sha1_block_fn *sha1_xform) { if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_xform); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); + sha1_base_do_update(desc, data, len, sha1_xform); + sha1_base_do_finalize(desc, sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_ssse3(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_update(desc, data, len, sha1_transform_ssse3); } static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_ssse3); + return sha1_finup(desc, data, len, out, sha1_transform_ssse3); } /* Add padding and return the message digest. */ @@ -119,21 +115,19 @@ static void unregister_sha1_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx(struct sha1_state *state, + const u8 *data, int blocks); static int sha1_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_update(desc, data, len, sha1_transform_avx); } static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_transform_avx); + return sha1_finup(desc, data, len, out, sha1_transform_avx); } static int sha1_avx_final(struct shash_desc *desc, u8 *out) @@ -190,8 +184,8 @@ static inline void unregister_sha1_avx(void) { } #if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ -asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks); static bool avx2_usable(void) { @@ -203,28 +197,26 @@ static bool avx2_usable(void) return false; } -static void sha1_apply_transform_avx2(u32 *digest, const char *data, - unsigned int rounds) +static void sha1_apply_transform_avx2(struct sha1_state *state, + const u8 *data, int blocks) { /* Select the optimal transform based on data block size */ - if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) - sha1_transform_avx2(digest, data, rounds); + if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(state, data, blocks); else - sha1_transform_avx(digest, data, rounds); + sha1_transform_avx(state, data, blocks); } static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return sha1_update(desc, data, len, sha1_apply_transform_avx2); } static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_apply_transform_avx2); + return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); } static int sha1_avx2_final(struct shash_desc *desc, u8 *out) @@ -267,21 +259,19 @@ static inline void unregister_sha1_avx2(void) { } #endif #ifdef CONFIG_AS_SHA1_NI -asmlinkage void sha1_ni_transform(u32 *digest, const char *data, - unsigned int rounds); +asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, + int rounds); static int sha1_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha1_update(desc, data, len, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_update(desc, data, len, sha1_ni_transform); } static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - return sha1_finup(desc, data, len, out, - (sha1_transform_fn *) sha1_ni_transform); + return sha1_finup(desc, data, len, out, sha1_ni_transform); } static int sha1_ni_final(struct shash_desc *desc, u8 *out) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 22e14c8dd2e4..fcbc30f58c38 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -341,8 +341,8 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 519b551ad576..499d9ec129de 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -520,8 +520,8 @@ STACK_SIZE = _RSP + _RSP_SIZE .endm ######################################################################## -## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks) +## arg 1 : pointer to state ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 69cc2f91dc4c..ddfa863b4ee3 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -347,8 +347,10 @@ a = TMP_ .endm ######################################################################## -## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) -## arg 1 : pointer to digest +## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, +## int blocks); +## arg 1 : pointer to state +## (struct sha256_state is assumed to begin with u32 state[8]) ## arg 2 : pointer to input data ## arg 3 : Num blocks ######################################################################## diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f9aff31fe59e..03ad657c04bd 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -41,12 +41,11 @@ #include <linux/string.h> #include <asm/simd.h> -asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, - u64 rounds); -typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); +asmlinkage void sha256_transform_ssse3(struct sha256_state *state, + const u8 *data, int blocks); static int _sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) + unsigned int len, sha256_block_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -54,28 +53,29 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data, (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); - /* make sure casting to sha256_block_fn() is safe */ + /* + * Make sure struct sha256_state begins directly with the SHA256 + * 256-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); kernel_fpu_begin(); - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); kernel_fpu_end(); return 0; } static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) + unsigned int len, u8 *out, sha256_block_fn *sha256_xform) { if (!crypto_simd_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_xform); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); + sha256_base_do_update(desc, data, len, sha256_xform); + sha256_base_do_finalize(desc, sha256_xform); kernel_fpu_end(); return sha256_base_finish(desc, out); @@ -145,8 +145,8 @@ static void unregister_sha256_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha256_transform_avx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_avx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -227,8 +227,8 @@ static inline void unregister_sha256_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, - u64 rounds); +asmlinkage void sha256_transform_rorx(struct sha256_state *state, + const u8 *data, int blocks); static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -307,8 +307,8 @@ static inline void unregister_sha256_avx2(void) { } #endif #ifdef CONFIG_AS_SHA256_NI -asmlinkage void sha256_ni_transform(u32 *digest, const char *data, - u64 rounds); /*unsigned int rounds);*/ +asmlinkage void sha256_ni_transform(struct sha256_state *digest, + const u8 *data, int rounds); static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 3704ddd7e5d5..90ea945ba5e6 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -271,11 +271,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_avx(void* D, const void* M, u64 L) -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_avx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_avx) cmp $0, msglen diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 80d830e7ee09..3dd886b14e7d 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -563,11 +563,12 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_rorx(void* D, const void* M, uint64_t L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks +# void sha512_transform_rorx(sha512_state *state, const u8 *data, int blocks) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_rorx) # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 838f984e95d9..7946a1bee85b 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -269,11 +269,14 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE .endm ######################################################################## -# void sha512_transform_ssse3(void* D, const void* M, u64 L)# -# Purpose: Updates the SHA512 digest stored at D with the message stored in M. -# The size of the message pointed to by M must be an integer multiple of SHA512 -# message blocks. -# L is the message length in SHA512 blocks. +## void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, +## int blocks); +# (struct sha512_state is assumed to begin with u64 state[8]) +# Purpose: Updates the SHA512 digest stored at "state" with the message +# stored in "data". +# The size of the message pointed to by "data" must be an integer multiple +# of SHA512 message blocks. +# "blocks" is the message length in SHA512 blocks. ######################################################################## SYM_FUNC_START(sha512_transform_ssse3) diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 458356a3f124..1c444f41037c 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -39,13 +39,11 @@ #include <crypto/sha512_base.h> #include <asm/simd.h> -asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, - u64 rounds); - -typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); +asmlinkage void sha512_transform_ssse3(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha512_transform_fn *sha512_xform) + unsigned int len, sha512_block_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc); @@ -53,28 +51,29 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); - /* make sure casting to sha512_block_fn() is safe */ + /* + * Make sure struct sha512_state begins directly with the SHA512 + * 512-bit internal state, as this is what the asm functions expect. + */ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); kernel_fpu_begin(); - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); kernel_fpu_end(); return 0; } static int sha512_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) + unsigned int len, u8 *out, sha512_block_fn *sha512_xform) { if (!crypto_simd_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); if (len) - sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_xform); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); + sha512_base_do_update(desc, data, len, sha512_xform); + sha512_base_do_finalize(desc, sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); @@ -144,8 +143,8 @@ static void unregister_sha512_ssse3(void) } #ifdef CONFIG_AS_AVX -asmlinkage void sha512_transform_avx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_avx(struct sha512_state *state, + const u8 *data, int blocks); static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { @@ -225,8 +224,8 @@ static inline void unregister_sha512_avx(void) { } #endif #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) -asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, - u64 rounds); +asmlinkage void sha512_transform_rorx(struct sha512_state *state, + const u8 *data, int blocks); static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index d561c821788b..2dbc8ce3730e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -22,20 +22,17 @@ #define TWOFISH_PARALLEL_BLOCKS 8 /* 8-way parallel cipher functions */ -asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, return twofish_setkey(&tfm->base, key, keylen); } -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } -static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(twofish_enc_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk); } -static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(twofish_dec_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk); } struct twofish_xts_ctx { @@ -70,7 +64,6 @@ static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 *flags = &tfm->base.crt_flags; int err; err = xts_verify_key(tfm, key, keylen); @@ -78,13 +71,12 @@ static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, return err; /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2); if (err) return err; /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); + return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); } static const struct common_glue_ctx twofish_enc = { @@ -93,13 +85,13 @@ static const struct common_glue_ctx twofish_enc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } + .fn_u = { .ecb = twofish_ecb_enc_8way } }, { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + .fn_u = { .ecb = twofish_enc_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + .fn_u = { .ecb = twofish_enc_blk } } } }; @@ -109,13 +101,13 @@ static const struct common_glue_ctx twofish_ctr = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } + .fn_u = { .ctr = twofish_ctr_8way } }, { .num_blocks = 3, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } + .fn_u = { .ctr = twofish_enc_blk_ctr_3way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } + .fn_u = { .ctr = twofish_enc_blk_ctr } } } }; @@ -125,10 +117,10 @@ static const struct common_glue_ctx twofish_enc_xts = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } + .fn_u = { .xts = twofish_xts_enc_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } + .fn_u = { .xts = twofish_xts_enc } } } }; @@ -138,13 +130,13 @@ static const struct common_glue_ctx twofish_dec = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } + .fn_u = { .ecb = twofish_ecb_dec_8way } }, { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + .fn_u = { .ecb = twofish_dec_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .ecb = twofish_dec_blk } } } }; @@ -154,13 +146,13 @@ static const struct common_glue_ctx twofish_dec_cbc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } + .fn_u = { .cbc = twofish_cbc_dec_8way } }, { .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + .fn_u = { .cbc = twofish_dec_blk_cbc_3way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .cbc = twofish_dec_blk } } } }; @@ -170,10 +162,10 @@ static const struct common_glue_ctx twofish_dec_xts = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } + .fn_u = { .xts = twofish_xts_dec_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } + .fn_u = { .xts = twofish_xts_dec } } } }; @@ -189,8 +181,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -208,8 +199,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&twofish_enc_xts, req, - XTS_TWEAK_CAST(twofish_enc_blk), + return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -218,8 +208,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&twofish_dec_xts, req, - XTS_TWEAK_CAST(twofish_enc_blk), + return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 1dc9e29f221e..768af6075479 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, return twofish_setkey(&tfm->base, key, keylen); } -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } -static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, +static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, true); } -void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) +void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) { u128 ivs[2]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; ivs[0] = src[0]; ivs[1] = src[1]; @@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) *dst = *src; @@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); -void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, - le128 *iv) +void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[3]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) { dst[0] = src[0]; @@ -94,10 +98,10 @@ static const struct common_glue_ctx twofish_enc = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + .fn_u = { .ecb = twofish_enc_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + .fn_u = { .ecb = twofish_enc_blk } } } }; @@ -107,10 +111,10 @@ static const struct common_glue_ctx twofish_ctr = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } + .fn_u = { .ctr = twofish_enc_blk_ctr_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } + .fn_u = { .ctr = twofish_enc_blk_ctr } } } }; @@ -120,10 +124,10 @@ static const struct common_glue_ctx twofish_dec = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + .fn_u = { .ecb = twofish_dec_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .ecb = twofish_dec_blk } } } }; @@ -133,10 +137,10 @@ static const struct common_glue_ctx twofish_dec_cbc = { .funcs = { { .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + .fn_u = { .cbc = twofish_dec_blk_cbc_3way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .cbc = twofish_dec_blk } } } }; @@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 76942cbd95a1..f2bb91e87877 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1728,7 +1728,7 @@ SYM_CODE_END(nmi) SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax - sysret + sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 15908eb9b17e..c17cb77eb150 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -440,3 +440,5 @@ 433 i386 fspick sys_fspick __ia32_sys_fspick 434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open 435 i386 clone3 sys_clone3 __ia32_sys_clone3 +437 i386 openat2 sys_openat2 __ia32_sys_openat2 +438 i386 pidfd_getfd sys_pidfd_getfd __ia32_sys_pidfd_getfd diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c29976eca4a8..44d510bc9b78 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -357,6 +357,8 @@ 433 common fspick __x64_sys_fspick 434 common pidfd_open __x64_sys_pidfd_open 435 common clone3 __x64_sys_clone3/ptregs +437 common openat2 __x64_sys_openat2 +438 common pidfd_getfd __x64_sys_pidfd_getfd # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 93c6dc7812d0..ea7e0155c604 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -16,18 +16,23 @@ SECTIONS * segment. */ - vvar_start = . - 3 * PAGE_SIZE; - vvar_page = vvar_start; + vvar_start = . - 4 * PAGE_SIZE; + vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; -#define __VVAR_KERNEL_LDS #include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR pvclock_page = vvar_start + PAGE_SIZE; hvclock_page = vvar_start + 2 * PAGE_SIZE; + timens_page = vvar_start + 3 * PAGE_SIZE; + +#undef _ASM_X86_VVAR_H + /* Place all vvars in timens too at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset; +#include <asm/vvar.h> +#undef EMIT_VVAR . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 3a4d8d4d39f8..3842873b3ae3 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -75,12 +75,14 @@ enum { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; const int special_pages[] = { sym_vvar_page, sym_pvclock_page, sym_hvclock_page, + sym_timens_page, }; struct vdso_sym { @@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_page] = {"vvar_page", true}, [sym_pvclock_page] = {"pvclock_page", true}, [sym_hvclock_page] = {"hvclock_page", true}, + [sym_timens_page] = {"timens_page", true}, {"VDSO32_NOTE_MASK", true}, {"__kernel_vsyscall", true}, {"__kernel_sigreturn", true}, diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 240626e7f55a..43842fade8fa 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/kernel.h> #include <linux/mm_types.h> +#include <linux/elf.h> #include <asm/processor.h> #include <asm/vdso.h> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f5937742b290..c1b8496b5606 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -14,16 +14,30 @@ #include <linux/elf.h> #include <linux/cpu.h> #include <linux/ptrace.h> +#include <linux/time_namespace.h> + #include <asm/pvclock.h> #include <asm/vgtod.h> #include <asm/proto.h> #include <asm/vdso.h> #include <asm/vvar.h> +#include <asm/tlb.h> #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> #include <clocksource/hyperv_timer.h> +#undef _ASM_X86_VVAR_H +#define EMIT_VVAR(name, offset) \ + const size_t name ## _offset = offset; +#include <asm/vvar.h> + +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page + _vdso_data_offset); +} +#undef EMIT_VVAR + #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; #endif @@ -37,6 +51,7 @@ void __init init_vdso_image(const struct vdso_image *image) image->alt_len)); } +static const struct vm_special_mapping vvar_mapping; struct linux_binprm; static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, @@ -84,10 +99,74 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } +static int vvar_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + const struct vdso_image *image = new_vma->vm_mm->context.vdso_image; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + + if (new_size != -image->sym_vvar_start) + return -EINVAL; + + return 0; +} + +#ifdef CONFIG_TIME_NS +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} + +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + if (down_write_killable(&mm->mmap_sem)) + return -EINTR; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_mapping)) + zap_page_range(vma, vma->vm_start, size); + } + + up_write(&mm->mmap_sem); + return 0; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { const struct vdso_image *image = vma->vm_mm->context.vdso_image; + unsigned long pfn; long sym_offset; if (!image) @@ -107,8 +186,36 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; if (sym_offset == image->sym_vvar_page) { - return vmf_insert_pfn(vma, vmf->address, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + struct page *timens_page = find_timens_vvar_page(vma); + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the sym_vvar_page offset and + * the real VVAR page is mapped with the sym_timens_page + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (timens_page) { + unsigned long addr; + vm_fault_t err; + + /* + * Optimization: inside time namespace pre-fault + * VVAR page too. As on timens page there are only + * offsets for clocks on VVAR, it'll be faulted + * shortly by VDSO code. + */ + addr = vmf->address + (image->sym_timens_page - sym_offset); + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + + pfn = page_to_pfn(timens_page); + } + + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_get_pvti_cpu0_va(); @@ -123,6 +230,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) return vmf_insert_pfn(vma, vmf->address, virt_to_phys(tsc_pg) >> PAGE_SHIFT); + } else if (sym_offset == image->sym_timens_page) { + struct page *timens_page = find_timens_vvar_page(vma); + + if (!timens_page) + return VM_FAULT_SIGBUS; + + pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT; + return vmf_insert_pfn(vma, vmf->address, pfn); } return VM_FAULT_SIGBUS; @@ -136,6 +251,7 @@ static const struct vm_special_mapping vdso_mapping = { static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, + .mremap = vvar_mremap, }; /* diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a7752cd78b89..1f22b6bbda68 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -14,6 +14,10 @@ static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); static unsigned long perf_nmi_window; +/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */ +#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) +#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -301,6 +305,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -316,15 +339,10 @@ static int amd_core_hw_config(struct perf_event *event) else if (event->attr.exclude_guest) event->hw.config |= AMD64_EVENTSEL_HOSTONLY; - return 0; -} + if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) + event->hw.flags |= PERF_X86_EVENT_PAIR; -/* - * AMD64 events are detected based on their event codes. - */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); + return 0; } static inline int amd_is_nb_event(struct hw_perf_event *hwc) @@ -855,6 +873,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, } } +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + +static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (is_counter_pair(hwc)) + --cpuc->n_pair; +} + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -898,33 +939,15 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + u64 even_ctr_mask = 0ULL; + int i; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; - /* Avoid calulating the value each time in the NMI handler */ + /* Avoid calculating the value each time in the NMI handler */ perf_nmi_window = msecs_to_jiffies(100); - switch (boot_cpu_data.x86) { - case 0x15: - pr_cont("Fam15h "); - x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - break; - case 0x17: - pr_cont("Fam17h "); - /* - * In family 17h, there are no event constraints in the PMC hardware. - * We fallback to using default amd_get_event_constraints. - */ - break; - case 0x18: - pr_cont("Fam18h "); - /* Using default amd_get_event_constraints. */ - break; - default: - pr_err("core perfctr but no constraints; unknown hardware!\n"); - return -ENODEV; - } - /* * If core performance counter extensions exists, we must use * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also @@ -939,6 +962,32 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.amd_nb_constraints = 0; + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= 1 << i; + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; + x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; + x86_pmu.flags |= PMU_FL_PAIR; + } + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 9a89d98c55bd..3bb738f5a472 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -376,7 +376,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -388,6 +388,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -398,11 +399,15 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -613,6 +618,7 @@ void x86_pmu_disable_all(void) int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -622,6 +628,8 @@ void x86_pmu_disable_all(void) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(x86_pmu_config_addr(idx), val); + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(idx + 1), 0); } } @@ -694,7 +702,7 @@ struct sched_state { int counter; /* counter index */ int unassigned; /* number of events to be assigned left */ int nr_gp; /* number of GP counters used */ - unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 used; }; /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ @@ -751,8 +759,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched) sched->saved_states--; sched->state = sched->saved[sched->saved_states]; - /* continue with next counter: */ - clear_bit(sched->state.counter++, sched->state.used); + /* this assignment didn't work out */ + /* XXX broken vs EVENT_PAIR */ + sched->state.used &= ~BIT_ULL(sched->state.counter); + + /* try the next one */ + sched->state.counter++; return true; } @@ -777,20 +789,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { - if (!__test_and_set_bit(idx, sched->state.used)) - goto done; + u64 mask = BIT_ULL(idx); + + if (sched->state.used & mask) + continue; + + sched->state.used |= mask; + goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) { - if (sched->state.nr_gp++ >= sched->max_gp) - return false; + u64 mask = BIT_ULL(idx); - goto done; - } + if (c->flags & PERF_X86_EVENT_PAIR) + mask |= mask << 1; + + if (sched->state.used & mask) + continue; + + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + + sched->state.used |= mask; + goto done; } return false; @@ -867,12 +891,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; int n0, i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); + u64 used_mask = 0; /* * Compute the number of events already present; see x86_pmu_add(), @@ -915,6 +937,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { + u64 mask; + hwc = &cpuc->event_list[i]->hw; c = cpuc->event_constraint[i]; @@ -926,11 +950,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!test_bit(hwc->idx, c->idxmsk)) break; + mask = BIT_ULL(hwc->idx); + if (is_counter_pair(hwc)) + mask |= mask << 1; + /* not already used */ - if (test_bit(hwc->idx, used_mask)) + if (used_mask & mask) break; - __set_bit(hwc->idx, used_mask); + used_mask |= mask; + if (assign) assign[i] = hwc->idx; } @@ -953,6 +982,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) READ_ONCE(cpuc->excl_cntrs->exclusive_present)) gpmax /= 2; + /* + * Reduce the amount of available counters to allow fitting + * the extra Merge events needed by large increment events. + */ + if (x86_pmu.flags & PMU_FL_PAIR) { + gpmax = x86_pmu.num_counters - cpuc->n_pair; + WARN_ON(gpmax <= 0); + } + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, wmax, gpmax, assign); } @@ -1033,6 +1071,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return -EINVAL; cpuc->event_list[n] = leader; n++; + if (is_counter_pair(&leader->hw)) + cpuc->n_pair++; } if (!dogrp) return n; @@ -1047,6 +1087,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, cpuc->event_list[n] = event; n++; + if (is_counter_pair(&event->hw)) + cpuc->n_pair++; } return n; } @@ -1233,6 +1275,13 @@ int x86_perf_event_set_period(struct perf_event *event) wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* + * Clear the Merge event counter's upper 16 bits since + * we currently declare a 48-bit counter width + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_event_addr(idx + 1), 0); + + /* * Due to erratum on certan cpu we need * a second write to be sure the register * is updated properly @@ -1642,9 +1691,12 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { - struct perf_pmu_events_attr *pmu_attr = \ + struct perf_pmu_events_attr *pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); + u64 config = 0; + + if (pmu_attr->id < x86_pmu.max_events) + config = x86_pmu.event_map(pmu_attr->id); /* string trumps id */ if (pmu_attr->event_str) @@ -1713,6 +1765,9 @@ is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct perf_pmu_events_attr *pmu_attr; + if (idx >= x86_pmu.max_events) + return 0; + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); /* str trumps id */ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 38de4a7f6752..6a3b599ee0fe 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,9 +63,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ce83950036c5..4b94ae4ae369 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -7,6 +7,7 @@ #include <asm/perf_event.h> #include <asm/tlbflush.h> #include <asm/insn.h> +#include <asm/io.h> #include "../perf_event.h" diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 5053a403e4ae..09913121e726 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -741,6 +741,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE_L, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_COMETLAKE, model_skl), {}, }; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index dbaa1b088a30..c37cb12d0ef6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,7 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC 0x1918 #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 @@ -658,6 +659,10 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -826,6 +831,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver), /* Xeon E3 V5 Gen Core processor */ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b10a5ec79e48..ad20220af303 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -369,11 +369,6 @@ #define SNR_M2M_PCI_PMON_BOX_CTL 0x438 #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff -/* SNR PCIE3 */ -#define SNR_PCIE3_PCI_PMON_CTL0 0x508 -#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 -#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 - /* SNR IMC */ #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 @@ -4328,27 +4323,12 @@ static struct intel_uncore_type snr_uncore_m2m = { .format_group = &snr_m2m_uncore_format_group, }; -static struct intel_uncore_type snr_uncore_pcie3 = { - .name = "pcie3", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, - .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, - .ops = &ivbep_uncore_pci_ops, - .format_group = &ivbep_uncore_format_group, -}; - enum { SNR_PCI_UNCORE_M2M, - SNR_PCI_UNCORE_PCIE3, }; static struct intel_uncore_type *snr_pci_uncores[] = { [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, - [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, NULL, }; @@ -4357,10 +4337,6 @@ static const struct pci_device_id snr_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), }, - { /* PCIe3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), - }, { /* end: all zeroes */ } }; @@ -4536,6 +4512,7 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + { /* end: all zeroes */ }, }; static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 930611db8f9a..f1cd1ca1a77b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ #define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ +#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -272,6 +273,7 @@ struct cpu_hw_events { struct amd_nb *amd_nb; /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; + int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; }; @@ -694,6 +696,7 @@ struct x86_pmu { * AMD bits */ unsigned int amd_nb_constraints : 1; + u64 perf_ctr_pair_en; /* * Extra registers for events @@ -743,6 +746,7 @@ do { \ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -838,6 +842,11 @@ int x86_pmu_hw_config(struct perf_event *event); void x86_pmu_disable_all(void); +static inline bool is_counter_pair(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_PAIR; +} + static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { @@ -845,6 +854,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, if (hwc->extra_reg.reg) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + + /* + * Add enabled Merge event on next counter + * if large increment event being enabled on this counter + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } @@ -861,6 +878,9 @@ static inline void x86_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; wrmsrl(hwc->config_base, hwc->config); + + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 30416d7f19d4..a3aefe9b9401 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -114,8 +114,6 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= fpu__restore_sig(buf, 1); - force_iret(); - return err; } diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index bc9693c9107e..ca0976456a6b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -13,7 +13,6 @@ #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mpspec.h> -#include <asm/realmode.h> #include <asm/x86_init.h> #ifdef CONFIG_ACPI_APEI @@ -62,7 +61,7 @@ static inline void acpi_disable_pci(void) extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ -#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) +unsigned long acpi_get_wakeup_address(void); /* * Check if the CPU can handle C2 and deeper diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index af45e1452f09..7a4bb1bd4bdb 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -27,7 +27,7 @@ /* Unconditional execution of RDRAND and RDSEED */ -static inline bool rdrand_long(unsigned long *v) +static inline bool __must_check rdrand_long(unsigned long *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; @@ -41,7 +41,7 @@ static inline bool rdrand_long(unsigned long *v) return false; } -static inline bool rdrand_int(unsigned int *v) +static inline bool __must_check rdrand_int(unsigned int *v) { bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; @@ -55,7 +55,7 @@ static inline bool rdrand_int(unsigned int *v) return false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile(RDSEED_LONG @@ -64,7 +64,7 @@ static inline bool rdseed_long(unsigned long *v) return ok; } -static inline bool rdseed_int(unsigned int *v) +static inline bool __must_check rdseed_int(unsigned int *v) { bool ok; asm volatile(RDSEED_INT @@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v) return ok; } -/* Conditional execution based on CPU type */ -#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) -#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) - /* * These are the generic interfaces; they must not be declared if the * stubs in <linux/random.h> are to be invoked, @@ -84,24 +80,24 @@ static inline bool rdseed_int(unsigned int *v) */ #ifdef CONFIG_ARCH_RANDOM -static inline bool arch_get_random_long(unsigned long *v) +static inline bool __must_check arch_get_random_long(unsigned long *v) { - return arch_has_random() ? rdrand_long(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false; } -static inline bool arch_get_random_int(unsigned int *v) +static inline bool __must_check arch_get_random_int(unsigned int *v) { - return arch_has_random() ? rdrand_int(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false; } -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - return arch_has_random_seed() ? rdseed_long(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - return arch_has_random_seed() ? rdseed_int(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false; } extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 794eb2129bc6..92ae28389940 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -6,12 +6,6 @@ extern void check_bugs(void); -#if defined(CONFIG_CPU_SUP_INTEL) -void check_mpx_erratum(struct cpuinfo_x86 *c); -#else -static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {} -#endif - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 22c4dfe65992..52e9f3480f69 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t; (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static inline void __user *arch_compat_alloc_user_space(long len) { compat_uptr_t sp; diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 804734058c77..02c0078d3787 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -6,6 +6,7 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> +#include <asm/pgtable_areas.h> #ifdef CONFIG_X86_64 @@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -/* Single page reserved for the readonly IDT mapping: */ -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); static inline struct entry_stack *cpu_entry_stack(int cpu) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e9b62498fe75..f3327cb56edf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -220,6 +220,7 @@ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -357,6 +358,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index a5d86fc0593f..f6d91861cb14 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -26,71 +26,66 @@ struct camellia_xts_ctx { extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, - unsigned int key_len, u32 *flags); + unsigned int key_len); extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); /* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); /* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); + +asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); + +static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, false); } -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) +static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, true); } -static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, +static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk_2way(ctx, dst, src, false); } -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, +static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk_2way(ctx, dst, src, true); } /* glue helpers */ -extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src); -extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, +extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src); +extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, +extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); +extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); #endif /* ASM_X86_CAMELLIA_H */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 8d4a8e1226ee..777c0f63418c 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -11,18 +11,13 @@ #include <asm/fpu/api.h> #include <crypto/b128ops.h> -typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); -typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, +typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src, +typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) -#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) -#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) -#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn)) - struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ union { @@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, common_glue_func_t tweak_fn, void *tweak_ctx, void *crypt_ctx, bool decrypt); -extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, - le128 *iv, common_glue_func_t fn); +extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, + const u8 *src, le128 *iv, + common_glue_func_t fn); #endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index db7c9cc32234..251c2c89d7cf 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -15,26 +15,26 @@ struct serpent_xts_ctx { struct serpent_ctx crypt_ctx; }; -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, +extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); +extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv); +extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv); extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h index 1a345e8a7496..860ca248914b 100644 --- a/arch/x86/include/asm/crypto/serpent-sse2.h +++ b/arch/x86/include/asm/crypto/serpent-sse2.h @@ -9,25 +9,23 @@ #define SERPENT_PARALLEL_BLOCKS 4 -asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, false); } -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, true); } -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_4way(ctx, dst, src); } @@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, #define SERPENT_PARALLEL_BLOCKS 8 -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, false); } -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, true); } -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_8way(ctx, dst, src); } diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index f618bf272b90..2c377a8042e1 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -7,22 +7,19 @@ #include <crypto/b128ops.h> /* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src); /* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src); /* helpers from twofish_x86_64-3way module */ -extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); -extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, +extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src); +extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, +extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 5e12c63b47aa..7e31f7f1bb06 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,16 +8,6 @@ struct dev_archdata { #endif }; -#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) -struct dma_domain { - struct list_head node; - const struct dma_map_ops *dma_ops; - int domain_nr; -}; -void add_dma_domain(struct dma_domain *domain); -void del_dma_domain(struct dma_domain *domain); -#endif - struct pdev_archdata { }; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8e1d0bb46361..4ea8584682f9 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -10,12 +10,6 @@ * cpu_feature_enabled(). */ -#ifdef CONFIG_X86_INTEL_MPX -# define DISABLE_MPX 0 -#else -# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) -#endif - #ifdef CONFIG_X86_SMAP # define DISABLE_SMAP 0 #else @@ -74,7 +68,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d028e9acdf1c..86169a24b0d8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -8,6 +8,7 @@ #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <linux/build_bug.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -19,13 +20,16 @@ * This is the main reason why we're doing stable VA mappings for RT * services. * - * This flag is used in conjunction with a chicken bit called - * "efi=old_map" which can be used as a fallback to the old runtime - * services mapping method in case there's some b0rkage with a - * particular EFI implementation (haha, it is hard to hold up the - * sarcasm here...). + * SGI UV1 machines are known to be incompatible with this scheme, so we + * provide an opt-out for these machines via a DMI quirk that sets the + * attribute below. */ -#define EFI_OLD_MEMMAP EFI_ARCH_1 +#define EFI_UV1_MEMMAP EFI_ARCH_1 + +static inline bool efi_have_uv1_memmap(void) +{ + return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP); +} #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" @@ -34,10 +38,46 @@ #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF -#ifdef CONFIG_X86_32 +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allows us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ -extern asmlinkage unsigned long efi_call_phys(void *, ...); +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; })) +#define __efi_arg_sentinel(n) , n +/* + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. + */ + +#define __efi_nargs_check(f, n, ...) \ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ +}) + +#ifdef CONFIG_X86_32 #define arch_efi_call_virt_setup() \ ({ \ kernel_fpu_begin(); \ @@ -51,13 +91,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); }) -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ -}) +#define arch_efi_call_virt(p, f, args...) p->f(args) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -65,9 +99,12 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); #define EFI_LOADER_SIGNATURE "EL64" -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ +}) /* * struct efi_scratch - Scratch space used while switching to/from efi_mm @@ -85,7 +122,7 @@ struct efi_scratch { kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(&efi_mm); \ }) @@ -94,7 +131,7 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ @@ -121,8 +158,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, extern struct efi_scratch efi_scratch; extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); @@ -140,6 +175,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); +extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); +extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); struct efi_setup_data { u64 fw_vendor; @@ -152,93 +189,144 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI +extern efi_status_t __efi64_thunk(u32, ...); -static inline bool efi_is_native(void) +#define efi64_thunk(...) ({ \ + __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__); \ +}) + +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) - return true; - - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -#ifdef CONFIG_EFI_MIXED extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return EFI_SUCCESS; -} -#endif /* CONFIG_EFI_MIXED */ - +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map); /* arch specific definitions used by the stub code */ -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - efi_status_t (*call)(unsigned long, ...); - bool is64; -} __packed; - -__pure const struct efi_config *__efi_early(void); +__attribute_const__ bool efi_is_64bit(void); -static inline bool efi_is_64bit(void) +static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) - return false; - + return true; if (!IS_ENABLED(CONFIG_EFI_MIXED)) return true; + return efi_is_64bit(); +} + +#define efi_mixed_mode_cast(attr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(u32, __typeof__(attr)), \ + (unsigned long)(attr), (attr)) - return __efi_early()->is64; +#define efi_table_attr(inst, attr) \ + (efi_is_native() \ + ? inst->attr \ + : (__typeof__(inst->attr)) \ + efi_mixed_mode_cast(inst->mixed_mode.attr)) + +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use case for this is to initialize the upper 32 bits of + * output parameters, and where the 32-bit method requires a 64-bit argument, + * which must be split up into two arguments to be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ + +static inline void *efi64_zero_upper(void *p) +{ + ((u32 *)p)[1] = 0; + return p; } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? \ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) -#define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) -#define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_<method name>, following the examples above. + */ -#define efi_call_runtime(f, ...) \ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) +#define __efi64_thunk_map(inst, func, ...) \ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +/* The three macros below handle dispatching via the thunk if needed */ + +#define efi_call_proto(inst, func, ...) \ + (efi_is_native() \ + ? inst->func(inst, ##__VA_ARGS__) \ + : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__)) + +#define efi_bs_call(func, ...) \ + (efi_is_native() \ + ? efi_system_table()->boottime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + boottime), func, __VA_ARGS__)) + +#define efi_rt_call(func, ...) \ + (efi_is_native() \ + ? efi_system_table()->runtime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + runtime), func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index c2a7458f912c..85be2f506272 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -47,8 +47,6 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -int ftrace_int3_handler(struct pt_regs *regs); - #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 5f10f7f2098d..92abc1e42bfc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -809,7 +809,8 @@ union hv_synic_sint { u64 reserved1:8; u64 masked:1; u64 auto_eoi:1; - u64 reserved2:46; + u64 polling:1; + u64 reserved2:45; } __packed; }; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index c606c0b70738..4981c293f926 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -111,6 +111,7 @@ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 9e7adcdbe031..e6da1ce26256 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -31,30 +31,13 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -int intel_pmc_ipc_simple_command(int cmd, int sub); -int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr); int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read(u32 offset, u32 *data); int intel_pmc_gcr_read64(u32 offset, u64 *data); -int intel_pmc_gcr_write(u32 offset, u32 data); -int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); #else -static inline int intel_pmc_ipc_simple_command(int cmd, int sub) -{ - return -EINVAL; -} - -static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen, u32 dptr, u32 sptr) -{ - return -EINVAL; -} - static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen) { @@ -66,26 +49,11 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data) return -EINVAL; } -static inline int intel_pmc_gcr_read(u32 offset, u32 *data) -{ - return -EINVAL; -} - static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) { return -EINVAL; } -static inline int intel_pmc_gcr_write(u32 offset, u32 data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) -{ - return -EINVAL; -} - #endif /*CONFIG_INTEL_PMC_IPC*/ #endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4a8c6e817398..2a1442ba6e78 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -22,24 +22,12 @@ /* Read single register */ int intel_scu_ipc_ioread8(u16 addr, u8 *data); -/* Read two sequential registers */ -int intel_scu_ipc_ioread16(u16 addr, u16 *data); - -/* Read four sequential registers */ -int intel_scu_ipc_ioread32(u16 addr, u32 *data); - /* Read a vector */ int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); /* Write single register */ int intel_scu_ipc_iowrite8(u16 addr, u8 data); -/* Write two sequential registers */ -int intel_scu_ipc_iowrite16(u16 addr, u16 data); - -/* Write four sequential registers */ -int intel_scu_ipc_iowrite32(u16 addr, u32 data); - /* Write a vector */ int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); @@ -50,14 +38,6 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); int intel_scu_ipc_simple_command(int cmd, int sub); int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, u32 *out, int outlen); -int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen, - u32 *out, int outlen, u32 dptr, u32 sptr); - -/* I2C control api */ -int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); - -/* Update FW version */ -int intel_scu_ipc_fw_update(u8 *buffer, u32 length); extern struct blocking_notifier_head intel_scu_notifier; diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 214394860632..2f77e31a1283 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -40,13 +40,10 @@ struct telemetry_evtmap { struct telemetry_unit_config { struct telemetry_evtmap *telem_evts; void __iomem *regmap; - u32 ssram_base_addr; u8 ssram_evts_used; u8 curr_period; u8 max_period; u8 min_period; - u32 ssram_size; - }; struct telemetry_plt_config { diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 9997521fc5cd..e1aa17a468a8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -399,4 +399,40 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +/** + * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units + * @__dst: destination, in MMIO space (must be 512-bit aligned) + * @src: source + * @count: number of 512 bits quantities to submit + * + * Submit data from kernel space to MMIO space, in units of 512 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + * + * Warning: Do not use this helper unless your driver has checked that the CPU + * instruction is supported on the platform. + */ +static inline void iosubmit_cmds512(void __iomem *__dst, const void *src, + size_t count) +{ + /* + * Note that this isn't an "on-stack copy", just definition of "dst" + * as a pointer to 64-bytes of stuff that is going to be overwritten. + * In the MOVDIR64B case that may be needed as you can use the + * MOVDIR64B instruction to copy arbitrary memory around. This trick + * lets the compiler know how much gets clobbered. + */ + volatile struct { char _[64]; } *dst = __dst; + const u8 *from = src; + const u8 *end = from + count * 64; + + while (from < end) { + /* MOVDIR64B [rdx], rax */ + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : "=m" (dst) + : "d" (from), "a" (dst)); + from += 64; + } +} + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..247ab14c6309 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,6 +33,7 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5dc909d9ad81..95b1f053bd96 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -11,12 +11,11 @@ #include <asm-generic/kprobes.h> -#define BREAKPOINT_INSTRUCTION 0xcc - #ifdef CONFIG_KPROBES #include <linux/types.h> #include <linux/ptrace.h> #include <linux/percpu.h> +#include <asm/text-patching.h> #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -25,10 +24,7 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define RELATIVEJUMP_OPCODE 0xe9 -#define RELATIVEJUMP_SIZE 5 -#define RELATIVECALL_OPCODE 0xe8 -#define RELATIVE_ADDR_SIZE 4 + #define MAX_STACK_SIZE 64 #define CUR_STACK_SIZE(ADDR) \ (current_top_of_stack() - (unsigned long)(ADDR)) @@ -43,11 +39,11 @@ extern __visible kprobe_opcode_t optprobe_template_entry[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; -#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) +#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) #define MAX_OPTINSN_SIZE \ (((unsigned long)optprobe_template_end - \ (unsigned long)optprobe_template_entry) + \ - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) extern const int kretprobe_blacklist_size; @@ -73,7 +69,7 @@ struct arch_specific_insn { struct arch_optimized_insn { /* copy of the original instructions */ - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; + kprobe_opcode_t copied_insn[DISP32_SIZE]; /* detour code buffer */ kprobe_opcode_t *insn; /* the size of instructions copied to detour code buffer */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 77cf6c11f66b..03946eb3e2b9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -222,6 +222,10 @@ struct x86_emulate_ops { bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit); + bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b79cd6aa4075..329d01c689b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -175,6 +175,11 @@ enum { VCPU_SREG_LDTR, }; +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_SKIP_EMUL_INS, +}; + #include <asm/kvm_emulate.h> #define KVM_NR_MEM_OBJS 40 @@ -378,12 +383,12 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, @@ -606,7 +611,7 @@ struct kvm_vcpu_arch { * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables - * of the an L2 guest. This context is only initialized for page table + * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ @@ -685,10 +690,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -1022,6 +1027,11 @@ struct kvm_lapic_irq { bool msi_redir_hint; }; +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -1040,7 +1050,7 @@ struct kvm_x86_ops { void (*vm_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); @@ -1090,7 +1100,8 @@ struct kvm_x86_ops { void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); void (*run)(struct kvm_vcpu *vcpu); - int (*handle_exit)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); @@ -1140,11 +1151,13 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); @@ -1468,7 +1481,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); @@ -1614,7 +1627,6 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dc2d4b206ab7..4359b955e0b7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -144,7 +144,7 @@ struct mce_log_buffer { enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_SRAO = INT_MAX - 1, + MCE_PRIO_UC = INT_MAX - 1, MCE_PRIO_EXTLOG = INT_MAX - 2, MCE_PRIO_NFIT = INT_MAX - 3, MCE_PRIO_EDAC = INT_MAX - 4, @@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, /* Load Store */ SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..9c2447b3555d --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +extern void memtype_free_io(resource_size_t start, resource_size_t end); + +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566..6685e1218959 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -53,6 +53,6 @@ static inline void __init load_ucode_amd_bsp(unsigned int family) {} static inline void load_ucode_amd_ap(unsigned int family) {} static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -void reload_ucode_amd(void) {} +static inline void reload_ucode_amd(void) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index e78c7db87801..bdeae9291e5c 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -50,10 +50,6 @@ typedef struct { u16 pkey_allocation_map; s16 execute_only_pkey; #endif -#ifdef CONFIG_X86_INTEL_MPX - /* address of the bounds directory */ - void __user *bd_addr; -#endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5f33924e200f..b538d9ddee9c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,7 +12,6 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> -#include <asm/mpx.h> #include <asm/debugreg.h> extern atomic64_t last_mm_ctx_id; @@ -69,14 +68,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -99,87 +90,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif -static inline void load_mm_ldt(struct mm_struct *mm) -{ #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); - - /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. - */ - - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). - */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else +static inline void load_mm_ldt(struct mm_struct *mm) +{ clear_LDT(); -#endif } - static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif - DEBUG_LOCKS_WARN_ON(preemptible()); } +#endif -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() @@ -274,34 +199,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) } #endif -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - mpx_mm_init(mm); -} - static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * mpx_notify_unmap() goes and reads a rarely-hot - * cacheline in the mm_struct. That can be expensive - * enough to be seen in profiles. - * - * The mpx_notify_unmap() call and its contents have been - * observed to affect munmap() performance on hardware - * where MPX is not present. - * - * The unlikely() optimizes for the fast case: no MPX - * in the CPU, or no MPX use in the process. Even if - * we get this wrong (in the unlikely event that MPX - * is widely enabled on some system) the overhead of - * MPX itself (reading bounds tables) is expected to - * overwhelm the overhead of getting this unlikely() - * consistently wrong. - */ - if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, start, end); } /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h deleted file mode 100644 index 143a5c193ed3..000000000000 --- a/arch/x86/include/asm/mpx.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MPX_H -#define _ASM_X86_MPX_H - -#include <linux/types.h> -#include <linux/mm_types.h> - -#include <asm/ptrace.h> -#include <asm/insn.h> - -/* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ -#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) -#define MPX_BNDCFG_ENABLE_FLAG 0x1 -#define MPX_BD_ENTRY_VALID_FLAG 0x1 - -/* - * The upper 28 bits [47:20] of the virtual address in 64-bit - * are used to index into bounds directory (BD). - * - * The directory is 2G (2^31) in size, and with 8-byte entries - * it has 2^28 entries. - */ -#define MPX_BD_SIZE_BYTES_64 (1UL<<31) -#define MPX_BD_ENTRY_BYTES_64 8 -#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) - -/* - * The 32-bit directory is 4MB (2^22) in size, and with 4-byte - * entries it has 2^20 entries. - */ -#define MPX_BD_SIZE_BYTES_32 (1UL<<22) -#define MPX_BD_ENTRY_BYTES_32 4 -#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) - -/* - * A 64-bit table is 4MB total in size, and an entry is - * 4 64-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_64 (1UL<<22) -#define MPX_BT_ENTRY_BYTES_64 32 -#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) - -/* - * A 32-bit table is 16kB total in size, and an entry is - * 4 32-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_32 (1UL<<14) -#define MPX_BT_ENTRY_BYTES_32 16 -#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) - -#define MPX_BNDSTA_TAIL 2 -#define MPX_BNDCFG_TAIL 12 -#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BNDSTA_ERROR_CODE 0x3 - -struct mpx_fault_info { - void __user *addr; - void __user *lower; - void __user *upper; -}; - -#ifdef CONFIG_X86_INTEL_MPX - -extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -extern int mpx_handle_bd_fault(void); - -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); -} - -static inline void mpx_mm_init(struct mm_struct *mm) -{ - /* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; -} - -extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end); -extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); - -#else -static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - return -EINVAL; -} -static inline int mpx_handle_bd_fault(void) -{ - return -EINVAL; -} -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return 0; -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ -} -static inline void mpx_notify_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - -static inline unsigned long mpx_unmapped_area_check(unsigned long addr, - unsigned long len, unsigned long flags) -{ - return addr; -} -#endif /* CONFIG_X86_INTEL_MPX */ - -#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 084e98da04a7..ebe1685e92dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -558,7 +558,14 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 @@ -566,11 +573,6 @@ #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..829df26fd7a3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,7 +24,7 @@ #define _ASM_X86_MTRR_H #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +#include <asm/memtype.h> /* @@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) } static inline void mtrr_bp_init(void) { - pat_disable("MTRRs disabled, skipping PAT initialization too."); + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); } #define mtrr_ap_init() do {} while (0) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..9d5d949e662e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -41,7 +41,6 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5c24a7b35166..07e95dcb40ad 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -37,7 +37,6 @@ */ #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ /* * Google experimented with loop-unrolling and this turned out to be diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 92015c65fa2a..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int reserve_memtype(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); - -extern int kernel_map_sync_memtype(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int io_reserve_memtype(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void io_free_memtype(resource_size_t start, resource_size_t end); - -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 90d0731fdcb6..40ac1330adb2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -9,7 +9,7 @@ #include <linux/scatterlist.h> #include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/x86_init.h> struct pci_sysdata { @@ -25,7 +25,7 @@ struct pci_sysdata { void *fwnode; /* IRQ domain for MSI assignment */ #endif #if IS_ENABLED(CONFIG_VMD) - bool vmd_domain; /* True if in Intel VMD domain */ + struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif }; @@ -35,12 +35,15 @@ extern int noioapicreroute; #ifdef CONFIG_PCI +static inline struct pci_sysdata *to_pci_sysdata(const struct pci_bus *bus) +{ + return bus->sysdata; +} + #ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->domain; + return to_pci_sysdata(bus)->domain; } static inline int pci_proc_domain(struct pci_bus *bus) @@ -52,24 +55,20 @@ static inline int pci_proc_domain(struct pci_bus *bus) #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; - - return sd->fwnode; + return to_pci_sysdata(bus)->fwnode; } #define pci_root_bus_fwnode _pci_root_bus_fwnode #endif +#if IS_ENABLED(CONFIG_VMD) static inline bool is_vmd(struct pci_bus *bus) { -#if IS_ENABLED(CONFIG_VMD) - struct pci_sysdata *sd = bus->sysdata; - - return sd->vmd_domain; -#else - return false; -#endif + return to_pci_sysdata(bus)->vmd_dev != NULL; } +#else +#define is_vmd(bus) false +#endif /* CONFIG_VMD */ /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes @@ -124,9 +123,7 @@ void native_restore_msi_irqs(struct pci_dev *dev); /* Returns the node based on pci bus */ static inline int __pcibus_to_node(const struct pci_bus *bus) { - const struct pci_sysdata *sd = bus->sysdata; - - return sd->node; + return to_pci_sysdata(bus)->node; } static inline const struct cpumask * diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ee26e9215f18..29964b0e1075 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -322,17 +322,10 @@ struct perf_guest_switch_msr { u64 host, guest; }; -extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else -static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) -{ - *nr = 0; - return NULL; -} - static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { memset(cap, 0, sizeof(*cap)); @@ -342,8 +335,23 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +#else +static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + *nr = 0; + return NULL; +} +#endif + #ifdef CONFIG_CPU_SUP_INTEL extern void intel_pt_handle_vmx(int on); +#else +static inline void intel_pt_handle_vmx(int on) +{ + +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index ad97dc155195..7e118660bbd9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -29,8 +29,9 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); @@ -239,6 +240,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +#define p4d_leaf p4d_large static inline int p4d_large(p4d_t p4d) { /* No 512 GiB pages yet */ @@ -247,6 +249,7 @@ static inline int p4d_large(p4d_t p4d) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define pmd_leaf pmd_large static inline int pmd_large(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; @@ -874,6 +877,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == @@ -885,6 +889,7 @@ static inline int pud_bad(pud_t pud) return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #else +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return 0; @@ -1233,6 +1238,7 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } +#define pgd_leaf pgd_large static inline int pgd_large(pgd_t pgd) { return 0; } #ifdef CONFIG_PAGE_TABLE_ISOLATION diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..b6355416a15a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0416d42e5bdd..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,55 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. ;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. - * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c - * to avoid include recursion hell. - */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43) - -/* The +1 is for the readonly IDT page: */ -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..d34cce1b995c --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac63..0239998d8cdc 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,11 +110,6 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) - /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for @@ -136,80 +131,93 @@ */ #ifndef __ASSEMBLY__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) -#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) -#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -#ifndef __ASSEMBLY__ +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP _PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) +#ifndef __ASSEMBLY__ -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) +#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; extern uint8_t __pte2cachemode_tbl[8]; @@ -561,6 +566,10 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); + +struct mm_struct; +extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, + unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0340aad3f2fc..09705ccc393c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -25,6 +25,7 @@ struct vm86; #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> +#include <asm/vmxfeatures.h> #include <linux/personality.h> #include <linux/cache.h> @@ -85,6 +86,9 @@ struct cpuinfo_x86 { /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; +#endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* CPUID returned core id bits: */ @@ -943,24 +947,6 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -/* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() -#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() - -#ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(void); -extern int mpx_disable_management(void); -#else -static inline int mpx_enable_management(void) -{ - return -EINVAL; -} -static inline int mpx_disable_management(void) -{ - return -EINVAL; -} -#endif /* CONFIG_X86_INTEL_MPX */ - #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); @@ -1015,11 +1001,4 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; -enum taa_mitigations { - TAA_MITIGATION_OFF, - TAA_MITIGATION_UCODE_NEEDED, - TAA_MITIGATION_VERW, - TAA_MITIGATION_TSX_DISABLED, -}; - #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5057a8ed100b..6d6475fdd327 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp @@ -339,22 +352,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_REPORT -/* - * When hitting ptrace_stop(), we cannot return using SYSRET because - * that does not restore the full CPU state, only a minimal set. The - * ptracer can change arbitrary register values, which is usually okay - * because the usual ptrace stops run off the signal delivery path which - * forces IRET; however, ptrace_event() stops happen in arbitrary places - * in the kernel and don't force IRET path. - * - * So force IRET path after a ptrace stop. - */ -#define arch_ptrace_stop_needed(code, info) \ -({ \ - force_iret(); \ - false; \ -}) - struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 09ecc32f6524..b35030eeec36 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -14,7 +14,7 @@ #include <linux/types.h> #include <asm/io.h> -/* This must match data at realmode.S */ +/* This must match data at realmode/rm/header.S */ struct real_mode_header { u32 text_start; u32 ro_end; @@ -36,7 +36,7 @@ struct real_mode_header { #endif }; -/* This must match data at trampoline_32/64.S */ +/* This must match data at realmode/rm/trampoline_{32,64}.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 2ee8e469dcf5..64c3dce374e5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -81,8 +81,6 @@ int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; -void set_kernel_text_rw(void); -void set_kernel_text_ro(void); #ifdef CONFIG_X86_64 static inline int set_mce_nospec(unsigned long pfn) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 23c626a742e8..67315fa3956a 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -25,14 +25,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, */ #define POKE_MAX_OPCODE_SIZE 5 -struct text_poke_loc { - void *addr; - int len; - s32 rel32; - u8 opcode; - const u8 text[POKE_MAX_OPCODE_SIZE]; -}; - extern void text_poke_early(void *addr, const void *opcode, size_t len); /* @@ -50,21 +42,13 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); * an inconsistent instruction while you patch. */ extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); -extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); -extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, - const void *opcode, size_t len, const void *emulate); -extern int after_bootmem; -extern __ro_after_init struct mm_struct *poking_mm; -extern __ro_after_init unsigned long poking_addr; -#ifndef CONFIG_UML_X86 -static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) -{ - regs->ip = ip; -} +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate); +extern void text_poke_finish(void); #define INT3_INSN_SIZE 1 #define INT3_INSN_OPCODE 0xCC @@ -78,6 +62,67 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) #define JMP8_INSN_SIZE 2 #define JMP8_INSN_OPCODE 0xEB +#define DISP32_SIZE 4 + +static inline int text_opcode_size(u8 opcode) +{ + int size = 0; + +#define __CASE(insn) \ + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break + + switch(opcode) { + __CASE(INT3); + __CASE(CALL); + __CASE(JMP32); + __CASE(JMP8); + } + +#undef __CASE + + return size; +} + +union text_poke_insn { + u8 text[POKE_MAX_OPCODE_SIZE]; + struct { + u8 opcode; + s32 disp; + } __attribute__((packed)); +}; + +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + int size = text_opcode_size(opcode); + + insn.opcode = opcode; + + if (size > 1) { + insn.disp = (long)dest - (long)(addr + size); + if (size == 2) { + /* + * Ensure that for JMP9 the displacement + * actually fits the signed byte. + */ + BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + } + } + + return &insn.text; +} + +extern int after_bootmem; +extern __ro_after_init struct mm_struct *poking_mm; +extern __ro_after_init unsigned long poking_addr; + +#ifndef CONFIG_UML_X86 +static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +{ + regs->ip = ip; +} + static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* @@ -85,6 +130,9 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) * stack where the break point happened, and the saving of * pt_regs. We can extend the original stack because of * this gap. See the idtentry macro's create_gap option. + * + * Similarly entry_32.S will have a gap on the stack for (any) hardware + * exception and pt_regs; see FIXUP_FRAME. */ regs->sp -= sizeof(unsigned long); *(unsigned long *)regs->sp = val; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d779366ce3f8..cf4327986e98 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -239,15 +239,6 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -/* - * Force syscall return via IRET by making it look as if there was - * some work pending. IRET is our most capable (but slowest) syscall - * return path, which is able to restore modified SS, CS and certain - * EFLAGS values that other (fast) syscall return instructions - * are not able to restore properly. - */ -#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) - extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index f23e7aaff4cd..820082bd6880 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -29,8 +29,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) * shootdown, enablement code for several hypervisors overrides * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing * a hypercall. To keep software pagetable walkers safe in this case we - * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment - * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment + * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h * for more details. */ static inline void __tlb_remove_table(void *table) diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h deleted file mode 100644 index 54133017267c..000000000000 --- a/arch/x86/include/asm/trace/mpx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mpx - -#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MPX_H - -#include <linux/tracepoint.h> - -#ifdef CONFIG_X86_INTEL_MPX - -TRACE_EVENT(mpx_bounds_register_exception, - - TP_PROTO(void __user *addr_referenced, - const struct mpx_bndreg *bndreg), - TP_ARGS(addr_referenced, bndreg), - - TP_STRUCT__entry( - __field(void __user *, addr_referenced) - __field(u64, lower_bound) - __field(u64, upper_bound) - ), - - TP_fast_assign( - __entry->addr_referenced = addr_referenced; - __entry->lower_bound = bndreg->lower_bound; - __entry->upper_bound = bndreg->upper_bound; - ), - /* - * Note that we are printing out the '~' of the upper - * bounds register here. It is actually stored in its - * one's complement form so that its 'init' state - * corresponds to all 0's. But, that looks like - * gibberish when printed out, so print out the 1's - * complement instead of the actual value here. Note - * though that you still need to specify filters for the - * actual value, not the displayed one. - */ - TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", - __entry->addr_referenced, - __entry->lower_bound, - ~__entry->upper_bound - ) -); - -TRACE_EVENT(bounds_exception_mpx, - - TP_PROTO(const struct mpx_bndcsr *bndcsr), - TP_ARGS(bndcsr), - - TP_STRUCT__entry( - __field(u64, bndcfgu) - __field(u64, bndstatus) - ), - - TP_fast_assign( - /* need to get rid of the 'const' on bndcsr */ - __entry->bndcfgu = (u64)bndcsr->bndcfgu; - __entry->bndstatus = (u64)bndcsr->bndstatus; - ), - - TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", - __entry->bndcfgu, - __entry->bndstatus) -); - -DECLARE_EVENT_CLASS(mpx_range_trace, - - TP_PROTO(unsigned long start, - unsigned long end), - TP_ARGS(start, end), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - ), - - TP_printk("[0x%p:0x%p]", - (void *)__entry->start, - (void *)__entry->end - ) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -TRACE_EVENT(mpx_new_bounds_table, - - TP_PROTO(unsigned long table_vaddr), - TP_ARGS(table_vaddr), - - TP_STRUCT__entry( - __field(unsigned long, table_vaddr) - ), - - TP_fast_assign( - __entry->table_vaddr = table_vaddr; - ), - - TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) -); - -#else - -/* - * This gets used outside of MPX-specific code, so we need a stub. - */ -static inline -void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) -{ -} - -#endif /* CONFIG_X86_INTEL_MPX */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH asm/trace/ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mpx -#endif /* _TRACE_MPX_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 230474e2ddb5..bbcdc7b8f963 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -21,6 +21,7 @@ struct vdso_image { long sym_vvar_page; long sym_pvclock_page; long sym_hvclock_page; + long sym_timens_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index e9ee139cf29e..6ee1f7dba34b 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -21,6 +21,7 @@ #include <clocksource/hyperv_timer.h> #define __vdso_data (VVAR(_vdso_data)) +#define __timens_vdso_data (TIMENS(_vdso_data)) #define VDSO_HAS_TIME 1 @@ -56,6 +57,13 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return __timens_vdso_data; +} +#endif + #ifndef BUILD_VDSO32 static __always_inline @@ -96,8 +104,6 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #else -#define VDSO_HAS_32BIT_FALLBACK 1 - static __always_inline long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..29837740b520 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#include <asm/pgtable_areas.h> + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 1835767aa335..d380b3b7ddd9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -15,67 +15,70 @@ #include <linux/bitops.h> #include <linux/types.h> #include <uapi/asm/vmx.h> +#include <asm/vmxfeatures.h> + +#define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* * Definitions of Primary Processor-Based VM-Execution Controls. */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVLPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 -#define CPU_BASED_CR3_STORE_EXITING 0x00010000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_USE_IO_BITMAPS 0x02000000 -#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 -#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 -#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(TSC_OFFSETTING) +#define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) +#define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) +#define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) +#define CPU_BASED_RDPMC_EXITING VMCS_CONTROL_BIT(RDPMC_EXITING) +#define CPU_BASED_RDTSC_EXITING VMCS_CONTROL_BIT(RDTSC_EXITING) +#define CPU_BASED_CR3_LOAD_EXITING VMCS_CONTROL_BIT(CR3_LOAD_EXITING) +#define CPU_BASED_CR3_STORE_EXITING VMCS_CONTROL_BIT(CR3_STORE_EXITING) +#define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) +#define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) +#define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING) +#define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) +#define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) +#define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) +#define CPU_BASED_MONITOR_TRAP_FLAG VMCS_CONTROL_BIT(MONITOR_TRAP_FLAG) +#define CPU_BASED_USE_MSR_BITMAPS VMCS_CONTROL_BIT(USE_MSR_BITMAPS) +#define CPU_BASED_MONITOR_EXITING VMCS_CONTROL_BIT(MONITOR_EXITING) +#define CPU_BASED_PAUSE_EXITING VMCS_CONTROL_BIT(PAUSE_EXITING) +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS VMCS_CONTROL_BIT(SEC_CONTROLS) #define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 /* * Definitions of Secondary Processor-Based VM-Execution Controls. */ -#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 -#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 -#define SECONDARY_EXEC_DESC 0x00000004 -#define SECONDARY_EXEC_RDTSCP 0x00000008 -#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 -#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 -#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 -#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 -#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 -#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 -#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 -#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 -#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 -#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 -#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 -#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 -#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 -#define SECONDARY_EXEC_ENABLE_PML 0x00020000 -#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 -#define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 -#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 -#define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) +#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) +#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) +#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) +#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) +#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) +#define SECONDARY_EXEC_UNRESTRICTED_GUEST VMCS_CONTROL_BIT(UNRESTRICTED_GUEST) +#define SECONDARY_EXEC_APIC_REGISTER_VIRT VMCS_CONTROL_BIT(APIC_REGISTER_VIRT) +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY VMCS_CONTROL_BIT(VIRT_INTR_DELIVERY) +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING VMCS_CONTROL_BIT(PAUSE_LOOP_EXITING) +#define SECONDARY_EXEC_RDRAND_EXITING VMCS_CONTROL_BIT(RDRAND_EXITING) +#define SECONDARY_EXEC_ENABLE_INVPCID VMCS_CONTROL_BIT(INVPCID) +#define SECONDARY_EXEC_ENABLE_VMFUNC VMCS_CONTROL_BIT(VMFUNC) +#define SECONDARY_EXEC_SHADOW_VMCS VMCS_CONTROL_BIT(SHADOW_VMCS) +#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) +#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) +#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) +#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES) +#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) +#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) +#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) #define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 -#define PIN_BASED_EXT_INTR_MASK 0x00000001 -#define PIN_BASED_NMI_EXITING 0x00000008 -#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 -#define PIN_BASED_POSTED_INTR 0x00000080 +#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) +#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) +#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS) +#define PIN_BASED_VMX_PREEMPTION_TIMER VMCS_CONTROL_BIT(PREEMPTION_TIMER) +#define PIN_BASED_POSTED_INTR VMCS_CONTROL_BIT(POSTED_INTR) #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 @@ -114,7 +117,9 @@ #define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ -#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 +#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) + +#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h new file mode 100644 index 000000000000..0d04d8bf15a5 --- /dev/null +++ b/arch/x86/include/asm/vmxfeatures.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_VMXFEATURES_H +#define _ASM_X86_VMXFEATURES_H + +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 3 /* N 32-bit words worth of info */ + +/* + * Note: If the comment begins with a quoted string, that string is used + * in /proc/cpuinfo instead of the macro name. If the string is "", + * this feature bit is not displayed in /proc/cpuinfo at all. + */ + +/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */ +#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */ +#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */ +#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */ +#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */ +#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */ + +/* EPT/VPID features, scattered to bits 16-23 */ +#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */ +#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */ +#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */ +#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */ + +/* Aggregated APIC features 24-27 */ +#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */ +#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */ + +/* VM-Functions, shifted to bits 28-31 */ +#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */ + +/* Primary Processor-Based VM-Execution Controls, word 1 */ +#define VMX_FEATURE_VIRTUAL_INTR_PENDING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */ +#define VMX_FEATURE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */ +#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */ +#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */ +#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */ +#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */ +#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */ +#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */ +#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */ +#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */ +#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */ +#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */ +#define VMX_FEATURE_VIRTUAL_NMI_PENDING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */ +#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */ +#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/ +#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */ +#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */ +#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */ +#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */ +#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */ +#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */ + +/* Secondary Processor-Based VM-Execution Controls, word 2 */ +#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */ +#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */ +#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */ +#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */ +#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */ +#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */ +#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */ +#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */ +#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */ +#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */ +#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */ +#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/ +#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */ +#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */ +#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */ +#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */ +#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */ +#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */ +#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */ +#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ +#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ +#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ + +#endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 32f5d9a0b90e..183e98e49ab9 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -19,10 +19,10 @@ #ifndef _ASM_X86_VVAR_H #define _ASM_X86_VVAR_H -#if defined(__VVAR_KERNEL_LDS) - -/* The kernel linker script defines its own magic to put vvars in the - * right place. +#ifdef EMIT_VVAR +/* + * EMIT_VVAR() is used by the kernel linker script to put vvars in the + * right place. Also, it's used by kernel code to import offsets values. */ #define DECLARE_VVAR(offset, type, name) \ EMIT_VVAR(name, offset) @@ -33,9 +33,12 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ extern type vvar_ ## name[CS_BASES] \ - __attribute__((visibility("hidden"))); + __attribute__((visibility("hidden"))); \ + extern type timens_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); \ #define VVAR(name) (vvar_ ## name) +#define TIMENS(name) (timens_ ## name) #define DEFINE_VVAR(type, name) \ type name[CS_BASES] \ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 3eb8411ab60e..e95b72ec19bc 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -33,7 +33,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -94,7 +94,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6175e370ee4a..9b294c13809a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace_$(BITS).o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o +obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca13851f0570..26b7256f590f 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -27,6 +27,17 @@ static char temp_stack[4096]; #endif /** + * acpi_get_wakeup_address - provide physical address for S3 wakeup + * + * Returns the physical address where the kernel should be resumed after the + * system awakes from S3, e.g. for programming into the firmware waking vector. + */ +unsigned long acpi_get_wakeup_address(void) +{ + return ((unsigned long)(real_mode_header->wakeup_start)); +} + +/** * x86_acpi_enter_sleep_state - enter sleep state * @state: Sleep state to enter. * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index fbb60ca4255c..d06c2079b6c1 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,7 +3,7 @@ * Variables and functions used by the code in sleep.c */ -#include <asm/realmode.h> +#include <linux/linkage.h> extern unsigned long saved_video_mode; extern long saved_magic; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9ec463fe96f2..15ac0d5f4b40 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,6 +23,7 @@ #include <asm/nmi.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <asm/insn.h> #include <asm/io.h> #include <asm/fixmap.h> @@ -936,44 +937,81 @@ static void do_sync_core(void *info) sync_core(); } -static struct bp_patching_desc { +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +struct text_poke_loc { + s32 rel_addr; /* addr := _stext + rel_addr */ + s32 rel32; + u8 opcode; + const u8 text[POKE_MAX_OPCODE_SIZE]; +}; + +struct bp_patching_desc { struct text_poke_loc *vec; int nr_entries; -} bp_patching; + atomic_t refs; +}; + +static struct bp_patching_desc *bp_desc; + +static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) +{ + struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */ + + if (!desc || !atomic_inc_not_zero(&desc->refs)) + return NULL; + + return desc; +} + +static inline void put_desc(struct bp_patching_desc *desc) +{ + smp_mb__before_atomic(); + atomic_dec(&desc->refs); +} -static int patch_cmp(const void *key, const void *elt) +static inline void *text_poke_addr(struct text_poke_loc *tp) +{ + return _stext + tp->rel_addr; +} + +static int notrace patch_cmp(const void *key, const void *elt) { struct text_poke_loc *tp = (struct text_poke_loc *) elt; - if (key < tp->addr) + if (key < text_poke_addr(tp)) return -1; - if (key > tp->addr) + if (key > text_poke_addr(tp)) return 1; return 0; } NOKPROBE_SYMBOL(patch_cmp); -int poke_int3_handler(struct pt_regs *regs) +int notrace poke_int3_handler(struct pt_regs *regs) { + struct bp_patching_desc *desc; struct text_poke_loc *tp; + int len, ret = 0; void *ip; + if (user_mode(regs)) + return 0; + /* * Having observed our INT3 instruction, we now must observe - * bp_patching.nr_entries. + * bp_desc: * - * nr_entries != 0 INT3 + * bp_desc = desc INT3 * WMB RMB - * write INT3 if (nr_entries) - * - * Idem for other elements in bp_patching. + * write INT3 if (desc) */ smp_rmb(); - if (likely(!bp_patching.nr_entries)) - return 0; - - if (user_mode(regs)) + desc = try_get_desc(&bp_desc); + if (!desc) return 0; /* @@ -984,19 +1022,20 @@ int poke_int3_handler(struct pt_regs *regs) /* * Skip the binary search if there is a single member in the vector. */ - if (unlikely(bp_patching.nr_entries > 1)) { - tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries, + if (unlikely(desc->nr_entries > 1)) { + tp = bsearch(ip, desc->vec, desc->nr_entries, sizeof(struct text_poke_loc), patch_cmp); if (!tp) - return 0; + goto out_put; } else { - tp = bp_patching.vec; - if (tp->addr != ip) - return 0; + tp = desc->vec; + if (text_poke_addr(tp) != ip) + goto out_put; } - ip += tp->len; + len = text_opcode_size(tp->opcode); + ip += len; switch (tp->opcode) { case INT3_INSN_OPCODE: @@ -1004,7 +1043,7 @@ int poke_int3_handler(struct pt_regs *regs) * Someone poked an explicit INT3, they'll want to handle it, * do not consume. */ - return 0; + goto out_put; case CALL_INSN_OPCODE: int3_emulate_call(regs, (long)ip + tp->rel32); @@ -1019,10 +1058,18 @@ int poke_int3_handler(struct pt_regs *regs) BUG(); } - return 1; + ret = 1; + +out_put: + put_desc(desc); + return ret; } NOKPROBE_SYMBOL(poke_int3_handler); +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) +static struct text_poke_loc tp_vec[TP_VEC_MAX]; +static int tp_vec_nr; + /** * text_poke_bp_batch() -- update instructions on live kernel on SMP * @tp: vector of instructions to patch @@ -1044,16 +1091,20 @@ NOKPROBE_SYMBOL(poke_int3_handler); * replacing opcode * - sync cores */ -void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) +static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { + struct bp_patching_desc desc = { + .vec = tp, + .nr_entries = nr_entries, + .refs = ATOMIC_INIT(1), + }; unsigned char int3 = INT3_INSN_OPCODE; unsigned int i; int do_sync; lockdep_assert_held(&text_mutex); - bp_patching.vec = tp; - bp_patching.nr_entries = nr_entries; + smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */ /* * Corresponding read barrier in int3 notifier for making sure the @@ -1065,18 +1116,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * First step: add a int3 trap to the address that will be patched. */ for (i = 0; i < nr_entries; i++) - text_poke(tp[i].addr, &int3, sizeof(int3)); + text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE); - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); /* * Second step: update all but the first byte of the patched range. */ for (do_sync = 0, i = 0; i < nr_entries; i++) { - if (tp[i].len - sizeof(int3) > 0) { - text_poke((char *)tp[i].addr + sizeof(int3), - (const char *)tp[i].text + sizeof(int3), - tp[i].len - sizeof(int3)); + int len = text_opcode_size(tp[i].opcode); + + if (len - INT3_INSN_SIZE > 0) { + text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, + (const char *)tp[i].text + INT3_INSN_SIZE, + len - INT3_INSN_SIZE); do_sync++; } } @@ -1087,7 +1140,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) * not necessary and we'd be safe even without it. But * better safe than sorry (plus there's not only Intel). */ - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); } /* @@ -1098,19 +1151,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) if (tp[i].text[0] == INT3_INSN_OPCODE) continue; - text_poke(tp[i].addr, tp[i].text, sizeof(int3)); + text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); do_sync++; } if (do_sync) - on_each_cpu(do_sync_core, NULL, 1); + text_poke_sync(); /* - * sync_core() implies an smp_mb() and orders this store against - * the writing of the new instruction. + * Remove and synchronize_rcu(), except we have a very primitive + * refcount based completion. */ - bp_patching.vec = NULL; - bp_patching.nr_entries = 0; + WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */ + if (!atomic_dec_and_test(&desc.refs)) + atomic_cond_read_acquire(&desc.refs, !VAL); } void text_poke_loc_init(struct text_poke_loc *tp, void *addr, @@ -1118,11 +1172,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, { struct insn insn; - if (!opcode) - opcode = (void *)tp->text; - else - memcpy((void *)tp->text, opcode, len); - + memcpy((void *)tp->text, opcode, len); if (!emulate) emulate = opcode; @@ -1132,8 +1182,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, BUG_ON(!insn_complete(&insn)); BUG_ON(len != insn.length); - tp->addr = addr; - tp->len = len; + tp->rel_addr = addr - (void *)_stext; tp->opcode = insn.opcode.bytes[0]; switch (tp->opcode) { @@ -1167,6 +1216,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, } } +/* + * We hard rely on the tp_vec being ordered; ensure this is so by flushing + * early if needed. + */ +static bool tp_order_fail(void *addr) +{ + struct text_poke_loc *tp; + + if (!tp_vec_nr) + return false; + + if (!addr) /* force */ + return true; + + tp = &tp_vec[tp_vec_nr - 1]; + if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr) + return true; + + return false; +} + +static void text_poke_flush(void *addr) +{ + if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) { + text_poke_bp_batch(tp_vec, tp_vec_nr); + tp_vec_nr = 0; + } +} + +void text_poke_finish(void) +{ + text_poke_flush(NULL); +} + +void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate) +{ + struct text_poke_loc *tp; + + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + + text_poke_flush(addr); + + tp = &tp_vec[tp_vec_nr++]; + text_poke_loc_init(tp, addr, opcode, len, emulate); +} + /** * text_poke_bp() -- update instructions on live kernel on SMP * @addr: address to patch @@ -1178,10 +1276,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr, * dynamically allocated memory. This function should be used when it is * not possible to allocate memory. */ -void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) +void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { struct text_poke_loc tp; + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, opcode, len); + return; + } + text_poke_loc_init(&tp, addr, opcode, len, emulate); text_poke_bp_batch(&tp, 1); } diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 251c795b4eb3..69aed0ebbdfc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -22,6 +22,7 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 +#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 5da106f84e84..fe698f96617c 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -95,7 +95,7 @@ static inline void apbt_set_mapping(void) printk(KERN_WARNING "No timer base from SFI, use default\n"); apbt_address = APBT_DEFAULT_BASE; } - apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); + apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE); if (!apbt_virt_address) { pr_debug("Failed mapping APBT phy address at %lu\n",\ (unsigned long)apbt_address); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d5b51a740524..ad53b2abc859 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1493,65 +1493,34 @@ static void check_efi_reboot(void) } /* Setup user proc fs files */ -static int proc_hubbed_show(struct seq_file *file, void *data) +static int __maybe_unused proc_hubbed_show(struct seq_file *file, void *data) { seq_printf(file, "0x%x\n", uv_hubbed_system); return 0; } -static int proc_hubless_show(struct seq_file *file, void *data) +static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data) { seq_printf(file, "0x%x\n", uv_hubless_system); return 0; } -static int proc_oemid_show(struct seq_file *file, void *data) +static int __maybe_unused proc_oemid_show(struct seq_file *file, void *data) { seq_printf(file, "%s/%s\n", oem_id, oem_table_id); return 0; } -static int proc_hubbed_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_hubbed_show, (void *)NULL); -} - -static int proc_hubless_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_hubless_show, (void *)NULL); -} - -static int proc_oemid_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_oemid_show, (void *)NULL); -} - -/* (struct is "non-const" as open function is set at runtime) */ -static struct file_operations proc_version_fops = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_oemid_fops = { - .open = proc_oemid_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static __init void uv_setup_proc_files(int hubless) { struct proc_dir_entry *pde; - char *name = hubless ? "hubless" : "hubbed"; pde = proc_mkdir(UV_PROC_NODE, NULL); - proc_create("oemid", 0, pde, &proc_oemid_fops); - proc_create(name, 0, pde, &proc_version_fops); + proc_create_single("oemid", 0, pde, proc_oemid_show); if (hubless) - proc_version_fops.open = proc_hubless_open; + proc_create_single("hubless", 0, pde, proc_hubless_show); else - proc_version_fops.open = proc_hubbed_open; + proc_create_single("hubbed", 0, pde, proc_hubbed_show); } /* Initialize UV hubless systems */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 890f60083eca..7dc4ad68eb41 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -29,6 +29,7 @@ obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o +obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL obj-y += intel.o intel_pconfig.o tsx.o obj-$(CONFIG_PM) += intel_epb.o @@ -53,11 +54,12 @@ obj-$(CONFIG_ACRN_GUEST) += acrn.o ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ - cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ cpufeature = $(src)/../../include/asm/cpufeatures.h +vmxfeature = $(src)/../../include/asm/vmxfeatures.h -$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE +$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif targets += capflags.c diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 90f75e515876..ac83a0fef628 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -319,13 +319,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) c->cpu_core_id %= cus_per_node; } - -static void amd_get_topology_early(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; -} - /* * Fixup core topology information for * (1) AMD multi-node processors @@ -615,9 +608,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) return; clear_all: - clear_cpu_cap(c, X86_FEATURE_SME); + setup_clear_cpu_cap(X86_FEATURE_SME); clear_sev: - clear_cpu_cap(c, X86_FEATURE_SEV); + setup_clear_cpu_cap(X86_FEATURE_SEV); } } @@ -717,7 +710,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) } } - amd_get_topology_early(c); + if (cpu_has(c, X86_FEATURE_TOPOEXT)) + smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; } static void init_amd_k8(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8bf64899f56a..ed54b3b21c39 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -286,6 +286,13 @@ early_param("mds", mds_cmdline); #undef pr_fmt #define pr_fmt(fmt) "TAA: " fmt +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + /* Default mitigation for TAA-affected CPUs */ static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; static bool taa_nosmt __ro_after_init; diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 14433ff5b828..426792565d86 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -18,13 +18,6 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - static void init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -71,8 +64,6 @@ static void init_c3(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - - cpu_detect_cache_sizes(c); } enum { @@ -119,31 +110,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) } } -static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - static void init_centaur(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -250,8 +216,7 @@ static void init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif - if (cpu_has(c, X86_FEATURE_VMX)) - centaur_detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2e4d90294fe6..52c9bfbbdb2a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -14,6 +14,7 @@ #include <linux/sched/mm.h> #include <linux/sched/clock.h> #include <linux/sched/task.h> +#include <linux/sched/smt.h> #include <linux/init.h> #include <linux/kprobes.h> #include <linux/kgdb.h> @@ -49,7 +50,7 @@ #include <asm/cpu.h> #include <asm/mce.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/microcode.h> #include <asm/microcode_intel.h> #include <asm/intel-family.h> @@ -163,22 +164,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -static int __init x86_mpx_setup(char *s) -{ - /* require an exact match without trailing characters */ - if (strlen(s)) - return 0; - - /* do not emit a message if the feature is not present */ - if (!boot_cpu_has(X86_FEATURE_MPX)) - return 1; - - setup_clear_cpu_cap(X86_FEATURE_MPX); - pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); - return 1; -} -__setup("nompx", x86_mpx_setup); - #ifdef CONFIG_X86_64 static int __init x86_nopcid_setup(char *s) { @@ -305,8 +290,6 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) static __init int setup_disable_smep(char *arg) { setup_clear_cpu_cap(X86_FEATURE_SMEP); - /* Check for things that depend on SMEP being enabled: */ - check_mpx_erratum(&boot_cpu_data); return 1; } __setup("nosmep", setup_disable_smep); @@ -1023,6 +1006,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define MSBDS_ONLY BIT(5) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) +#define NO_SPECTRE_V2 BIT(8) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1084,6 +1068,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* Zhaoxin Family 7 */ + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS), {} }; @@ -1116,7 +1104,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + if (!cpu_matches(NO_SPECTRE_V2)) + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) @@ -1449,6 +1439,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof(c->x86_capability)); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); +#endif generic_identify(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 38ab6e115eac..37fdefd14f28 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -80,4 +80,8 @@ extern void x86_spec_ctrl_setup_ap(void); extern u64 x86_read_arch_cap_msr(void); +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#endif + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c new file mode 100644 index 000000000000..0268185bef94 --- /dev/null +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/tboot.h> + +#include <asm/cpufeature.h> +#include <asm/msr-index.h> +#include <asm/processor.h> +#include <asm/vmx.h> + +#undef pr_fmt +#define pr_fmt(fmt) "x86/cpu: " fmt + +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +enum vmx_feature_leafs { + MISC_FEATURES = 0, + PRIMARY_CTLS, + SECONDARY_CTLS, + NR_VMX_FEATURE_WORDS, +}; + +#define VMX_F(x) BIT(VMX_FEATURE_##x & 0x1f) + +static void init_vmx_capabilities(struct cpuinfo_x86 *c) +{ + u32 supported, funcs, ept, vpid, ign; + + BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS); + + /* + * The high bits contain the allowed-1 settings, i.e. features that can + * be turned on. The low bits contain the allowed-0 settings, i.e. + * features that can be turned off. Ignore the allowed-0 settings, + * if a feature can be turned on then it's supported. + * + * Use raw rdmsr() for primary processor controls and pin controls MSRs + * as they exist on any CPU that supports VMX, i.e. we want the WARN if + * the RDMSR faults. + */ + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported); + c->vmx_capability[PRIMARY_CTLS] = supported; + + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); + c->vmx_capability[SECONDARY_CTLS] = supported; + + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); + rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); + + /* + * Except for EPT+VPID, which enumerates support for both in a single + * MSR, low for EPT, high for VPID. + */ + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid); + + /* Pin, EPT, VPID and VM-Func are merged into a single word. */ + WARN_ON_ONCE(supported >> 16); + WARN_ON_ONCE(funcs >> 4); + c->vmx_capability[MISC_FEATURES] = (supported & 0xffff) | + ((vpid & 0x1) << 16) | + ((funcs & 0xf) << 28); + + /* EPT bits are full on scattered and must be manually handled. */ + if (ept & VMX_EPT_EXECUTE_ONLY_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_EXECUTE_ONLY); + if (ept & VMX_EPT_AD_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_AD); + if (ept & VMX_EPT_1GB_PAGE_BIT) + c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_1GB); + + /* Synthetic APIC features that are aggregates of multiple features. */ + if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_APIC_ACCESSES))) + c->vmx_capability[MISC_FEATURES] |= VMX_F(FLEXPRIORITY); + + if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(APIC_REGISTER_VIRT)) && + (c->vmx_capability[SECONDARY_CTLS] & VMX_F(VIRT_INTR_DELIVERY)) && + (c->vmx_capability[MISC_FEATURES] & VMX_F(POSTED_INTR))) + c->vmx_capability[MISC_FEATURES] |= VMX_F(APICV); + + /* Set the synthetic cpufeatures to preserve /proc/cpuinfo's ABI. */ + if (c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(FLEXPRIORITY)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(VIRTUAL_NMIS)) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (c->vmx_capability[SECONDARY_CTLS] & VMX_F(EPT)) + set_cpu_cap(c, X86_FEATURE_EPT); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(EPT_AD)) + set_cpu_cap(c, X86_FEATURE_EPT_AD); + if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID)) + set_cpu_cap(c, X86_FEATURE_VPID); +} +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */ + +void init_ia32_feat_ctl(struct cpuinfo_x86 *c) +{ + bool tboot = tboot_enabled(); + u64 msr; + + if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) { + clear_cpu_cap(c, X86_FEATURE_VMX); + return; + } + + if (msr & FEAT_CTL_LOCKED) + goto update_caps; + + /* + * Ignore whatever value BIOS left in the MSR to avoid enabling random + * features or faulting on the WRMSR. + */ + msr = FEAT_CTL_LOCKED; + + /* + * Enable VMX if and only if the kernel may do VMXON at some point, + * i.e. KVM is enabled, to avoid unnecessarily adding an attack vector + * for the kernel, e.g. using VMX to hide malicious code. + */ + if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) { + msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + + if (tboot) + msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; + } + + wrmsrl(MSR_IA32_FEAT_CTL, msr); + +update_caps: + set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL); + + if (!cpu_has(c, X86_FEATURE_VMX)) + return; + + if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) || + (!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) { + if (IS_ENABLED(CONFIG_KVM_INTEL)) + pr_err_once("VMX (%s TXT) disabled by BIOS\n", + tboot ? "inside" : "outside"); + clear_cpu_cap(c, X86_FEATURE_VMX); + } else { +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + init_vmx_capabilities(c); +#endif + } +} diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4a900804a023..be82cd5841c3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -32,41 +32,6 @@ #endif /* - * Just in case our CPU detection goes bad, or you have a weird system, - * allow a way to override the automatic disabling of MPX. - */ -static int forcempx; - -static int __init forcempx_setup(char *__unused) -{ - forcempx = 1; - - return 1; -} -__setup("intel-skd-046-workaround=disable", forcempx_setup); - -void check_mpx_erratum(struct cpuinfo_x86 *c) -{ - if (forcempx) - return; - /* - * Turn off the MPX feature on CPUs where SMEP is not - * available or disabled. - * - * Works around Intel Erratum SKD046: "Branch Instructions - * May Initialize MPX Bound Registers Incorrectly". - * - * This might falsely disable MPX on systems without - * SMEP, like Atom processors without SMEP. But there - * is no such hardware known at the moment. - */ - if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) { - setup_clear_cpu_cap(X86_FEATURE_MPX); - pr_warn("x86/mpx: Disabling MPX since SMEP not present\n"); - } -} - -/* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists * CPU models in which having conflicting memory types still leads to @@ -330,7 +295,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); } - check_mpx_erratum(c); check_memory_type_self_snoop_errata(c); /* @@ -494,52 +458,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - /* Intel VMX MSR indicated features */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 -#define x86_VMX_FEATURE_EPT_CAP_AD 0x00200000 - - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - u32 msr_vpid_cap, msr_ept_cap; - - clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - clear_cpu_cap(c, X86_FEATURE_VNMI); - clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - clear_cpu_cap(c, X86_FEATURE_EPT); - clear_cpu_cap(c, X86_FEATURE_VPID); - clear_cpu_cap(c, X86_FEATURE_EPT_AD); - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) { - set_cpu_cap(c, X86_FEATURE_EPT); - rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, - msr_ept_cap, msr_vpid_cap); - if (msr_ept_cap & x86_VMX_FEATURE_EPT_CAP_AD) - set_cpu_cap(c, X86_FEATURE_EPT_AD); - } - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - #define MSR_IA32_TME_ACTIVATE 0x982 /* Helpers to access TME_ACTIVATE MSR */ @@ -755,8 +673,7 @@ static void init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(c); - if (cpu_has(c, X86_FEATURE_VMX)) - detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 5167bd2bb6b1..b3a50d962851 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -78,6 +78,7 @@ struct smca_bank_name { static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, + [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_DE] = { "decode_unit", "Decode Unit" }, @@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* ZN Core (HWID=0xB0) MCA types */ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, + { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, @@ -266,10 +268,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) smca_set_misc_banks_map(bank, cpu); /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5f42f25bac8f..2c4f949611e4 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -53,8 +53,6 @@ #include "internal.h" -static DEFINE_MUTEX(mce_log_mutex); - /* sysfs synchronization */ static DEFINE_MUTEX(mce_sysfs_mutex); @@ -156,19 +154,10 @@ void mce_log(struct mce *m) if (!mce_gen_pool_add(m)) irq_work_queue(&mce_irq_work); } - -void mce_inject_log(struct mce *m) -{ - mutex_lock(&mce_log_mutex); - mce_log(m); - mutex_unlock(&mce_log_mutex); -} -EXPORT_SYMBOL_GPL(mce_inject_log); - -static struct notifier_block mce_srao_nb; +EXPORT_SYMBOL_GPL(mce_log); /* - * We run the default notifier if we have only the SRAO, the first and the + * We run the default notifier if we have only the UC, the first and the * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS * notifiers registered on the chain. */ @@ -594,26 +583,29 @@ static struct notifier_block first_nb = { .priority = MCE_PRIO_FIRST, }; -static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, - void *data) +static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, + void *data) { struct mce *mce = (struct mce *)data; unsigned long pfn; - if (!mce) + if (!mce || !mce_usable_address(mce)) return NOTIFY_DONE; - if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { - pfn = mce->addr >> PAGE_SHIFT; - if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); - } + if (mce->severity != MCE_AO_SEVERITY && + mce->severity != MCE_DEFERRED_SEVERITY) + return NOTIFY_DONE; + + pfn = mce->addr >> PAGE_SHIFT; + if (!memory_failure(pfn, 0)) + set_mce_nospec(pfn); return NOTIFY_OK; } -static struct notifier_block mce_srao_nb = { - .notifier_call = srao_decode_notifier, - .priority = MCE_PRIO_SRAO, + +static struct notifier_block mce_uc_nb = { + .notifier_call = uc_decode_notifier, + .priority = MCE_PRIO_UC, }; static int mce_default_notifier(struct notifier_block *nb, unsigned long val, @@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) log_it: error_seen = true; - mce_read_aux(&m, i); + if (flags & MCP_DONTLOG) + goto clear_it; + mce_read_aux(&m, i); m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); - /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. */ - if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) - mce_log(&m); - else if (mce_usable_address(&m)) { - /* - * Although we skipped logging this, we still want - * to take action. Add to the pool so the registered - * notifiers will see it. - */ - if (!mce_gen_pool_add(&m)) - mce_schedule_work(); - } + if (mca_cfg.dont_log_ce && !mce_usable_address(&m)) + goto clear_it; + + mce_log(&m); + +clear_it: /* * Clear state for this bank. */ @@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll); static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { - char *tmp; + char *tmp = *msg; int i; for (i = 0; i < this_cpu_read(mce_num_banks); i++) { @@ -819,8 +807,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; @@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); struct mca_config *cfg = &mca_cfg; int cpu = smp_processor_id(); - char *msg = "Unknown"; struct mce m, *final; + char *msg = NULL; int worst = 0; /* @@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) ist_end_non_atomic(); } else { if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) - mce_panic("Failed kernel mode recovery", &m, NULL); + mce_panic("Failed kernel mode recovery", &m, msg); } out_ist: @@ -2041,7 +2029,7 @@ int __init mcheck_init(void) { mcheck_intel_therm_init(); mce_register_decode_chain(&first_nb); - mce_register_decode_chain(&mce_srao_nb); + mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1f30117b24ba..3413b41b8d55 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -494,7 +494,7 @@ static void do_inject(void) i_mce.status |= MCI_STATUS_SYNDV; if (inj_type == SW_INJ) { - mce_inject_log(&i_mce); + mce_log(&i_mce); return; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e270d0770134..5627b1091b85 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -115,15 +115,16 @@ static bool lmce_supported(void) /* * BIOS should indicate support for LMCE by setting bit 20 in - * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will - * generate a #GP fault. + * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP + * fault. The MSR must also be locked for LMCE_ENABLED to take effect. + * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally + * locks the MSR in the event that it wasn't already locked by BIOS. */ - rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); - if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == - (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) - return true; + rdmsrl(MSR_IA32_FEAT_CTL, tmp); + if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED))) + return false; - return false; + return tmp & FEAT_CTL_LMCE_ENABLED; } bool mce_intel_cmci_poll(void) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 842b273bce31..b785c0d0b590 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id) } #endif -void mce_inject_log(struct mce *m); - /* * We consider records to be equivalent if bank+status+addr+misc all match. * This is only used when the system is going down because of a fatal error diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index b38010b541d6..58b4ee3cda77 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp) *temp = (msr_val >> 16) & 0x7F; } -static void throttle_active_work(struct work_struct *work) +static void __maybe_unused throttle_active_work(struct work_struct *work) { struct _thermal_state *state = container_of(to_delayed_work(work), struct _thermal_state, therm_work); @@ -467,6 +467,7 @@ static int thermal_throttle_online(unsigned int cpu) { struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + u32 l; state->package_throttle.level = PACKAGE_LEVEL; state->core_throttle.level = CORE_LEVEL; @@ -474,6 +475,10 @@ static int thermal_throttle_online(unsigned int cpu) INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); + /* Unmask the thermal vector after the above workqueues are initialized. */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + return thermal_throttle_add_dev(dev, cpu); } @@ -722,10 +727,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) rdmsr(MSR_IA32_MISC_ENABLE, l, h); wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - /* Unmask the thermal vector: */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", tm2 ? "TM2" : "TM1"); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index aed45b8895d5..1db560ed2ca3 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -6,8 +6,7 @@ set -e -IN=$1 -OUT=$2 +OUT=$1 dump_array() { @@ -15,6 +14,7 @@ dump_array() SIZE=$2 PFX=$3 POSTFIX=$4 + IN=$5 PFX_SZ=$(echo $PFX | wc -c) TABS="$(printf '\t\t\t\t\t')" @@ -57,11 +57,18 @@ trap 'rm "$OUT"' EXIT echo "#endif" echo "" - dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" + dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" $2 echo "" - dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" + dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" $2 + echo "" + echo "#ifdef CONFIG_X86_VMX_FEATURE_NAMES" + echo "#ifndef _ASM_X86_VMXFEATURES_H" + echo "#include <asm/vmxfeatures.h>" + echo "#endif" + dump_array "x86_vmx_flags" "NVMXINTS*32" "VMX_FEATURE_" "" $3 + echo "#endif /* CONFIG_X86_VMX_FEATURE_NAMES */" ) > $OUT trap - EXIT diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index aa5c064a6a22..51b9190c628b 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -15,7 +15,7 @@ #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 4d36dcc1cf87..a5c506f6da7f 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -101,9 +101,6 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) int length; size_t linelen; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - memset(line, 0, LINE_SIZE); len = min_t(size_t, len, LINE_SIZE - 1); @@ -226,8 +223,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_ADD_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); @@ -236,24 +231,18 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_SET_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_DEL_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_del(sentry.base, sentry.size, file, 0); break; case MTRRIOC_KILL_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_KILL_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_del(-1, sentry.base, sentry.size); break; case MTRRIOC_GET_ENTRY: @@ -279,8 +268,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_ADD_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); @@ -289,8 +276,6 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_SET_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; @@ -298,16 +283,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #ifdef CONFIG_COMPAT case MTRRIOC32_DEL_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_file_del(sentry.base, sentry.size, file, 1); break; case MTRRIOC_KILL_PAGE_ENTRY: #ifdef CONFIG_COMPAT case MTRRIOC32_KILL_PAGE_ENTRY: #endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; err = mtrr_del_page(-1, sentry.base, sentry.size); break; case MTRRIOC_GET_PAGE_ENTRY: @@ -373,28 +354,6 @@ static int mtrr_close(struct inode *ino, struct file *file) return single_release(ino, file); } -static int mtrr_seq_show(struct seq_file *seq, void *offset); - -static int mtrr_open(struct inode *inode, struct file *file) -{ - if (!mtrr_if) - return -EIO; - if (!mtrr_if->get) - return -ENXIO; - return single_open(file, mtrr_seq_show, NULL); -} - -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, -}; - static int mtrr_seq_show(struct seq_file *seq, void *offset) { char factor; @@ -426,6 +385,29 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) return 0; } +static int mtrr_open(struct inode *inode, struct file *file) +{ + if (!mtrr_if) + return -EIO; + if (!mtrr_if->get) + return -ENXIO; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return single_open(file, mtrr_seq_show, NULL); +} + +static const struct proc_ops mtrr_proc_ops = { + .proc_open = mtrr_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = mtrr_write, + .proc_ioctl = mtrr_ioctl, +#ifdef CONFIG_COMPAT + .proc_compat_ioctl = mtrr_ioctl, +#endif + .proc_release = mtrr_close, +}; + static int __init mtrr_if_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -436,7 +418,7 @@ static int __init mtrr_if_init(void) (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) return -ENODEV; - proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); + proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_proc_ops); return 0; } arch_initcall(mtrr_if_init); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 507039c20128..6a80f36b5d59 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -52,7 +52,7 @@ #include <asm/e820/api.h> #include <asm/mtrr.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include "mtrr.h" diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index cb2e49810d68..4eec8889b0ff 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -7,6 +7,10 @@ #include "cpu.h" +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +extern const char * const x86_vmx_flags[NVMXINTS*32]; +#endif + /* * Get CPU information for use by the procfs. */ @@ -102,6 +106,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + if (cpu_has(c, X86_FEATURE_VMX) && c->vmx_capability[0]) { + seq_puts(m, "\nvmx flags\t:"); + for (i = 0; i < 32*NVMXINTS; i++) { + if (test_bit(i, (unsigned long *)c->vmx_capability) && + x86_vmx_flags[i] != NULL) + seq_printf(m, " %s", x86_vmx_flags[i]); + } + } +#endif + seq_puts(m, "\nbugs\t\t:"); for (i = 0; i < 32*NBUGINTS; i++) { unsigned int bug_bit = 32*NCAPINTS + i; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 03eb90d00af0..89049b343c7a 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -618,7 +618,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e49b77283924..181c992f448c 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); /** * struct mon_evt - Entry in the event list of a resource diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 397206f23d14..773124b0e18a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); @@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms) unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) return; cpu = cpumask_any(&dom->cpu_mask); dom->mbm_work_cpu = cpu; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2e3b06d6bbc6..8ca5e510f3ce 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -532,11 +532,15 @@ static void move_myself(struct callback_head *head) kfree(rdtgrp); } + if (unlikely(current->flags & PF_EXITING)) + goto out; + preempt_disable(); /* update PQR_ASSOC MSR to make resource group go into effect */ resctrl_sched_in(); preempt_enable(); +out: kfree(callback); } @@ -725,6 +729,92 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +#ifdef CONFIG_PROC_CPU_RESCTRL + +/* + * A task can only be part of one resctrl control group and of one monitor + * group which is associated to that control group. + * + * 1) res: + * mon: + * + * resctrl is not available. + * + * 2) res:/ + * mon: + * + * Task is part of the root resctrl control group, and it is not associated + * to any monitor group. + * + * 3) res:/ + * mon:mon0 + * + * Task is part of the root resctrl control group and monitor group mon0. + * + * 4) res:group0 + * mon: + * + * Task is part of resctrl control group group0, and it is not associated + * to any monitor group. + * + * 5) res:group0 + * mon:mon1 + * + * Task is part of resctrl control group group0 and monitor group mon1. + */ +int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) +{ + struct rdtgroup *rdtg; + int ret = 0; + + mutex_lock(&rdtgroup_mutex); + + /* Return empty if resctrl has not been mounted. */ + if (!static_branch_unlikely(&rdt_enable_key)) { + seq_puts(s, "res:\nmon:\n"); + goto unlock; + } + + list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) { + struct rdtgroup *crg; + + /* + * Task information is only relevant for shareable + * and exclusive groups. + */ + if (rdtg->mode != RDT_MODE_SHAREABLE && + rdtg->mode != RDT_MODE_EXCLUSIVE) + continue; + + if (rdtg->closid != tsk->closid) + continue; + + seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "", + rdtg->kn->name); + seq_puts(s, "mon:"); + list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, + mon.crdtgrp_list) { + if (tsk->rmid != crg->mon.rmid) + continue; + seq_printf(s, "%s", crg->kn->name); + break; + } + seq_putc(s, '\n'); + goto unlock; + } + /* + * The above search should succeed. Otherwise return + * with an error. + */ + ret = -ENOENT; +unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} +#endif + static int rdt_last_cmd_status_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { @@ -1741,9 +1831,6 @@ static int set_cache_qos_cfg(int level, bool enable) struct rdt_domain *d; int cpu; - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - if (level == RDT_RESOURCE_L3) update = l3_qos_cfg_update; else if (level == RDT_RESOURCE_L2) @@ -1751,6 +1838,9 @@ static int set_cache_qos_cfg(int level, bool enable) else return -EINVAL; + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ @@ -1970,7 +2060,7 @@ static int rdt_get_tree(struct fs_context *fc) if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret < 0) goto out_info; @@ -2205,7 +2295,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + kfree(sentry); } } @@ -2243,7 +2337,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2446,7 +2544,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. */ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -2636,7 +2734,6 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, const char *name, umode_t mode, enum rdt_group_type rtype, struct rdtgroup **r) { @@ -2645,7 +2742,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; goto out_unlock; @@ -2718,7 +2815,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; @@ -2729,7 +2826,7 @@ out_destroy: out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2746,15 +2843,12 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) * to monitor a subset of tasks and cpus in its parent ctrl_mon group. */ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, - const char *name, - umode_t mode) + const char *name, umode_t mode) { struct rdtgroup *rdtgrp, *prgrp; int ret; - ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP, - &rdtgrp); + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); if (ret) return ret; @@ -2767,7 +2861,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2776,7 +2870,6 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, * to allocate and monitor resources. */ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, const char *name, umode_t mode) { struct rdtgroup *rdtgrp; @@ -2784,8 +2877,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, u32 closid; int ret; - ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP, - &rdtgrp); + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); if (ret) return ret; @@ -2810,7 +2902,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); goto out_del_list; @@ -2826,7 +2918,7 @@ out_id_free: out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2859,14 +2951,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, * subdirectory */ if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) - return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode); + return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); /* * If RDT monitoring is supported and the parent directory is a valid * "mon_groups" directory, add a monitoring subdirectory. */ if (rdt_mon_capable && is_mon_groups(parent_kn, name)) - return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode); + return rdtgroup_mkdir_mon(parent_kn, name, mode); return -EPERM; } @@ -2952,13 +3044,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); + rdtgroup_ctrl_remove(kn, rdtgrp); + /* * Free all the child monitor group rmids. */ free_all_child_rdtgrp(rdtgrp); - rdtgroup_ctrl_remove(kn, rdtgrp); - return 0; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index adf9b71386ef..62b137c3c97a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -4,7 +4,7 @@ */ #include <linux/cpu.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/apic.h> #include <asm/processor.h> diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index ee48c3fc8a65..d3a0791bc052 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -7,7 +7,7 @@ #include <linux/cpu.h> #include <asm/apic.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/processor.h> #include "cpu.h" diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98..e2ad30e474f8 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -14,6 +14,9 @@ #include "cpu.h" +#undef pr_fmt +#define pr_fmt(fmt) "tsx: " fmt + enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; void tsx_disable(void) @@ -99,7 +102,7 @@ void __init tsx_init(void) tsx_ctrl_state = x86_get_tsx_auto_mode(); } else { tsx_ctrl_state = TSX_CTRL_DISABLE; - pr_err("tsx: invalid option, defaulting to off\n"); + pr_err("invalid option, defaulting to off\n"); } } else { /* tsx= not provided */ @@ -115,11 +118,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +135,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 8e6f2f4b4afe..df1358ba622b 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -16,13 +16,6 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ -#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 -#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 -#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 -#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 -#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 -#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 - static void init_zhaoxin_cap(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -58,8 +51,6 @@ static void init_zhaoxin_cap(struct cpuinfo_x86 *c) if (c->x86 >= 0x6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - cpu_detect_cache_sizes(c); } static void early_init_zhaoxin(struct cpuinfo_x86 *c) @@ -89,31 +80,6 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) } -static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) -{ - u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; - - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); - msr_ctl = vmx_msr_high | vmx_msr_low; - - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) - set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) - set_cpu_cap(c, X86_FEATURE_VNMI); - if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { - rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx_msr_low, vmx_msr_high); - msr_ctl2 = vmx_msr_high | vmx_msr_low; - if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && - (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) - set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) - set_cpu_cap(c, X86_FEATURE_EPT); - if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) - set_cpu_cap(c, X86_FEATURE_VPID); - } -} - static void init_zhaoxin(struct cpuinfo_x86 *c) { early_init_zhaoxin(c); @@ -141,8 +107,7 @@ static void init_zhaoxin(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif - if (cpu_has(c, X86_FEATURE_VMX)) - zhaoxin_detect_vmx_virtcap(c); + init_ia32_feat_ctl(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 00fc55ac7ffa..fd87b59452a3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -370,7 +370,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) /* Add crashk_low_res region */ if (crashk_low_res.end) { ei.addr = crashk_low_res.start; - ei.size = crashk_low_res.end - crashk_low_res.start + 1; + ei.size = resource_size(&crashk_low_res); ei.type = E820_TYPE_RAM; add_e820_entry(params, &ei); } diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c new file mode 100644 index 000000000000..c0159a7bca6d --- /dev/null +++ b/arch/x86/kernel/crash_core_32.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/crash_core.h> + +#include <asm/pgtable.h> +#include <asm/setup.h> + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifdef CONFIG_X86_PAE + VMCOREINFO_CONFIG(X86_PAE); +#endif +} diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c new file mode 100644 index 000000000000..845a57eb4eb7 --- /dev/null +++ b/arch/x86/kernel/crash_core_64.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/crash_core.h> + +#include <asm/pgtable.h> +#include <asm/setup.h> + +void arch_crash_save_vmcoreinfo(void) +{ + u64 sme_mask = sme_me_mask; + + VMCOREINFO_NUMBER(phys_base); + VMCOREINFO_SYMBOL(init_top_pgt); + vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", + pgtable_l5_enabled()); + +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_NUMBER(sme_mask); +} diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e07424e19274..ae64ec7f752f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) } NOKPROBE_SYMBOL(oops_end); -int __die(const char *str, struct pt_regs *regs, long err) +static void __die_header(const char *str, struct pt_regs *regs, long err) { const char *pr = ""; @@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); +} +NOKPROBE_SYMBOL(__die_header); +static int __die_body(const char *str, struct pt_regs *regs, long err) +{ show_regs(regs); print_modules(); @@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err) return 0; } +NOKPROBE_SYMBOL(__die_body); + +int __die(const char *str, struct pt_regs *regs, long err) +{ + __die_header(str, regs, err); + return __die_body(str, regs, err); +} NOKPROBE_SYMBOL(__die); /* @@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr) +{ + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + __die_header(str, regs, err); + if (gp_addr) + kasan_non_canonical_hook(gp_addr); + if (__die_body(str, regs, err)) + sig = 0; + oops_end(flags, regs, sig); +} + void show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4cba91ec8049..2f9ec14be3b1 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -710,8 +710,12 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x3e20, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_INTEL, 0x3ec4, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_INTEL, 0x8a12, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0071b794ed19..400a05e1c1c5 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + fpregs_deactivate(fpu); fpregs_unlock(); } @@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } if (!ret) fpregs_mark_activate(); + else + fpregs_deactivate(fpu); fpregs_unlock(); err_out: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 319be936c348..a1806598aaa4 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -107,23 +107,20 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); -static int xfeature_is_supervisor(int xfeature_nr) +static bool xfeature_is_supervisor(int xfeature_nr) { /* - * We currently do not support supervisor states, but if - * we did, we could find out like this. - * - * SDM says: If state component 'i' is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. + * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1) + * returns ECX[0] set to (1) for a supervisor state, and cleared (0) + * for a user state. */ u32 eax, ebx, ecx, edx; cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); - return !!(ecx & 1); + return ecx & 1; } -static int xfeature_is_user(int xfeature_nr) +static bool xfeature_is_user(int xfeature_nr) { return !xfeature_is_supervisor(xfeature_nr); } @@ -259,7 +256,7 @@ static void __init setup_xstate_features(void) xmm_space); xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; - xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state, + xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { @@ -419,7 +416,8 @@ static void __init setup_init_fpu_buf(void) print_xstate_features(); if (boot_cpu_has(X86_FEATURE_XSAVES)) - init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | + xfeatures_mask; /* * Init all the features state with header.xfeatures being 0x0 diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 060a361d9d11..37a0aeaf89e7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/memory.h> +#include <linux/vmalloc.h> #include <trace/syscall.h> @@ -34,6 +35,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE +static int ftrace_poke_late = 0; + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) { @@ -43,84 +46,37 @@ int ftrace_arch_code_modify_prepare(void) * ftrace has it set to "read/write". */ mutex_lock(&text_mutex); - set_kernel_text_rw(); - set_all_modules_text_rw(); + ftrace_poke_late = 1; return 0; } int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) { - set_all_modules_text_ro(); - set_kernel_text_ro(); + /* + * ftrace_make_{call,nop}() may be called during + * module load, and we need to finish the text_poke_queue() + * that they do, here. + */ + text_poke_finish(); + ftrace_poke_late = 0; mutex_unlock(&text_mutex); return 0; } -union ftrace_code_union { - char code[MCOUNT_INSN_SIZE]; - struct { - unsigned char op; - int offset; - } __attribute__((packed)); -}; - -static int ftrace_calc_offset(long ip, long addr) -{ - return (int)(addr - ip); -} - -static unsigned char * -ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) +static const char *ftrace_nop_replace(void) { - static union ftrace_code_union calc; - - calc.op = op; - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); - - return calc.code; -} - -static unsigned char * -ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - return ftrace_text_replace(0xe8, ip, addr); -} - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) -{ - return addr >= start && addr < end; + return ideal_nops[NOP_ATOMIC5]; } -static unsigned long text_ip_addr(unsigned long ip) +static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { - /* - * On x86_64, kernel text mappings are mapped read-only, so we use - * the kernel identity mapping instead of the kernel text mapping - * to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); - - return ip; + return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } -static const unsigned char *ftrace_nop_replace(void) +static int ftrace_verify_code(unsigned long ip, const char *old_code) { - return ideal_nops[NOP_ATOMIC5]; -} - -static int -ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - ftrace_expected = old_code; + char cur_code[MCOUNT_INSN_SIZE]; /* * Note: @@ -129,31 +85,46 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, * Carefully read and modify the code with probe_kernel_*(), and make * sure what we read is what we expected it to be before modifying it. */ - /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + WARN_ON(1); return -EFAULT; + } /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + WARN_ON(1); return -EINVAL; + } - ip = text_ip_addr(ip); - - /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) - return -EPERM; + return 0; +} - sync_core(); +/* + * Marked __ref because it calls text_poke_early() which is .init.text. That is + * ok because that call will happen early, during boot, when .init sections are + * still present. + */ +static int __ref +ftrace_modify_code_direct(unsigned long ip, const char *old_code, + const char *new_code) +{ + int ret = ftrace_verify_code(ip, old_code); + if (ret) + return ret; + /* replace the text with the new text */ + if (ftrace_poke_late) + text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); + else + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); return 0; } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_call_replace(ip, addr); new = ftrace_nop_replace(); @@ -167,19 +138,20 @@ int ftrace_make_nop(struct module *mod, * just modify the code directly. */ if (addr == MCOUNT_ADDR) - return ftrace_modify_code_direct(rec->ip, old, new); + return ftrace_modify_code_direct(ip, old, new); - ftrace_expected = NULL; - - /* Normal cases use add_brk_on_nop */ + /* + * x86 overrides ftrace_replace_code -- this function will never be used + * in this case. + */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned const char *new, *old; unsigned long ip = rec->ip; + const char *new, *old; old = ftrace_nop_replace(); new = ftrace_call_replace(ip, addr); @@ -189,43 +161,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* - * The modifying_ftrace_code is used to tell the breakpoint - * handler to call ftrace_int3_handler(). If it fails to - * call this handler for a breakpoint added by ftrace, then - * the kernel may crash. - * - * As atomic_writes on x86 do not need a barrier, we do not - * need to add smp_mb()s for this to work. It is also considered - * that we can not read the modifying_ftrace_code before - * executing the breakpoint. That would be quite remarkable if - * it could do that. Here's the flow that is required: - * - * CPU-0 CPU-1 - * - * atomic_inc(mfc); - * write int3s - * <trap-int3> // implicit (r)mb - * if (atomic_read(mfc)) - * call ftrace_int3_handler() - * - * Then when we are finished: - * - * atomic_dec(mfc); - * - * If we hit a breakpoint that was not set by ftrace, it does not - * matter if ftrace_int3_handler() is called or not. It will - * simply be ignored. But it is crucial that a ftrace nop/caller - * breakpoint is handled. No other user should ever place a - * breakpoint on an ftrace nop/caller location. It must only - * be done by this code. - */ -atomic_t modifying_ftrace_code __read_mostly; - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code); - -/* * Should never be called: * As it is only called by __ftrace_replace_code() which is called by * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() @@ -237,452 +172,84 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); - ftrace_expected = NULL; return -EINVAL; } -static unsigned long ftrace_update_func; -static unsigned long ftrace_update_func_call; - -static int update_ftrace_func(unsigned long ip, void *new) -{ - unsigned char old[MCOUNT_INSN_SIZE]; - int ret; - - memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); - - ftrace_update_func = ip; - /* Make sure the breakpoints see the ftrace_update_func update */ - smp_wmb(); - - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - - ret = ftrace_modify_code(ip, old, new); - - atomic_dec(&modifying_ftrace_code); - - return ret; -} - int ftrace_update_ftrace_func(ftrace_func_t func) { - unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char *new; - int ret; - - ftrace_update_func_call = (unsigned long)func; - - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - - /* Also update the regs callback function */ - if (!ret) { - ip = (unsigned long)(&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - } - - return ret; -} - -static nokprobe_inline int is_ftrace_caller(unsigned long ip) -{ - if (ip == ftrace_update_func) - return 1; - - return 0; -} - -/* - * A breakpoint was added to the code address we are about to - * modify, and this is the handle that will just skip over it. - * We are either changing a nop into a trace call, or a trace - * call to a nop. While the change is taking place, we treat - * it just like it was a nop. - */ -int ftrace_int3_handler(struct pt_regs *regs) -{ unsigned long ip; + const char *new; - if (WARN_ON_ONCE(!regs)) - return 0; - - ip = regs->ip - INT3_INSN_SIZE; - - if (ftrace_location(ip)) { - int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); - return 1; - } else if (is_ftrace_caller(ip)) { - if (!ftrace_update_func_call) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } - int3_emulate_call(regs, ftrace_update_func_call); - return 1; - } - - return 0; -} -NOKPROBE_SYMBOL(ftrace_int3_handler); - -static int ftrace_write(unsigned long ip, const char *val, int size) -{ - ip = text_ip_addr(ip); - - if (probe_kernel_write((void *)ip, val, size)) - return -EPERM; - - return 0; -} - -static int add_break(unsigned long ip, const char *old) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - ftrace_expected = old; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - - return ftrace_write(ip, &brk, 1); -} - -static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned const char *old; - unsigned long ip = rec->ip; - - old = ftrace_call_replace(ip, addr); - - return add_break(rec->ip, old); -} - - -static int add_brk_on_nop(struct dyn_ftrace *rec) -{ - unsigned const char *old; - - old = ftrace_nop_replace(); - - return add_break(rec->ip, old); -} - -static int add_breakpoints(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ftrace_addr = ftrace_get_addr_curr(rec); - - ret = ftrace_test_record(rec, enable); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_brk_on_nop(rec); - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_brk_on_call(rec, ftrace_addr); - } - return 0; -} - -/* - * On error, we need to remove breakpoints. This needs to - * be done caefully. If the address does not currently have a - * breakpoint, we know we are done. Otherwise, we look at the - * remaining 4 bytes of the instruction. If it matches a nop - * we replace the breakpoint with the nop. Otherwise we replace - * it with the call instruction. - */ -static int remove_breakpoint(struct dyn_ftrace *rec) -{ - unsigned char ins[MCOUNT_INSN_SIZE]; - unsigned char brk = BREAKPOINT_INSTRUCTION; - const unsigned char *nop; - unsigned long ftrace_addr; - unsigned long ip = rec->ip; - - /* If we fail the read, just give up */ - if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* If this does not have a breakpoint, we are done */ - if (ins[0] != brk) - return 0; - - nop = ftrace_nop_replace(); - - /* - * If the last 4 bytes of the instruction do not match - * a nop, then we assume that this is a call to ftrace_addr. - */ - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { - /* - * For extra paranoidism, we check if the breakpoint is on - * a call that would actually jump to the ftrace_addr. - * If not, don't touch the breakpoint, we make just create - * a disaster. - */ - ftrace_addr = ftrace_get_addr_new(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) - goto update; - - /* Check both ftrace_addr and ftrace_old_addr */ - ftrace_addr = ftrace_get_addr_curr(rec); - nop = ftrace_call_replace(ip, ftrace_addr); - - ftrace_expected = nop; - - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) - return -EINVAL; - } - - update: - return ftrace_write(ip, nop, 1); -} - -static int add_update_code(unsigned long ip, unsigned const char *new) -{ - /* skip breakpoint */ - ip++; - new++; - return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); -} - -static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - return add_update_code(ip, new); -} - -static int add_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - return add_update_code(ip, new); -} - -static int add_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_test_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return add_update_call(rec, ftrace_addr); - - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return add_update_nop(rec); - } - - return 0; -} - -static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_call_replace(ip, addr); - - return ftrace_write(ip, new, 1); -} - -static int finish_update_nop(struct dyn_ftrace *rec) -{ - unsigned long ip = rec->ip; - unsigned const char *new; - - new = ftrace_nop_replace(); - - return ftrace_write(ip, new, 1); -} - -static int finish_update(struct dyn_ftrace *rec, bool enable) -{ - unsigned long ftrace_addr; - int ret; - - ret = ftrace_update_record(rec, enable); - - ftrace_addr = ftrace_get_addr_new(rec); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - - case FTRACE_UPDATE_MODIFY_CALL: - case FTRACE_UPDATE_MAKE_CALL: - /* converting nop to call */ - return finish_update_call(rec, ftrace_addr); + ip = (unsigned long)(&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); - case FTRACE_UPDATE_MAKE_NOP: - /* converting a call to a nop */ - return finish_update_nop(rec); - } + ip = (unsigned long)(&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); return 0; } -static void do_sync_core(void *data) -{ - sync_core(); -} - -static void run_sync(void) -{ - int enable_irqs; - - /* No need to sync if there's only one CPU */ - if (num_online_cpus() == 1) - return; - - enable_irqs = irqs_disabled(); - - /* We may be called with interrupts disabled (on bootup). */ - if (enable_irqs) - local_irq_enable(); - on_each_cpu(do_sync_core, NULL, 1); - if (enable_irqs) - local_irq_disable(); -} - void ftrace_replace_code(int enable) { struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; - const char *report = "adding breakpoints"; - int count = 0; + const char *new, *old; int ret; for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = add_breakpoints(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } - - run_sync(); + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - report = "updating code"; - count = 0; + case FTRACE_UPDATE_MAKE_CALL: + old = ftrace_nop_replace(); + break; - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); + case FTRACE_UPDATE_MODIFY_CALL: + case FTRACE_UPDATE_MAKE_NOP: + old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec)); + break; + } - ret = add_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; + ret = ftrace_verify_code(rec->ip, old); + if (ret) { + ftrace_bug(ret, rec); + return; + } } - run_sync(); - - report = "removing breakpoints"; - count = 0; - for_ftrace_rec_iter(iter) { rec = ftrace_rec_iter_record(iter); - ret = finish_update(rec, enable); - if (ret) - goto remove_breakpoints; - count++; - } + switch (ftrace_test_record(rec, enable)) { + case FTRACE_UPDATE_IGNORE: + default: + continue; - run_sync(); + case FTRACE_UPDATE_MAKE_CALL: + case FTRACE_UPDATE_MODIFY_CALL: + new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec)); + break; - return; + case FTRACE_UPDATE_MAKE_NOP: + new = ftrace_nop_replace(); + break; + } - remove_breakpoints: - pr_warn("Failed on %s (%d):\n", report, count); - ftrace_bug(ret, rec); - for_ftrace_rec_iter(iter) { - rec = ftrace_rec_iter_record(iter); - /* - * Breakpoints are handled only when this function is in - * progress. The system could not work with them. - */ - if (remove_breakpoint(rec)) - BUG(); + text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL); + ftrace_update_record(rec, enable); } - run_sync(); -} - -static int -ftrace_modify_code(unsigned long ip, unsigned const char *old_code, - unsigned const char *new_code) -{ - int ret; - - ret = add_break(ip, old_code); - if (ret) - goto out; - - run_sync(); - - ret = add_update_code(ip, new_code); - if (ret) - goto fail_update; - - run_sync(); - - ret = ftrace_write(ip, new_code, 1); - /* - * The breakpoint is handled only when this function is in progress. - * The system could not work if we could not remove it. - */ - BUG_ON(ret); - out: - run_sync(); - return ret; - - fail_update: - /* Also here the system could not work with the breakpoint */ - if (ftrace_write(ip, old_code, 1)) - BUG(); - goto out; + text_poke_finish(); } void arch_ftrace_update_code(int command) { - /* See comment above by declaration of modifying_ftrace_code */ - atomic_inc(&modifying_ftrace_code); - ftrace_modify_all_code(command); - - atomic_dec(&modifying_ftrace_code); } int __init ftrace_dyn_arch_init(void) @@ -747,6 +314,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long start_offset; unsigned long end_offset; unsigned long op_offset; + unsigned long call_offset; unsigned long offset; unsigned long npages; unsigned long size; @@ -763,10 +331,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) start_offset = (unsigned long)ftrace_regs_caller; end_offset = (unsigned long)ftrace_regs_caller_end; op_offset = (unsigned long)ftrace_regs_caller_op_ptr; + call_offset = (unsigned long)ftrace_regs_call; } else { start_offset = (unsigned long)ftrace_caller; end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; + call_offset = (unsigned long)ftrace_call; } size = end_offset - start_offset; @@ -823,16 +393,21 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* put in the new offset to the ftrace_ops */ memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); + /* put in the call to the function */ + mutex_lock(&text_mutex); + call_offset -= start_offset; + memcpy(trampoline + call_offset, + text_gen_insn(CALL_INSN_OPCODE, + trampoline + call_offset, + ftrace_ops_get_func(ops)), CALL_INSN_SIZE); + mutex_unlock(&text_mutex); + /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; set_vm_flush_reset_perms(trampoline); - /* - * Module allocation needs to be completed by making the page - * executable. The page is still writable, which is a security hazard, - * but anyhow ftrace breaks W^X completely. - */ + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -859,62 +434,54 @@ static unsigned long calc_trampoline_call_offset(bool save_regs) void arch_ftrace_update_trampoline(struct ftrace_ops *ops) { ftrace_func_t func; - unsigned char *new; unsigned long offset; unsigned long ip; unsigned int size; - int ret, npages; + const char *new; - if (ops->trampoline) { - /* - * The ftrace_ops caller may set up its own trampoline. - * In such a case, this code must not modify it. - */ - if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) - return; - npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; - set_memory_rw(ops->trampoline, npages); - } else { + if (!ops->trampoline) { ops->trampoline = create_trampoline(ops, &size); if (!ops->trampoline) return; ops->trampoline_size = size; - npages = PAGE_ALIGN(size) >> PAGE_SHIFT; + return; } + /* + * The ftrace_ops caller may set up its own trampoline. + * In such a case, this code must not modify it. + */ + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + return; + offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); ip = ops->trampoline + offset; - func = ftrace_ops_get_func(ops); - ftrace_update_func_call = (unsigned long)func; - + mutex_lock(&text_mutex); /* Do a safe modify in case the trampoline is executing */ new = ftrace_call_replace(ip, (unsigned long)func); - ret = update_ftrace_func(ip, new); - set_memory_ro(ops->trampoline, npages); - - /* The update should never fail */ - WARN_ON(ret); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + mutex_unlock(&text_mutex); } /* Return the address of the function the trampoline calls */ static void *addr_from_call(void *ptr) { - union ftrace_code_union calc; + union text_poke_insn call; int ret; - ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); + ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; /* Make sure this is a call */ - if (WARN_ON_ONCE(calc.op != 0xe8)) { - pr_warn("Expected e8, got %x\n", calc.op); + if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) { + pr_warn("Expected E8, got %x\n", call.opcode); return NULL; } - return ptr + MCOUNT_INSN_SIZE + calc.offset; + return ptr + CALL_INSN_SIZE + call.disp; } void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, @@ -981,19 +548,18 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); -static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) +static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { - return ftrace_text_replace(0xe9, ip, addr); + return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr); } static int ftrace_mod_jmp(unsigned long ip, void *func) { - unsigned char *new; + const char *new; - ftrace_update_func_call = 0UL; new = ftrace_jmp_replace(ip, (unsigned long)func); - - return update_ftrace_func(ip, new); + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); + return 0; } int ftrace_enable_ftrace_graph_caller(void) @@ -1019,10 +585,9 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, unsigned long frame_pointer) { + unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; int faulted; - unsigned long return_hooker = (unsigned long) - &return_to_handler; /* * When resuming from suspend-to-ram, this function can be indirectly @@ -1043,20 +608,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; /* - * If the return location is actually pointing directly to - * the start of a direct trampoline (if we trace the trampoline - * it will still be offset by MCOUNT_INSN_SIZE), then the - * return address is actually off by one word, and we - * need to adjust for that. - */ - if (ftrace_direct_func_count) { - if (ftrace_find_direct_func(self_addr + MCOUNT_INSN_SIZE)) { - self_addr = *parent; - parent++; - } - } - - /* * Protect against fault, even if it shouldn't * happen. This tool is too much intrusive to * ignore such a protection. diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c6f791bc481e..7a50f0b62a70 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -84,7 +84,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a) static inline void hpet_set_mapping(void) { - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_virt_address = ioremap(hpet_address, HPET_MMAP_SIZE); } static inline void hpet_clear_mapping(void) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index c1a8b9e71408..9c4498ea0b3c 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -16,15 +16,7 @@ #include <asm/alternative.h> #include <asm/text-patching.h> -union jump_code_union { - char code[JUMP_LABEL_NOP_SIZE]; - struct { - char jump; - int offset; - } __attribute__((packed)); -}; - -static void bug_at(unsigned char *ip, int line) +static void bug_at(const void *ip, int line) { /* * The location is not an op that we were expecting. @@ -35,42 +27,42 @@ static void bug_at(unsigned char *ip, int line) BUG(); } -static void __jump_label_set_jump_code(struct jump_entry *entry, - enum jump_label_type type, - union jump_code_union *code, - int init) +static const void * +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init) { const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - const void *expect; + const void *expect, *code; + const void *addr, *dest; int line; - code->jump = 0xe9; - code->offset = jump_entry_target(entry) - - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + addr = (void *)jump_entry_code(entry); + dest = (void *)jump_entry_target(entry); + + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); if (init) { expect = default_nop; line = __LINE__; } else if (type == JUMP_LABEL_JMP) { expect = ideal_nop; line = __LINE__; } else { - expect = code->code; line = __LINE__; + expect = code; line = __LINE__; } - if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) - bug_at((void *)jump_entry_code(entry), line); + if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) + bug_at(addr, line); if (type == JUMP_LABEL_NOP) - memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE); + code = ideal_nop; + + return code; } -static void __ref __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static void inline __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { - union jump_code_union code; - - __jump_label_set_jump_code(entry, type, &code, init); + const void *opcode = __jump_label_set_jump_code(entry, type, init); /* * As long as only a single processor is running and the code is still @@ -84,31 +76,33 @@ static void __ref __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), &code, + text_poke_early((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE); return; } - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); + text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); } -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +static void __ref jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { mutex_lock(&text_mutex); - __jump_label_transform(entry, type, 0); + __jump_label_transform(entry, type, init); mutex_unlock(&text_mutex); } -#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) -static struct text_poke_loc tp_vec[TP_VEC_MAX]; -static int tp_vec_nr; +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_transform(entry, type, 0); +} bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { - struct text_poke_loc *tp; - void *entry_code; + const void *opcode; if (system_state == SYSTEM_BOOTING) { /* @@ -118,53 +112,19 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, return true; } - /* - * No more space in the vector, tell upper layer to apply - * the queue before continuing. - */ - if (tp_vec_nr == TP_VEC_MAX) - return false; - - tp = &tp_vec[tp_vec_nr]; - - entry_code = (void *)jump_entry_code(entry); - - /* - * The INT3 handler will do a bsearch in the queue, so we need entries - * to be sorted. We can survive an unsorted list by rejecting the entry, - * forcing the generic jump_label code to apply the queue. Warning once, - * to raise the attention to the case of an unsorted entry that is - * better not happen, because, in the worst case we will perform in the - * same way as we do without batching - with some more overhead. - */ - if (tp_vec_nr > 0) { - int prev = tp_vec_nr - 1; - struct text_poke_loc *prev_tp = &tp_vec[prev]; - - if (WARN_ON_ONCE(prev_tp->addr > entry_code)) - return false; - } - - __jump_label_set_jump_code(entry, type, - (union jump_code_union *)&tp->text, 0); - - text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); - - tp_vec_nr++; - + mutex_lock(&text_mutex); + opcode = __jump_label_set_jump_code(entry, type, 0); + text_poke_queue((void *)jump_entry_code(entry), + opcode, JUMP_LABEL_NOP_SIZE, NULL); + mutex_unlock(&text_mutex); return true; } void arch_jump_label_transform_apply(void) { - if (!tp_vec_nr) - return; - mutex_lock(&text_mutex); - text_poke_bp_batch(tp_vec, tp_vec_nr); + text_poke_finish(); mutex_unlock(&text_mutex); - - tp_vec_nr = 0; } static enum { @@ -193,5 +153,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, jlstate = JL_STATE_NO_UPDATE; } if (jlstate == JL_STATE_UPDATE) - __jump_label_transform(entry, type, 1); + jump_label_transform(entry, type, 1); } diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index d2f4e706a428..f293d872602a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -177,7 +177,7 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, * acpi_rsdp=<addr> on kernel command line to make second kernel boot * without efi. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; params->secure_boot = boot_params.secure_boot; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4f13af7cbcdb..4d7022a740ab 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -40,6 +40,7 @@ #include <linux/frame.h> #include <linux/kasan.h> #include <linux/moduleloader.h> +#include <linux/vmalloc.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -119,14 +120,14 @@ __synthesize_relative_insn(void *dest, void *from, void *to, u8 op) /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -301,7 +302,7 @@ static int can_probe(unsigned long paddr) * Another debugging subsystem might insert this breakpoint. * In that case, we can't recover it. */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) return 0; addr += insn.length; } @@ -356,7 +357,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* Another subsystem puts a breakpoint, failed to recover */ - if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn->opcode.bytes[0] == INT3_INSN_OPCODE) return 0; /* We should not singlestep on the exception masking instructions */ @@ -400,14 +401,14 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, int len = insn->length; if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); - len += RELATIVEJUMP_SIZE; + len += JMP32_INSN_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; @@ -501,12 +502,14 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_arm_kprobe(struct kprobe *p) { - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); + text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1); + text_poke_sync(); } void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); + text_poke_sync(); } void arch_remove_kprobe(struct kprobe *p) @@ -609,7 +612,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, regs->flags |= X86_EFLAGS_TF; regs->flags &= ~X86_EFLAGS_IF; /* single step inline if the instruction is an int3 */ - if (p->opcode == BREAKPOINT_INSTRUCTION) + if (p->opcode == INT3_INSN_OPCODE) regs->ip = (unsigned long)p->addr; else regs->ip = (unsigned long)p->ainsn.insn; @@ -695,7 +698,7 @@ int kprobe_int3_handler(struct pt_regs *regs) reset_current_kprobe(); return 1; } - } else if (*addr != BREAKPOINT_INSTRUCTION) { + } else if (*addr != INT3_INSN_OPCODE) { /* * The breakpoint instruction was removed right * after we hit it. Another cpu has removed diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 8900329c28a7..3f45b5c43a71 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -38,7 +38,7 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) long offs; int i; - for (i = 0; i < RELATIVEJUMP_SIZE; i++) { + for (i = 0; i < JMP32_INSN_SIZE; i++) { kp = get_kprobe((void *)addr - i); /* This function only handles jump-optimized kprobe */ if (kp && kprobe_optimized(kp)) { @@ -62,10 +62,10 @@ found: if (addr == (unsigned long)kp->addr) { buf[0] = kp->opcode; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE); } else { offs = addr - (unsigned long)kp->addr - 1; - memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); + memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs); } return (unsigned long)buf; @@ -141,8 +141,6 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_END_IDX \ ((long)optprobe_template_end - (long)optprobe_template_entry) -#define INT3_SIZE sizeof(kprobe_opcode_t) - /* Optimized kprobe call back function: called from optinsn */ static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) @@ -162,7 +160,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) regs->cs |= get_kernel_rpl(); regs->gs = 0; #endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; + regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE; regs->orig_ax = ~0UL; __this_cpu_write(current_kprobe, &op->kp); @@ -179,7 +177,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) struct insn insn; int len = 0, ret; - while (len < RELATIVEJUMP_SIZE) { + while (len < JMP32_INSN_SIZE) { ret = __copy_instruction(dest + len, src + len, real + len, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; @@ -271,7 +269,7 @@ static int can_optimize(unsigned long paddr) return 0; /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) + if (size - offset < JMP32_INSN_SIZE) return 0; /* Decode instructions */ @@ -290,15 +288,15 @@ static int can_optimize(unsigned long paddr) kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) return 0; /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); /* Check any instructions don't jump into target */ if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) + insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE, + DISP32_SIZE)) return 0; addr += insn.length; } @@ -374,7 +372,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, * Verify if the address gap is in 2GB range, because this uses * a relative jump. */ - rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE; if (abs(rel) > 0x7fffffff) { ret = -ERANGE; goto err; @@ -401,7 +399,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, /* Set returning jmp instruction at the tail of out-of-line buffer */ synthesize_reljump(buf + len, slot + len, (u8 *)op->kp.addr + op->optinsn.size); - len += RELATIVEJUMP_SIZE; + len += JMP32_INSN_SIZE; /* We have to use text_poke() for instruction buffer because it is RO */ text_poke(slot, buf, len); @@ -416,49 +414,50 @@ err: } /* - * Replace breakpoints (int3) with relative jumps. + * Replace breakpoints (INT3) with relative jumps (JMP.d32). * Caller must call with locking kprobe_mutex and text_mutex. + * + * The caller will have installed a regular kprobe and after that issued + * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in + * the 4 bytes after the INT3 are unused and can now be overwritten. */ void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; - u8 insn_buff[RELATIVEJUMP_SIZE]; + u8 insn_buff[JMP32_INSN_SIZE]; list_for_each_entry_safe(op, tmp, oplist, list) { s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + ((long)op->kp.addr + JMP32_INSN_SIZE)); WARN_ON(kprobe_disabled(&op->kp)); /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE, + DISP32_SIZE); - insn_buff[0] = RELATIVEJUMP_OPCODE; + insn_buff[0] = JMP32_INSN_OPCODE; *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); + text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL); list_del_init(&op->list); } } -/* Replace a relative jump with a breakpoint (int3). */ +/* + * Replace a relative jump (JMP.d32) with a breakpoint (INT3). + * + * After that, we can restore the 4 bytes after the INT3 to undo what + * arch_optimize_kprobes() scribbled. This is safe since those bytes will be + * unused once the INT3 lands. + */ void arch_unoptimize_kprobe(struct optimized_kprobe *op) { - u8 insn_buff[RELATIVEJUMP_SIZE]; - u8 emulate_buff[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - insn_buff[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - emulate_buff[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - emulate_buff); + arch_arm_kprobe(&op->kp); + text_poke(op->kp.addr + INT3_INSN_SIZE, + op->optinsn.copied_insn, DISP32_SIZE); + text_poke_sync(); } /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 32ef1ee733b7..81045aabb6f4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -245,17 +245,13 @@ NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); dotraplinkage void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - enum ctx_state prev_state; - switch (kvm_read_and_reset_pf_reason()) { default: do_page_fault(regs, error_code, address); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ - prev_state = exception_enter(); kvm_async_pf_task_wait((u32)address, !user_mode(regs)); - exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b2463fcb20a8..c57e1ca70fd1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -28,6 +28,89 @@ #include <asm/desc.h> #include <asm/mmu_context.h> #include <asm/syscalls.h> +#include <asm/pgtable_areas.h> + +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +} + +void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { + clear_LDT(); + } +} + +void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ + /* + * Load the LDT if either the old or new mm had an LDT. + * + * An mm will never go from having an LDT to not having an LDT. Two + * mms never share an LDT, so we don't gain anything by checking to + * see whether the LDT changed. There's also no guarantee that + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, + * then prev->context.ldt will also be non-NULL. + * + * If we really cared, we could optimize the case where prev == next + * and we're exiting lazy mode. Most of the time, if this happens, + * we don't actually need to reload LDTR, but modify_ldt() is mostly + * used by legacy code and emulators where we don't need this level of + * performance. + * + * This uses | instead of || because it generates better code. + */ + if (unlikely((unsigned long)prev->context.ldt | + (unsigned long)next->context.ldt)) + load_mm_ldt(next); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} static void refresh_ldt_segments(void) { diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7b45e8daad22..02bddfc122a4 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -250,15 +250,3 @@ void machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } - -void arch_crash_save_vmcoreinfo(void) -{ -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifdef CONFIG_X86_PAE - VMCOREINFO_CONFIG(X86_PAE); -#endif -} - diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 16e125a50b33..ad5cdd6a5f23 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -398,25 +398,6 @@ void machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } -void arch_crash_save_vmcoreinfo(void) -{ - u64 sme_mask = sme_me_mask; - - VMCOREINFO_NUMBER(phys_base); - VMCOREINFO_SYMBOL(init_top_pgt); - vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", - pgtable_l5_enabled()); - -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", - kaslr_offset()); - VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); - VMCOREINFO_NUMBER(sme_mask); -} - /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e676a9916c49..54c21d6abd5a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); -static void nmi_max_handler(struct irq_work *w) +static void nmi_check_duration(struct nmiaction *action, u64 duration) { - struct nmiaction *a = container_of(w, struct nmiaction, irq_work); + u64 whole_msecs = READ_ONCE(action->max_duration); int remainder_ns, decimal_msecs; - u64 whole_msecs = READ_ONCE(a->max_duration); + + if (duration < nmi_longest_ns || duration < action->max_duration) + return; + + action->max_duration = duration; remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", - a->handler, whole_msecs, decimal_msecs); + action->handler, whole_msecs, decimal_msecs); } static int nmi_handle(unsigned int type, struct pt_regs *regs) @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns || delta < a->max_duration) - continue; - - a->max_duration = delta; - irq_work_queue(&a->irq_work); + nmi_check_duration(a, delta); } rcu_read_unlock(); @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; - init_irq_work(&action->irq_work, nmi_max_handler); - raw_spin_lock_irqsave(&desc->lock, flags); /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 61e93a318983..839b5244e3b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -615,12 +615,8 @@ void speculation_ctrl_update_current(void) void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { - struct thread_struct *prev, *next; unsigned long tifp, tifn; - prev = &prev_p->thread; - next = &next_p->thread; - tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 323499f48858..5052ced43373 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -124,7 +124,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ip = new_ip; regs->sp = new_sp; regs->flags = X86_EFLAGS_IF; - force_iret(); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 506d66830d4d..ffd497804dbc 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -394,7 +394,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - force_iret(); } void diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1daf8f2aa21f..896d74cb5081 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -110,7 +110,7 @@ static void ich_force_enable_hpet(struct pci_dev *dev) } /* use bits 31:14, 16 kB aligned */ - rcba_base = ioremap_nocache(rcba, 0x4000); + rcba_base = ioremap(rcba, 0x4000); if (rcba_base == NULL) { dev_printk(KERN_DEBUG, &dev->dev, "ioremap failed; " "cannot force enable HPET\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cedfe2077a69..a74262c71484 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -2,130 +2,54 @@ /* * Copyright (C) 1995 Linus Torvalds * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons <orc@pell.chi.il.us>, July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle <bmoyle@mvista.com>, February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel <mochel@osdl.org>, March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach <xela@slit.de>, December 2002. - * - */ - -/* - * This file handles the architecture-dependent parts of initialization + * This file contains the setup_arch() code, which handles the architecture-dependent + * parts of early kernel initialization. */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/screen_info.h> -#include <linux/ioport.h> -#include <linux/acpi.h> -#include <linux/sfi.h> -#include <linux/apm_bios.h> -#include <linux/initrd.h> -#include <linux/memblock.h> -#include <linux/seq_file.h> #include <linux/console.h> -#include <linux/root_dev.h> -#include <linux/highmem.h> -#include <linux/export.h> +#include <linux/crash_dump.h> +#include <linux/dmi.h> #include <linux/efi.h> -#include <linux/init.h> -#include <linux/edd.h> +#include <linux/init_ohci1394_dma.h> +#include <linux/initrd.h> #include <linux/iscsi_ibft.h> -#include <linux/nodemask.h> -#include <linux/kexec.h> -#include <linux/dmi.h> -#include <linux/pfn.h> +#include <linux/memblock.h> #include <linux/pci.h> -#include <asm/pci-direct.h> -#include <linux/init_ohci1394_dma.h> -#include <linux/kvm_para.h> -#include <linux/dma-contiguous.h> -#include <xen/xen.h> -#include <uapi/linux/mount.h> - -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/delay.h> - -#include <linux/kallsyms.h> -#include <linux/cpufreq.h> -#include <linux/dma-mapping.h> -#include <linux/ctype.h> -#include <linux/uaccess.h> - -#include <linux/percpu.h> -#include <linux/crash_dump.h> +#include <linux/root_dev.h> +#include <linux/sfi.h> #include <linux/tboot.h> -#include <linux/jiffies.h> -#include <linux/mem_encrypt.h> -#include <linux/sizes.h> - #include <linux/usb/xhci-dbgp.h> -#include <video/edid.h> -#include <asm/mtrr.h> +#include <uapi/linux/mount.h> + +#include <xen/xen.h> + #include <asm/apic.h> -#include <asm/realmode.h> -#include <asm/e820/api.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/efi.h> -#include <asm/timer.h> -#include <asm/i8259.h> -#include <asm/sections.h> -#include <asm/io_apic.h> -#include <asm/ist.h> -#include <asm/setup_arch.h> #include <asm/bios_ebda.h> -#include <asm/cacheflush.h> -#include <asm/processor.h> #include <asm/bugs.h> -#include <asm/kasan.h> - -#include <asm/vsyscall.h> #include <asm/cpu.h> -#include <asm/desc.h> -#include <asm/dma.h> -#include <asm/iommu.h> +#include <asm/efi.h> #include <asm/gart.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> - -#include <asm/paravirt.h> #include <asm/hypervisor.h> -#include <asm/olpc_ofw.h> - -#include <asm/percpu.h> -#include <asm/topology.h> -#include <asm/apicdef.h> -#include <asm/amd_nb.h> +#include <asm/io_apic.h> +#include <asm/kasan.h> +#include <asm/kaslr.h> #include <asm/mce.h> -#include <asm/alternative.h> +#include <asm/mtrr.h> +#include <asm/realmode.h> +#include <asm/olpc_ofw.h> +#include <asm/pci-direct.h> #include <asm/prom.h> -#include <asm/microcode.h> -#include <asm/kaslr.h> +#include <asm/proto.h> #include <asm/unwind.h> +#include <asm/vsyscall.h> +#include <linux/vmalloc.h> /* - * max_low_pfn_mapped: highest direct mapped pfn under 4GB - * max_pfn_mapped: highest direct mapped pfn over 4GB + * max_low_pfn_mapped: highest directly mapped pfn < 4 GB + * max_pfn_mapped: highest directly mapped pfn > 4 GB * * The direct mapping only covers E820_TYPE_RAM regions, so the ranges and gaps are - * represented by pfn_mapped + * represented by pfn_mapped[]. */ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; @@ -135,14 +59,23 @@ RESERVE_BRK(dmi_alloc, 65536); #endif -static __initdata unsigned long _brk_start = (unsigned long)__brk_base; -unsigned long _brk_end = (unsigned long)__brk_base; +/* + * Range of the BSS area. The size of the BSS area is determined + * at link time, with RESERVE_BRK*() facility reserving additional + * chunks. + */ +static __initdata +unsigned long _brk_start = (unsigned long)__brk_base; +unsigned long _brk_end = (unsigned long)__brk_base; struct boot_params boot_params; /* - * Machine setup.. + * These are the four main kernel memory regions, we put them into + * the resource tree so that kdump tools and other debugging tools + * recover it: */ + static struct resource rodata_resource = { .name = "Kernel rodata", .start = 0, @@ -173,16 +106,16 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 -/* cpu data as detected by the assembly code in head_32.S */ +/* CPU data as detected by the assembly code in head_32.S */ struct cpuinfo_x86 new_cpu_data; -/* common cpu data for all cpus */ +/* Common CPU data for all CPUs */ struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); unsigned int def_to_bigsmp; -/* for MCA, but anyone else can use it if they want */ +/* For MCA, but anyone else can use it if they want */ unsigned int machine_id; unsigned int machine_submodel_id; unsigned int BIOS_revision; @@ -468,15 +401,15 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. * - * On 32 bits earlier kernels would limit the kernel to the low 512 MiB + * Earlier 32-bits kernels would limit the kernel to the low 512 MB range * due to mapping restrictions. * - * On 64bit, kdump kernel need be restricted to be under 64TB, which is + * 64-bit kdump kernels need to be restricted to be under 64 TB, which is * the upper limit of system RAM in 4-level paging mode. Since the kdump - * jumping could be from 5-level to 4-level, the jumping will fail if - * kernel is put above 64TB, and there's no way to detect the paging mode - * of the kernel which will be loaded for dumping during the 1st kernel - * bootup. + * jump could be from 5-level paging to 4-level paging, the jump will fail if + * the kernel is put above 64 TB, and during the 1st kernel bootup there's + * no good way to detect the paging mode of the target kernel which will be + * loaded for dumping. */ #ifdef CONFIG_X86_32 # define CRASH_ADDR_LOW_MAX SZ_512M @@ -887,7 +820,7 @@ void __init setup_arch(char **cmdline_p) /* * Note: Quark X1000 CPUs advertise PGE incorrectly and require * a cr3 based tlb flush, so the following __flush_tlb_all() - * will not flush anything because the cpu quirk which clears + * will not flush anything because the CPU quirk which clears * X86_FEATURE_PGE has not been invoked yet. Though due to the * load_cr3() above the TLB has been flushed already. The * quirk is invoked before subsequent calls to __flush_tlb_all() @@ -960,8 +893,6 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = _brk_end; - mpx_mm_init(&init_mm); - code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; rodata_resource.start = __pa_symbol(__start_rodata); @@ -1295,8 +1226,6 @@ void __init setup_arch(char **cmdline_p) #if defined(CONFIG_VGA_CONSOLE) if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; #endif #endif x86_init.oem.banner(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8eb7193e158d..8a29573851a3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -151,8 +151,6 @@ static int restore_sigcontext(struct pt_regs *regs, err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32)); - force_iret(); - return err; } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index f7476ce23b6e..ca3c11a17b5a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -22,7 +22,6 @@ #include <asm/elf.h> #include <asm/ia32.h> #include <asm/syscalls.h> -#include <asm/mpx.h> /* * Align a virtual address to avoid aliasing in the I$ on AMD F15h. @@ -137,10 +136,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_unmapped_area_info info; unsigned long begin, end; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - if (flags & MAP_FIXED) return addr; @@ -180,10 +175,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long addr = addr0; struct vm_unmapped_area_info info; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 01f0e2263b86..298fc1edd9c9 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -90,11 +90,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 4c61f0713832..b89f6ac6a0c0 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -354,7 +354,7 @@ static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t c void *kbuf; int ret = -EFAULT; - log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE); + log_base = ioremap(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE); if (!log_base) return ret; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05da6b5b167b..6ef00eb6fbb9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -52,10 +52,10 @@ #include <asm/mach_traps.h> #include <asm/alternative.h> #include <asm/fpu/xstate.h> -#include <asm/trace/mpx.h> -#include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -434,8 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - const struct mpx_bndcsr *bndcsr; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) @@ -445,84 +443,60 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (!user_mode(regs)) die("bounds", regs, error_code); - if (!cpu_feature_enabled(X86_FEATURE_MPX)) { - /* The exception is not from Intel MPX */ - goto exit_trap; - } + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); +} - /* - * We need to look at BNDSTATUS to resolve this exception. - * A NULL here might mean that it is in its 'init state', - * which is all zeros which indicates MPX was not - * responsible for the exception. - */ - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - goto exit_trap; +enum kernel_gp_hint { + GP_NO_HINT, + GP_NON_CANONICAL, + GP_CANONICAL +}; - trace_bounds_exception_mpx(bndcsr); - /* - * The error code field of the BNDSTATUS register communicates status - * information of a bound range exception #BR or operation involving - * bound directory. - */ - switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { - case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault()) - goto exit_trap; - break; /* Success, it was handled */ - case 1: /* Bound violation. */ - { - struct task_struct *tsk = current; - struct mpx_fault_info mpx; - - if (mpx_fault_info(&mpx, regs)) { - /* - * We failed to decode the MPX instruction. Act as if - * the exception was not caused by MPX. - */ - goto exit_trap; - } - /* - * Success, we decoded the instruction and retrieved - * an 'mpx' containing the address being accessed - * which caused the exception. This information - * allows and application to possibly handle the - * #BR exception itself. - */ - if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs, - error_code)) - break; +/* + * When an uncaught #GP occurs, try to determine the memory address accessed by + * the instruction and return that address to the caller. Also, try to figure + * out whether any part of the access to that address was non-canonical. + */ +static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, + unsigned long *addr) +{ + u8 insn_buf[MAX_INSN_SIZE]; + struct insn insn; - show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code); + if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + return GP_NO_HINT; - force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper); - break; - } - case 0: /* No exception caused by Intel MPX operations. */ - goto exit_trap; - default: - die("bounds", regs, error_code); - } + kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); + insn_get_modrm(&insn); + insn_get_sib(&insn); - return; + *addr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (*addr == -1UL) + return GP_NO_HINT; -exit_trap: +#ifdef CONFIG_X86_64 /* - * This path out is for all the cases where we could not - * handle the exception in some way (like allocating a - * table or telling userspace about it. We will also end - * up here if the kernel has MPX turned off at compile - * time.. + * Check that: + * - the operand is not in the kernel half + * - the last byte of the operand is not in the user canonical half */ - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); + if (*addr < ~__VIRTUAL_MASK && + *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) + return GP_NON_CANONICAL; +#endif + + return GP_CANONICAL; } -dotraplinkage void -do_general_protection(struct pt_regs *regs, long error_code) +#define GPFSTR "general protection fault" + +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { - const char *desc = "general protection fault"; + char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; + enum kernel_gp_hint hint = GP_NO_HINT; struct task_struct *tsk; + unsigned long gp_addr; + int ret; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); @@ -539,48 +513,61 @@ do_general_protection(struct pt_regs *regs, long error_code) } tsk = current; - if (!user_mode(regs)) { - if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) - return; + if (user_mode(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - /* - * To be potentially processing a kprobe fault and to - * trust the result from kprobe_running(), we have to - * be non-preemptible. - */ - if (!preemptible() && kprobe_running() && - kprobe_fault_handler(regs, X86_TRAP_GP)) - return; + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + force_sig(SIGSEGV); - if (notify_die(DIE_GPF, desc, regs, error_code, - X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) - die(desc, regs, error_code); return; } + if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) + return; + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + /* + * To be potentially processing a kprobe fault and to trust the result + * from kprobe_running(), we have to be non-preemptible. + */ + if (!preemptible() && + kprobe_running() && + kprobe_fault_handler(regs, X86_TRAP_GP)) + return; + + ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); + if (ret == NOTIFY_STOP) + return; + + if (error_code) + snprintf(desc, sizeof(desc), "segment-related " GPFSTR); + else + hint = get_kernel_gp_address(regs, &gp_addr); + + if (hint != GP_NO_HINT) + snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", + (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" + : "maybe for address", + gp_addr); + + /* + * KASAN is interested only in the non-canonical case, clear it + * otherwise. + */ + if (hint != GP_NON_CANONICAL) + gp_addr = 0; + + die_addr(desc, regs, error_code, gp_addr); - force_sig(SIGSEGV); } NOKPROBE_SYMBOL(do_general_protection); dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { -#ifdef CONFIG_DYNAMIC_FTRACE - /* - * ftrace must be first, everything else may cause a recursive crash. - * See note by declaration of modifying_ftrace_code in ftrace.c - */ - if (unlikely(atomic_read(&modifying_ftrace_code)) && - ftrace_int3_handler(regs)) - return; -#endif if (poke_int3_handler(regs)) return; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index b8acf639abd1..32a818764e03 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -233,7 +233,6 @@ static cycles_t check_tsc_warp(unsigned int timeout) * The measurement runs for 'timeout' msecs: */ end = start + (cycles_t) tsc_khz * timeout; - now = start; for (i = 0; ; i++) { /* diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 332ae6530fa8..e9cc182aa97e 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -187,6 +187,8 @@ static struct orc_entry *orc_find(unsigned long ip) return orc_ftrace_find(ip); } +#ifdef CONFIG_MODULES + static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; @@ -229,7 +231,6 @@ static int orc_sort_cmp(const void *_a, const void *_b) return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; } -#ifdef CONFIG_MODULES void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, void *_orc, size_t orc_size) { @@ -273,9 +274,11 @@ void __init unwind_init(void) return; } - /* Sort the .orc_unwind and .orc_unwind_ip tables: */ - sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp, - orc_sort_swap); + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ /* Initialize the fast lookup table: */ lookup_num_blocks = orc_lookup_end - orc_lookup; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index a76c12b38e92..91d55454e702 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -381,7 +381,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) mark_screen_rdonly(tsk->mm); memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); - force_iret(); return regs->ax; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3a1a819da137..e3296aa028fe 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -193,12 +193,10 @@ SECTIONS __vvar_beginning_hack = .; /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ +#define EMIT_VVAR(name, offset) \ . = __vvar_beginning_hack + offset; \ *(.vvar_ ## name) -#define __VVAR_KERNEL_LDS #include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR /* diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ce89430a7f80..23e25f3034c2 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,7 +20,7 @@ #include <asm/irq.h> #include <asm/io_apic.h> #include <asm/hpet.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/tsc.h> #include <asm/iommu.h> #include <asm/mach_traps.h> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 840e12583b85..991019d5eee1 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -60,13 +60,11 @@ config KVM If unsure, say N. config KVM_INTEL - tristate "KVM for Intel processors support" - depends on KVM - # for perf_guest_get_msrs(): - depends on CPU_SUP_INTEL + tristate "KVM for Intel (and compatible) processors support" + depends on KVM && IA32_FEAT_CTL ---help--- - Provides support for KVM on Intel processors equipped with the VT - extensions. + Provides support for KVM on processors equipped with Intel's VT + extensions, a.k.a. Virtual Machine Extensions (VMX). To compile this as a module, choose M here: the module will be called kvm-intel. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cfafa320a8cf..b1c469446b07 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -62,7 +62,7 @@ u64 kvm_supported_xcr0(void) return xcr0; } -#define F(x) bit(X86_FEATURE_##x) +#define F feature_bit int kvm_update_cpuid(struct kvm_vcpu *vcpu) { @@ -281,8 +281,9 @@ out: return r; } -static void cpuid_mask(u32 *word, int wordnum) +static __always_inline void cpuid_mask(u32 *word, int wordnum) { + reverse_cpuid_check(wordnum); *word &= boot_cpu_data.x86_capability[wordnum]; } @@ -352,6 +353,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; unsigned f_la57; + unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0; /* cpuid 7.0.ebx */ const u32 kvm_cpuid_7_0_ebx_x86_features = @@ -363,7 +365,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | + F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; @@ -392,6 +394,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* Set LA57 based on hardware capability. */ entry->ecx |= f_la57; entry->ecx |= f_umip; + entry->ecx |= f_pku; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); @@ -402,7 +405,8 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) entry->edx |= F(SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->edx |= F(INTEL_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even @@ -759,7 +763,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(AMD_IBRS); if (boot_cpu_has(X86_FEATURE_STIBP)) entry->ebx |= F(AMD_STIBP); - if (boot_cpu_has(X86_FEATURE_SSBD)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(AMD_SSBD); if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) entry->ebx |= F(AMD_SSB_NO); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d78a61408243..7366c618aa04 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -53,15 +53,46 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, + [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, }; -static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) +/* + * Reverse CPUID and its derivatives can only be used for hardware-defined + * feature words, i.e. words whose bits directly correspond to a CPUID leaf. + * Retrieving a feature bit or masking guest CPUID from a Linux-defined word + * is nonsensical as the bit number/mask is an arbitrary software-defined value + * and can't be used by KVM to query/control guest capabilities. And obviously + * the leaf being queried must have an entry in the lookup table. + */ +static __always_inline void reverse_cpuid_check(unsigned x86_leaf) { - unsigned x86_leaf = x86_feature / 32; - + BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); +} + +/* + * Retrieve the bit mask from an X86_FEATURE_* definition. Features contain + * the hardware defined bit number (stored in bits 4:0) and a software defined + * "word" (stored in bits 31:5). The word is used to index into arrays of + * bit masks that hold the per-cpu feature capabilities, e.g. this_cpu_has(). + */ +static __always_inline u32 __feature_bit(int x86_feature) +{ + reverse_cpuid_check(x86_feature / 32); + return 1 << (x86_feature & 31); +} +#define feature_bit(name) __feature_bit(X86_FEATURE_##name) + +static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) +{ + unsigned x86_leaf = x86_feature / 32; + + reverse_cpuid_check(x86_leaf); return reverse_cpuid[x86_leaf]; } @@ -93,15 +124,11 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_ { int *reg; - if (x86_feature == X86_FEATURE_XSAVE && - !static_cpu_has(X86_FEATURE_XSAVE)) - return false; - reg = guest_cpuid_get_register(vcpu, x86_feature); if (!reg) return false; - return *reg & bit(x86_feature); + return *reg & __feature_bit(x86_feature); } static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) @@ -110,7 +137,7 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x8 reg = guest_cpuid_get_register(vcpu, x86_feature); if (reg) - *reg &= ~bit(x86_feature); + *reg &= ~__feature_bit(x86_feature); } static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 952d1a4f4d7e..ddbc61984227 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -22,6 +22,7 @@ #include "kvm_cache_regs.h" #include <asm/kvm_emulate.h> #include <linux/stringify.h> +#include <asm/fpu/api.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> @@ -310,7 +311,9 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) #define ON64(x) #endif -static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); +typedef void (*fastop_t)(struct fastop *); + +static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ @@ -1075,8 +1078,23 @@ static void fetch_register_operand(struct operand *op) } } -static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +static void emulator_get_fpu(void) +{ + fpregs_lock(); + + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static void emulator_put_fpu(void) { + fpregs_unlock(); +} + +static void read_sse_reg(sse128_t *data, int reg) +{ + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1098,11 +1116,12 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } + emulator_put_fpu(); } -static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, - int reg) +static void write_sse_reg(sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1124,10 +1143,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } + emulator_put_fpu(); } -static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +static void read_mmx_reg(u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1139,10 +1160,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } -static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +static void write_mmx_reg(u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1154,6 +1177,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1161,7 +1185,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fninit"); + emulator_put_fpu(); return X86EMUL_CONTINUE; } @@ -1172,7 +1198,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstcw %0": "+m"(fcw)); + emulator_put_fpu(); ctxt->dst.val = fcw; @@ -1186,7 +1214,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstsw %0": "+m"(fsw)); + emulator_put_fpu(); ctxt->dst.val = fsw; @@ -1205,7 +1235,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_XMM; op->bytes = 16; op->addr.xmm = reg; - read_sse_reg(ctxt, &op->vec_val, reg); + read_sse_reg(&op->vec_val, reg); return; } if (ctxt->d & Mmx) { @@ -1256,7 +1286,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->type = OP_XMM; op->bytes = 16; op->addr.xmm = ctxt->modrm_rm; - read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); + read_sse_reg(&op->vec_val, ctxt->modrm_rm); return rc; } if (ctxt->d & Mmx) { @@ -1833,10 +1863,10 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) op->bytes * op->count); break; case OP_XMM: - write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); + write_sse_reg(&op->vec_val, op->addr.xmm); break; case OP_MM: - write_mmx_reg(ctxt, &op->mm_val, op->addr.mm); + write_mmx_reg(&op->mm_val, op->addr.mm); break; case OP_NONE: /* no writeback */ @@ -2348,12 +2378,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) { #ifdef CONFIG_X86_64 - u32 eax, ebx, ecx, edx; - - eax = 0x80000001; - ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - return edx & bit(X86_FEATURE_LM); + return ctxt->ops->guest_has_long_mode(ctxt); #else return false; #endif @@ -3618,18 +3643,11 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -#define FFL(x) bit(X86_FEATURE_##x) - static int em_movbe(struct x86_emulate_ctxt *ctxt) { - u32 ebx, ecx, edx, eax = 1; u16 tmp; - /* - * Check MOVBE is set in the guest-visible CPUID leaf. - */ - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - if (!(ecx & FFL(MOVBE))) + if (!ctxt->ops->guest_has_movbe(ctxt)) return emulate_ud(ctxt); switch (ctxt->op_bytes) { @@ -4027,10 +4045,7 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt) static int check_fxsr(struct x86_emulate_ctxt *ctxt) { - u32 eax = 1, ebx, ecx = 0, edx; - - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - if (!(edx & FFL(FXSR))) + if (!ctxt->ops->guest_has_fxsr(ctxt)) return emulate_ud(ctxt); if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) @@ -4092,8 +4107,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + emulator_put_fpu(); + if (rc != X86EMUL_CONTINUE) return rc; @@ -4136,6 +4155,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4151,6 +4172,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: + emulator_put_fpu(); + return rc; } @@ -5210,16 +5233,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -5303,10 +5338,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) @@ -5448,7 +5488,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; + emulator_get_fpu(); rc = asm_safe("fwait"); + emulator_put_fpu(); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); @@ -5456,14 +5498,13 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, - struct operand *op) +static void fetch_possible_mmx_operand(struct operand *op) { if (op->type == OP_MM) - read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); + read_mmx_reg(&op->mm_val, op->addr.mm); } -static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) +static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) { ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; @@ -5539,10 +5580,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) * Now that we know the fpu is exception safe, we can fetch * operands from it. */ - fetch_possible_mmx_operand(ctxt, &ctxt->src); - fetch_possible_mmx_operand(ctxt, &ctxt->src2); + fetch_possible_mmx_operand(&ctxt->src); + fetch_possible_mmx_operand(&ctxt->src2); if (!(ctxt->d & Mov)) - fetch_possible_mmx_operand(ctxt, &ctxt->dst); + fetch_possible_mmx_operand(&ctxt->dst); } if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { @@ -5641,14 +5682,10 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) { - void (*fop)(struct fastop *) = (void *)ctxt->execute; - rc = fastop(ctxt, fop); - if (rc != X86EMUL_CONTINUE) - goto done; - goto writeback; - } - rc = ctxt->execute(ctxt); + if (ctxt->d & Fastop) + rc = fastop(ctxt, (fastop_t)ctxt->execute); + else + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 23ff65504d7e..4df1c965bf1a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -33,6 +33,7 @@ #include <trace/events/kvm.h> #include "trace.h" +#include "irq.h" #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) @@ -809,11 +810,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -852,11 +854,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } @@ -1058,7 +1061,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, return 1; break; default: - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } @@ -1121,7 +1124,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; /* - * Clear apic_assist portion of f(struct hv_vp_assist_page + * Clear apic_assist portion of struct hv_vp_assist_page * only, there can be valuable data in the rest which needs * to be preserved e.g. on migration. */ @@ -1178,7 +1181,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; break; default: - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8b38bb4868a6..629a09ca9860 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 9fd2dd89a1c5..26aa22cb9b29 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/nospec.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> @@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : @@ -108,8 +110,9 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; - if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, - e->fields.dest_mode)) + if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + e->fields.dest_id, + kvm_lapic_irq_dest_mode(!!e->fields.dest_mode))) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); @@ -188,7 +191,7 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, /* * Return 0 for coalesced interrupts; for edge-triggered interrupts, * this only happens if a previous edge has not been delivered due - * do masking. For level interrupts, the remote_irr field tells + * to masking. For level interrupts, the remote_irr field tells * us if the interrupt is waiting for an EOI. * * RTC is special: it is edge-triggered, but userspace likes to know @@ -250,8 +253,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || index == RTC_GSI) { - if (kvm_apic_match_dest(vcpu, NULL, 0, - e->fields.dest_id, e->fields.dest_mode) || + u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); + + if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + e->fields.dest_id, dm) || kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); @@ -292,6 +297,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ @@ -327,11 +333,12 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; - irq.shorthand = 0; + irq.shorthand = APIC_DEST_NOSHORT; irq.vector = e->fields.vector; irq.delivery_mode = e->fields.delivery_mode << 8; irq.dest_id = e->fields.dest_id; - irq.dest_mode = e->fields.dest_mode; + irq.dest_mode = + kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); bitmap_zero(&vcpu_bitmap, 16); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); @@ -343,7 +350,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) * keep ioapic_handled_vectors synchronized. */ irq.dest_id = old_dest_id; - irq.dest_mode = old_dest_mode; + irq.dest_mode = + kvm_lapic_irq_dest_mode( + !!e->fields.dest_mode); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); } @@ -369,11 +378,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.dest_id = entry->fields.dest_id; irqe.vector = entry->fields.vector; - irqe.dest_mode = entry->fields.dest_mode; + irqe.dest_mode = kvm_lapic_irq_dest_mode(!!entry->fields.dest_mode); irqe.trig_mode = entry->fields.trig_mode; irqe.delivery_mode = entry->fields.delivery_mode << 8; irqe.level = 1; - irqe.shorthand = 0; + irqe.shorthand = APIC_DEST_NOSHORT; irqe.msi_redir_hint = false; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ea1a4e0297da..2fb2e3c80724 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -116,9 +116,6 @@ static inline int ioapic_in_kernel(struct kvm *kvm) } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); -bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); @@ -126,9 +123,6 @@ void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, - struct dest_map *dest_map); void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7c6233d37c64..f173ab6b407e 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -113,5 +113,8 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu); int kvm_setup_default_irq_routing(struct kvm *kvm); int kvm_setup_empty_irq_routing(struct kvm *kvm); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, + struct dest_map *dest_map); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8ecd48d31800..79afa0bb5f41 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; unsigned int dest_vcpus = 0; - if (irq->dest_mode == 0 && irq->dest_id == 0xff && - kvm_lowest_prio_delivery(irq)) { + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + return r; + + if (irq->dest_mode == APIC_DEST_PHYSICAL && + irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); irq->delivery_mode = APIC_DM_FIXED; } - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) - return r; - memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -114,13 +114,14 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi); irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq->dest_mode = kvm_lapic_irq_dest_mode( + !!((1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo)); irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; irq->delivery_mode = e->msi.data & 0x700; irq->msi_redir_hint = ((e->msi.address_lo & MSI_ADDR_REDIRECTION_LOWPRI) > 0); irq->level = 1; - irq->shorthand = 0; + irq->shorthand = APIC_DEST_NOSHORT; } EXPORT_SYMBOL_GPL(kvm_set_msi_irq); @@ -416,7 +417,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.level && + kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cf9177b4a07f..cce1e6b204c8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -56,9 +56,6 @@ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) #define LAPIC_MMIO_LENGTH (1 << 12) /* followed define is not in apicdef.h */ -#define APIC_SHORT_MASK 0xc0000 -#define APIC_DEST_NOSHORT 0x0 -#define APIC_DEST_MASK 0x800 #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 @@ -792,13 +789,13 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, } bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode) + int shorthand, unsigned int dest, int dest_mode) { struct kvm_lapic *target = vcpu->arch.apic; u32 mda = kvm_apic_mda(vcpu, dest, source, target); ASSERT(target); - switch (short_hand) { + switch (shorthand) { case APIC_DEST_NOSHORT: if (dest_mode == APIC_DEST_PHYSICAL) return kvm_apic_match_physical_addr(target, mda); @@ -967,12 +964,12 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, } /* - * This routine tries to handler interrupts in posted mode, here is how + * This routine tries to handle interrupts in posted mode, here is how * it deals with different cases: * - For single-destination interrupts, handle it in posted mode * - Else if vector hashing is enabled and it is a lowest-priority * interrupt, handle it in posted mode and use the following mechanism - * to find the destinaiton vCPU. + * to find the destination vCPU. * 1. For lowest-priority interrupts, store all the possible * destination vCPUs in an array. * 2. Use "guest vector % max number of destination vCPUs" to find @@ -1151,7 +1148,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, if (!kvm_apic_present(vcpu)) continue; if (!kvm_apic_match_dest(vcpu, NULL, - irq->delivery_mode, + irq->shorthand, irq->dest_id, irq->dest_mode)) continue; @@ -1574,9 +1571,9 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) struct kvm_timer *ktimer = &apic->lapic_timer; kvm_apic_local_deliver(apic, APIC_LVTT); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic)) { ktimer->tscdeadline = 0; - if (apic_lvtt_oneshot(apic)) { + } else if (apic_lvtt_oneshot(apic)) { ktimer->tscdeadline = 0; ktimer->target_expiration = 0; } @@ -1963,15 +1960,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) @@ -2373,14 +2375,13 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); - int r = 0; if (!kvm_apic_hw_enabled(vcpu->arch.apic)) - r = 1; + return 1; if ((lvt0 & APIC_LVT_MASKED) == 0 && GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) - r = 1; - return r; + return 1; + return 0; } void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 39925afdfcdc..ec730ce7a344 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -10,8 +10,9 @@ #define KVM_APIC_SIPI 1 #define KVM_APIC_LVT_NUM 6 -#define KVM_APIC_SHORT_MASK 0xc0000 -#define KVM_APIC_DEST_MASK 0x800 +#define APIC_SHORT_MASK 0xc0000 +#define APIC_DEST_NOSHORT 0x0 +#define APIC_DEST_MASK 0x800 #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) @@ -82,8 +83,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val); int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, void *data); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode); - + int shorthand, unsigned int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f92b40d798c..adc84f0f16ba 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -40,7 +40,7 @@ #include <linux/kthread.h> #include <asm/page.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/cmpxchg.h> #include <asm/e820/api.h> #include <asm/io.h> @@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte) * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ MMIO_SPTE_GEN_LOW_START) -#define MMIO_SPTE_GEN_HIGH_START 52 -#define MMIO_SPTE_GEN_HIGH_END 61 +#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT +#define MMIO_SPTE_GEN_HIGH_END 62 #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) + static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); + BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; @@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte) { u64 gen; - spte &= ~shadow_mmio_mask; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; return gen; @@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static u8 kvm_get_shadow_phys_bits(void) { /* - * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected - * in CPU detection code, but MKTME treats those reduced bits as - * 'keyID' thus they are not reserved bits. Therefore for MKTME - * we should still return physical address bits reported by CPUID. + * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected + * in CPU detection code, but the processor treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at + * the physical address bits reported by CPUID. */ - if (!boot_cpu_has(X86_FEATURE_TME) || - WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) - return boot_cpu_data.x86_phys_bits; + if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) + return cpuid_eax(0x80000008) & 0xff; - return cpuid_eax(0x80000008) & 0xff; + /* + * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with + * custom CPUID. Proceed with whatever the kernel found since these features + * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). + */ + return boot_cpu_data.x86_phys_bits; } static void kvm_mmu_reset_all_pte_masks(void) @@ -1260,56 +1264,6 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) list_del(&sp->lpage_disallowed_link); } -static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, - struct kvm_memory_slot *slot) -{ - struct kvm_lpage_info *linfo; - - if (slot) { - linfo = lpage_info_slot(gfn, slot, level); - return !!linfo->disallow_lpage; - } - - return true; -} - -static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, - int level) -{ - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); -} - -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) -{ - unsigned long page_size; - int i, ret = 0; - - page_size = kvm_host_page_size(kvm, gfn); - - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - if (page_size >= KVM_HPAGE_SIZE(i)) - ret = i; - else - break; - } - - return ret; -} - -static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, - bool no_dirty_log) -{ - if (!slot || slot->flags & KVM_MEMSLOT_INVALID) - return false; - if (no_dirty_log && slot->dirty_bitmap) - return false; - - return true; -} - static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) @@ -1317,40 +1271,14 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!memslot_valid_for_gpte(slot, no_dirty_log)) - slot = NULL; + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return NULL; + if (no_dirty_log && slot->dirty_bitmap) + return NULL; return slot; } -static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, - bool *force_pt_level) -{ - int host_level, level, max_level; - struct kvm_memory_slot *slot; - - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); - *force_pt_level = !memslot_valid_for_gpte(slot, true); - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - host_level = host_mapping_level(vcpu->kvm, large_gfn); - - if (host_level == PT_PAGE_TABLE_LEVEL) - return host_level; - - max_level = min(kvm_x86_ops->get_lpage_level(), host_level); - - for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) - break; - - return level - 1; -} - /* * About rmap_head encoding: * @@ -1410,7 +1338,7 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, if (j != 0) return; if (!prev_desc && !desc->more) - rmap_head->val = (unsigned long)desc->sptes[0]; + rmap_head->val = 0; else if (prev_desc) prev_desc->more = desc->more; @@ -1525,7 +1453,7 @@ struct rmap_iterator { /* * Iteration must be started by this function. This should also be used after * removing/dropping sptes from the rmap link because in such cases the - * information in the itererator may not be valid. + * information in the iterator may not be valid. * * Returns sptep if found, NULL otherwise. */ @@ -2899,6 +2827,26 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } +static int make_mmu_pages_available(struct kvm_vcpu *vcpu) +{ + LIST_HEAD(invalid_list); + + if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) + return 0; + + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { + if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) + break; + + ++vcpu->kvm->stat.mmu_recycled; + } + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + + if (!kvm_mmu_available_pages(vcpu->kvm)) + return -ENOSPC; + return 0; +} + /* * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock @@ -3099,17 +3047,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= (u64)pfn << PAGE_SHIFT; if (pte_access & ACC_WRITE_MASK) { - - /* - * Other vcpu creates new sp in the window between - * mapping_level() and acquiring mmu-lock. We can - * allow guest to retry the access, the mapping can - * be fixed if guest refault. - */ - if (level > PT_PAGE_TABLE_LEVEL && - mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) - goto done; - spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; /* @@ -3141,7 +3078,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, set_pte: if (mmu_spte_update(sptep, spte)) ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; -done: return ret; } @@ -3294,6 +3230,83 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } +static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, + kvm_pfn_t pfn, struct kvm_memory_slot *slot) +{ + unsigned long hva; + pte_t *pte; + int level; + + BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K || + PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M || + PT_PDPE_LEVEL != (int)PG_LEVEL_1G); + + if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn)) + return PT_PAGE_TABLE_LEVEL; + + /* + * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() + * is not solely for performance, it's also necessary to avoid the + * "writable" check in __gfn_to_hva_many(), which will always fail on + * read-only memslots due to gfn_to_hva() assuming writes. Earlier + * page fault steps have already verified the guest isn't writing a + * read-only memslot. + */ + hva = __gfn_to_hva_memslot(slot, gfn); + + pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level); + if (unlikely(!pte)) + return PT_PAGE_TABLE_LEVEL; + + return level; +} + +static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, + int max_level, kvm_pfn_t *pfnp) +{ + struct kvm_memory_slot *slot; + struct kvm_lpage_info *linfo; + kvm_pfn_t pfn = *pfnp; + kvm_pfn_t mask; + int level; + + if (unlikely(max_level == PT_PAGE_TABLE_LEVEL)) + return PT_PAGE_TABLE_LEVEL; + + if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn)) + return PT_PAGE_TABLE_LEVEL; + + slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true); + if (!slot) + return PT_PAGE_TABLE_LEVEL; + + max_level = min(max_level, kvm_x86_ops->get_lpage_level()); + for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) { + linfo = lpage_info_slot(gfn, slot, max_level); + if (!linfo->disallow_lpage) + break; + } + + if (max_level == PT_PAGE_TABLE_LEVEL) + return PT_PAGE_TABLE_LEVEL; + + level = host_pfn_mapping_level(vcpu, gfn, pfn, slot); + if (level == PT_PAGE_TABLE_LEVEL) + return level; + + level = min(level, max_level); + + /* + * mmu_notifier_retry() was successful and mmu_lock is held, so + * the pmd can't be split from under us. + */ + mask = KVM_PAGES_PER_HPAGE(level) - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + *pfnp = pfn & ~mask; + + return level; +} + static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { @@ -3318,18 +3331,20 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, } static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, - int map_writable, int level, kvm_pfn_t pfn, - bool prefault, bool lpage_disallowed) + int map_writable, int max_level, kvm_pfn_t pfn, + bool prefault, bool account_disallowed_nx_lpage) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; - int ret; + int level, ret; gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t base_gfn = gfn; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) return RET_PF_RETRY; + level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn); + trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { /* @@ -3348,7 +3363,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); - if (lpage_disallowed) + if (account_disallowed_nx_lpage) account_huge_nx_page(vcpu->kvm, sp); } } @@ -3384,45 +3399,6 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return -EFAULT; } -static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t gfn, kvm_pfn_t *pfnp, - int *levelp) -{ - kvm_pfn_t pfn = *pfnp; - int level = *levelp; - - /* - * Check if it's a transparent hugepage. If this would be an - * hugetlbfs page, level wouldn't be set to - * PT_PAGE_TABLE_LEVEL and there would be no adjustment done - * here. - */ - if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && - PageTransCompoundMap(pfn_to_page(pfn)) && - !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { - unsigned long mask; - /* - * mmu_notifier_retry was successful and we hold the - * mmu_lock here, so the pmd can't become splitting - * from under us, and in turn - * __split_huge_page_refcount() can't run from under - * us and we can safely transfer the refcount from - * PG_tail to PG_head as we switch the pfn to tail to - * head. - */ - *levelp = level = PT_DIRECTORY_LEVEL; - mask = KVM_PAGES_PER_HPAGE(level) - 1; - VM_BUG_ON((gfn & mask) != (pfn & mask)); - if (pfn & mask) { - kvm_release_pfn_clean(pfn); - pfn &= ~mask; - kvm_get_pfn(pfn); - *pfnp = pfn; - } - } -} - static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, kvm_pfn_t pfn, unsigned access, int *ret_val) { @@ -3528,7 +3504,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) * - true: let the vcpu to access on the same address again. * - false: let the real page fault path to fix it. */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3537,9 +3513,6 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, u64 spte = 0ull; uint retry_count = 0; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return false; - if (!page_fault_can_be_fast(error_code)) return false; @@ -3548,9 +3521,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) - if (!is_shadow_present_pte(spte) || - iterator.level < level) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + if (!is_shadow_present_pte(spte)) break; sp = page_header(__pa(iterator.sptep)); @@ -3626,71 +3598,13 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); return fault_handled; } -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static int make_mmu_pages_available(struct kvm_vcpu *vcpu); - -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, - gfn_t gfn, bool prefault) -{ - int r; - int level; - bool force_pt_level; - kvm_pfn_t pfn; - unsigned long mmu_seq; - bool map_writable, write = error_code & PFERR_WRITE_MASK; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - force_pt_level = lpage_disallowed; - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - /* - * This path builds a PAE pagetable - so we can map - * 2mb pages at maximum. Therefore check if the level - * is larger than that. - */ - if (level > PT_DIRECTORY_LEVEL) - level = PT_DIRECTORY_LEVEL; - - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - - if (fast_page_fault(vcpu, v, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, - prefault, false); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; -} - static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list) { @@ -3981,7 +3895,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3989,7 +3903,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4001,20 +3915,14 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, static bool __is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { - int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; + int bit7 = (pte >> 7) & 1; - return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | - ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); + return pte & rsvd_check->rsvd_bits_mask[bit7][level-1]; } -static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) +static bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, u64 pte) { - return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); -} - -static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) -{ - return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); + return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f); } static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) @@ -4038,11 +3946,11 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; + struct rsvd_bits_validate *rsvd_check; int root, leaf; bool reserved = false; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - goto exit; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; walk_shadow_page_lockless_begin(vcpu); @@ -4058,8 +3966,13 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) if (!is_shadow_present_pte(spte)) break; - reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte, - iterator.level); + /* + * Use a bitwise-OR instead of a logical-OR to aggregate the + * reserved bit and EPT's invalid memtype/XWR checks to avoid + * adding a Jcc in the loop. + */ + reserved |= __is_bad_mt_xwr(rsvd_check, spte) | + __is_rsvd_bits_set(rsvd_check, spte, iterator.level); } walk_shadow_page_lockless_end(vcpu); @@ -4073,7 +3986,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) root--; } } -exit: + *sptep = spte; return reserved; } @@ -4137,9 +4050,6 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) struct kvm_shadow_walk_iterator iterator; u64 spte; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { clear_sp_write_flooding_count(iterator.sptep); @@ -4149,29 +4059,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, - u32 error_code, bool prefault) -{ - gfn_t gfn = gva >> PAGE_SHIFT; - int r; - - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; - - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - - - return nonpaging_map(vcpu, gva & PAGE_MASK, - error_code, gfn, prefault); -} - -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4180,11 +4069,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4204,12 +4095,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4217,11 +4108,77 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; } +static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault, int max_level, bool is_tdp) +{ + bool write = error_code & PFERR_WRITE_MASK; + bool exec = error_code & PFERR_FETCH_MASK; + bool lpage_disallowed = exec && is_nx_huge_page_enabled(); + bool map_writable; + + gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; + kvm_pfn_t pfn; + int r; + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + if (lpage_disallowed) + max_level = PT_PAGE_TABLE_LEVEL; + + if (fast_page_fault(vcpu, gpa, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) + return RET_PF_RETRY; + + if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + r = __direct_map(vcpu, gpa, write, map_writable, max_level, pfn, + prefault, is_tdp && lpage_disallowed); + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return r; +} + +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, + u32 error_code, bool prefault) +{ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); + + /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */ + return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault, + PT_DIRECTORY_LEVEL, false); +} + int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len) { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4249,76 +4206,23 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); -static bool -check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) -{ - int page_num = KVM_PAGES_PER_HPAGE(level); - - gfn &= ~(page_num - 1); - - return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); -} - -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - kvm_pfn_t pfn; - int r; - int level; - bool force_pt_level; - gfn_t gfn = gpa >> PAGE_SHIFT; - unsigned long mmu_seq; - int write = error_code & PFERR_WRITE_MASK; - bool map_writable; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + int max_level; - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; + for (max_level = PT_MAX_HUGEPAGE_LEVEL; + max_level > PT_PAGE_TABLE_LEVEL; + max_level--) { + int page_num = KVM_PAGES_PER_HPAGE(max_level); + gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1); - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - force_pt_level = - lpage_disallowed || - !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - if (level > PT_DIRECTORY_LEVEL && - !check_hugepage_cache_consistency(vcpu, gfn, level)) - level = PT_DIRECTORY_LEVEL; - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); + if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) + break; } - if (fast_page_fault(vcpu, gpa, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, - prefault, lpage_disallowed); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; + return direct_page_fault(vcpu, gpa, error_code, prefault, + max_level, true); } static void nonpaging_init_context(struct kvm_vcpu *vcpu, @@ -5496,47 +5400,30 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -static int make_mmu_pages_available(struct kvm_vcpu *vcpu) -{ - LIST_HEAD(invalid_list); - - if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) - return 0; - - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { - if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) - break; - - ++vcpu->kvm->stat.mmu_recycled; - } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - - if (!kvm_mmu_available_pages(vcpu->kvm)) - return -ENOSPC; - return 0; -} - -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; bool direct = vcpu->arch.mmu->direct_map; + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) + return RET_PF_RETRY; + /* With shadow page tables, fault_address contains a GVA or nGPA. */ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5556,7 +5443,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5571,7 +5458,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5586,7 +5473,7 @@ emulate: return 1; } - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -6015,8 +5902,8 @@ restart: * mapping if the indirect sp has level = 1. */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && - !kvm_is_zone_device_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + (kvm_is_zone_device_pfn(pfn) || + PageCompound(pfn_to_page(pfn)))) { pte_list_remove(rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) @@ -6249,7 +6136,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) + if (shadow_phys_bits == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 97b21e7fd013..4e1ef0473663 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -128,6 +128,21 @@ static inline int FNAME(is_present_gpte)(unsigned long pte) #endif } +static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte) +{ +#if PTTYPE != PTTYPE_EPT + return false; +#else + return __is_bad_mt_xwr(rsvd_check, gpte); +#endif +} + +static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) +{ + return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) || + FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); +} + static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) @@ -175,9 +190,6 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { - if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) - goto no_present; - if (!FNAME(is_present_gpte)(gpte)) goto no_present; @@ -186,6 +198,9 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; + if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + goto no_present; + return false; no_present: @@ -291,11 +306,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) } /* - * Fetch a guest pte for a guest virtual address + * Fetch a guest pte for a guest virtual address, or for an L2's GPA. */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) + gpa_t addr, u32 access) { int ret; pt_element_t pte; @@ -400,7 +415,7 @@ retry_walk: if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; - if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { + if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) { errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } @@ -496,7 +511,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -611,17 +626,17 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. */ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, struct guest_walker *gw, - int write_fault, int hlevel, + int write_fault, int max_level, kvm_pfn_t pfn, bool map_writable, bool prefault, bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, ret; - gfn_t gfn, base_gfn; + int top_level, hlevel, ret; + gfn_t base_gfn = gw->gfn; direct_access = gw->pte_access; @@ -637,7 +652,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) goto out_gpte_changed; for (shadow_walk_init(&it, vcpu, addr); @@ -666,12 +681,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - /* - * FNAME(page_fault) might have clobbered the bottom bits of - * gw->gfn, restore them from the virtual address. - */ - gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); - base_gfn = gfn; + hlevel = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn); trace_kvm_mmu_spte_requested(addr, gw->level, pfn); @@ -682,9 +692,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. */ - disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + disallowed_hugepage_adjust(it, gw->gfn, &pfn, &hlevel); - base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == hlevel) break; @@ -765,7 +775,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, bool prefault) { int write_fault = error_code & PFERR_WRITE_MASK; @@ -773,12 +783,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, struct guest_walker walker; int r; kvm_pfn_t pfn; - int level = PT_PAGE_TABLE_LEVEL; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && is_nx_huge_page_enabled(); - bool force_pt_level = lpage_disallowed; + int max_level; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -818,14 +827,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); - if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { - level = mapping_level(vcpu, walker.gfn, &force_pt_level); - if (likely(!force_pt_level)) { - level = min(walker.level, level); - walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - } else - force_pt_level = true; + if (lpage_disallowed || is_self_change_mapping) + max_level = PT_PAGE_TABLE_LEVEL; + else + max_level = walker.level; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); @@ -865,10 +870,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; - if (!force_pt_level) - transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); - r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault, lpage_disallowed); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn, + map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: @@ -945,18 +948,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) spin_unlock(&vcpu->kvm->mmu_lock); } -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, +/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); + r = FNAME(walk_addr)(&walker, vcpu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; + gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; @@ -964,7 +968,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, } #if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, +/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -972,6 +977,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; +#ifndef CONFIG_X86_64 + /* A 64-bit GVA should be impossible on 32-bit KVM. */ + WARN_ON_ONCE(vaddr >> 32); +#endif + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); if (r) { diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 7ca8831c7d1a..3c6522b84ff1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -249,13 +249,13 @@ TRACE_EVENT( TRACE_EVENT( fast_page_fault, - TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, u64 *sptep, u64 old_spte, bool retry), - TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), + TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), TP_STRUCT__entry( __field(int, vcpu_id) - __field(gva_t, gva) + __field(gpa_t, cr2_or_gpa) __field(u32, error_code) __field(u64 *, sptep) __field(u64, old_spte) @@ -265,7 +265,7 @@ TRACE_EVENT( TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->gva = gva; + __entry->cr2_or_gpa = cr2_or_gpa; __entry->error_code = error_code; __entry->sptep = sptep; __entry->old_spte = old_spte; @@ -273,9 +273,9 @@ TRACE_EVENT( __entry->retry = retry; ), - TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" + TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" " new %llx spurious %d fixed %d", __entry->vcpu_id, - __entry->gva, __print_flags(__entry->error_code, "|", + __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", kvm_mmu_trace_pferr_flags), __entry->sptep, __entry->old_spte, __entry->new_spte, __spte_satisfied(old_spte), __spte_satisfied(new_spte) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 25ce3edd1872..7f0059aa30e1 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7ebb62326c14..13332984b6d5 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include <linux/nospec.h> + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -102,8 +104,12 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu, static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -113,8 +119,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 122d4ce3b1ab..9dbb990c319a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1307,6 +1307,47 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1361,6 +1402,8 @@ static __init int svm_hardware_setup(void) } } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -2144,7 +2187,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm) return ret; } -static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) +static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *page; @@ -2153,39 +2196,13 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *nested_msrpm_pages; int err; - BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!svm) { - err = -ENOMEM; - goto out; - } - - svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!svm->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_svm; - } - - svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!svm->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } - - err = kvm_vcpu_init(&svm->vcpu, kvm, id); - if (err) - goto free_svm; + BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); + svm = to_svm(vcpu); err = -ENOMEM; page = alloc_page(GFP_KERNEL_ACCOUNT); if (!page) - goto uninit; + goto out; msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); if (!msrpm_pages) @@ -2222,9 +2239,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->asid_generation = 0; init_vmcb(svm); - svm_init_osvw(&svm->vcpu); + svm_init_osvw(vcpu); - return &svm->vcpu; + return 0; free_page4: __free_page(hsave_page); @@ -2234,16 +2251,8 @@ free_page2: __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); free_page1: __free_page(page); -uninit: - kvm_vcpu_uninit(&svm->vcpu); -free_svm: - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); -free_partial_svm: - kmem_cache_free(kvm_vcpu_cache, svm); out: - return ERR_PTR(err); + return err; } static void svm_clear_current_vmcb(struct vmcb *vmcb) @@ -2269,10 +2278,6 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, svm); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -4281,12 +4286,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + if (data & ~kvm_spec_ctrl_valid_bits(vcpu)) return 1; svm->spec_ctrl = data; - if (!data) break; @@ -4310,13 +4313,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & ~PRED_CMD_IBPB) return 1; - + if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + return 1; if (!data) break; wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); - if (is_guest_mode(vcpu)) - break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; case MSR_AMD64_VIRT_SPEC_CTRL: @@ -4519,9 +4521,9 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) */ kvm_for_each_vcpu(i, vcpu, kvm) { bool m = kvm_apic_match_dest(vcpu, apic, - icrl & KVM_APIC_SHORT_MASK, + icrl & APIC_SHORT_MASK, GET_APIC_DEST_FIELD(icrh), - icrl & KVM_APIC_DEST_MASK); + icrl & APIC_DEST_MASK); if (m && !avic_vcpu_is_running(vcpu)) kvm_vcpu_wake_up(vcpu); @@ -4935,7 +4937,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = control->exit_info_2; } -static int handle_exit(struct kvm_vcpu *vcpu) +static int handle_exit(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -4993,7 +4996,10 @@ static int handle_exit(struct kvm_vcpu *vcpu) __func__, svm->vmcb->control.exit_int_info, exit_code); - if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { + kvm_skip_emulated_instruction(vcpu); + return 1; + } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); dump_vmcb(vcpu); @@ -5913,6 +5919,7 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVES); /* Update nrips enabled cache */ @@ -5924,14 +5931,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } -#define F(x) bit(X86_FEATURE_##x) +#define F feature_bit static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { case 0x1: if (avic) - entry->ecx &= ~bit(X86_FEATURE_X2APIC); + entry->ecx &= ~F(X2APIC); break; case 0x80000001: if (nested) @@ -6001,6 +6008,11 @@ static bool svm_has_wbinvd_exit(void) return true; } +static bool svm_pku_supported(void) +{ + return false; +} + #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -6186,9 +6198,12 @@ out: return ret; } -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath) { - + if (!is_guest_mode(vcpu) && + to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE) + *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -7341,6 +7356,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .xsaves_supported = svm_xsaves_supported, .umip_emulated = svm_umip_emulated, .pt_supported = svm_pt_supported, + .pku_supported = svm_pku_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 7aa69716d516..283bdb7071af 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void) SECONDARY_EXEC_DESC; } +static inline bool vmx_pku_supported(void) +{ + return boot_cpu_has(X86_FEATURE_PKU); +} + static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 72359709cdc1..89c3e0caf39f 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -350,17 +350,12 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { struct vcpu_vmx *vmx = to_vmx(vcpu); - bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled; vmx->nested.enlightened_vmcs_enabled = true; if (vmcs_version) *vmcs_version = nested_get_evmcs_version(vcpu); - /* We don't support disabling the feature for simplicity. */ - if (evmcs_already_enabled) - return 0; - vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 4aea7d304beb..2db21d59eaf5 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -28,16 +28,6 @@ module_param(nested_early_check, bool, S_IRUGO); failed; \ }) -#define SET_MSR_OR_WARN(vcpu, idx, data) \ -({ \ - bool failed = kvm_set_msr(vcpu, idx, data); \ - if (failed) \ - pr_warn_ratelimited( \ - "%s cannot write MSR (0x%x, 0x%llx)\n", \ - __func__, idx, data); \ - failed; \ -}) - /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -2172,8 +2162,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * EXEC CONTROLS */ exec_control = vmx_exec_control(vmx); /* L0's desires */ - exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; + exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; @@ -2550,8 +2540,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && - SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl)) + WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->guest_ia32_perf_global_ctrl))) return -EINVAL; kvm_rsp_write(vcpu, vmcs12->guest_rsp); @@ -2566,7 +2556,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) return -EINVAL; if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && - nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))) + nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING))) return -EINVAL; return 0; @@ -2823,7 +2813,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; -#ifdef CONFIG_X86_64 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || @@ -2831,7 +2820,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; -#endif /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the @@ -2899,6 +2887,10 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) return -EINVAL; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && + CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + return -EINVAL; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) return -EINVAL; @@ -3048,9 +3040,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12); - static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -3183,7 +3172,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, u32 exit_qual; evaluate_pending_interrupts = exec_controls_get(vmx) & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); @@ -3230,7 +3219,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, } enter_guest_mode(vcpu); - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset += vmcs12->tsc_offset; if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) @@ -3294,7 +3283,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * 26.7 "VM-entry failures during or after loading guest state". */ vmentry_fail_vmexit_guest_mode: - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; leave_guest_mode(vcpu); @@ -3407,8 +3396,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) && - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) && + !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && + !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && (vmcs12->guest_rflags & X86_EFLAGS_IF))) { vmx->nested.nested_run_pending = 0; return kvm_vcpu_halt(vcpu); @@ -3427,7 +3416,7 @@ vmentry_failed: /* * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date - * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK). + * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK). * This function returns the new value we should put in vmcs12.guest_cr0. * It's not enough to just return the vmcs02 GUEST_CR0. Rather, * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now @@ -3999,8 +3988,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.pat = vmcs12->host_ia32_pat; } if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->host_ia32_perf_global_ctrl); + WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->host_ia32_perf_global_ctrl)); /* Set L1 segment info according to Intel SDM 27.5.2 Loading Host Segment and Descriptor-Table Registers */ @@ -4209,7 +4198,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (nested_cpu_has_preemption_timer(vmcs12)) hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; if (likely(!vmx->fail)) { @@ -4588,8 +4577,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) gpa_t vmptr; uint32_t revision; struct vcpu_vmx *vmx = to_vmx(vcpu); - const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED - | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED + | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* * The Intel VMX Instruction Reference lists a bunch of bits that are @@ -4751,36 +4740,32 @@ static int handle_vmresume(struct kvm_vcpu *vcpu) static int handle_vmread(struct kvm_vcpu *vcpu) { - unsigned long field; - u64 field_value; + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - int len; - gva_t gva = 0; - struct vmcs12 *vmcs12; + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; + unsigned long field; + u64 value; + gva_t gva = 0; short offset; + int len; if (!nested_vmx_check_permission(vcpu)) return 1; - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu)) - vmcs12 = get_vmcs12(vcpu); - else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMREAD - * to shadowed-field sets the ALU flags for VMfailInvalid. - */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - /* Decode instruction info and find the field to read */ - field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); offset = vmcs_field_to_offset(field); if (offset < 0) @@ -4790,25 +4775,26 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - /* Read the field, zero-extended to a u64 field_value */ - field_value = vmcs12_read_any(vmcs12, field, offset); + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); /* * Now copy part of this value to register or memory, as requested. * Note that the number of bits actually copied is 32 or 64 depending * on the guest's mode (32 or 64 bit), not on the given field's length. */ - if (vmx_instruction_info & (1u << 10)) { - kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), - field_value); + if (instr_info & BIT(10)) { + kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value); } else { len = is_64_bit_mode(vcpu) ? 8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, len, &gva)) + instr_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } return nested_vmx_succeed(vcpu); @@ -4840,46 +4826,58 @@ static bool is_shadow_field_ro(unsigned long field) static int handle_vmwrite(struct kvm_vcpu *vcpu) { + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct x86_exception e; unsigned long field; - int len; + short offset; gva_t gva; - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + int len; - /* The value to write might be 32 or 64 bits, depending on L1's long + /* + * The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. The code below first zero-extends the value to 64 - * bit (field_value), and then copies only the appropriate number of + * bit (value), and then copies only the appropriate number of * bits into the vmcs12 field. */ - u64 field_value = 0; - struct x86_exception e; - struct vmcs12 *vmcs12; - short offset; + u64 value = 0; if (!nested_vmx_check_permission(vcpu)) return 1; - if (vmx->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMWRITE sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); - if (vmx_instruction_info & (1u << 10)) - field_value = kvm_register_readl(vcpu, - (((vmx_instruction_info) >> 3) & 0xf)); + if (instr_info & BIT(10)) + value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf)); else { len = is_64_bit_mode(vcpu) ? 8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, false, len, &gva)) + instr_info, false, len, &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) { + if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } } + field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); + + offset = vmcs_field_to_offset(field); + if (offset < 0) + return nested_vmx_failValid(vcpu, + VMXERR_UNSUPPORTED_VMCS_COMPONENT); - field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* * If the vCPU supports "VMWRITE to any supported field in the * VMCS," then the "read-only" fields are actually read/write. @@ -4889,29 +4887,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - if (!is_guest_mode(vcpu)) { - vmcs12 = get_vmcs12(vcpu); - - /* - * Ensure vmcs12 is up-to-date before any VMWRITE that dirties - * vmcs12, else we may crush a field or consume a stale value. - */ - if (!is_shadow_field_rw(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE - * to shadowed-field sets the ALU flags for VMfailInvalid. - */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_failValid(vcpu, - VMXERR_UNSUPPORTED_VMCS_COMPONENT); + /* + * Ensure vmcs12 is up-to-date before any VMWRITE that dirties + * vmcs12, else we may crush a field or consume a stale value. + */ + if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); /* * Some Intel CPUs intentionally drop the reserved bits of the AR byte @@ -4922,9 +4903,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) * the stripped down value, L2 sees the full value as stored by KVM). */ if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) - field_value &= 0x1f0ff; + value &= 0x1f0ff; - vmcs12_write_any(vmcs12, field, offset, field_value); + vmcs12_write_any(vmcs12, field, offset, value); /* * Do not track vmcs12 dirty-state if in guest-mode as we actually @@ -4941,7 +4922,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) preempt_disable(); vmcs_load(vmx->vmcs01.shadow_vmcs); - __vmcs_writel(field, field_value); + __vmcs_writel(field, value); vmcs_clear(vmx->vmcs01.shadow_vmcs); vmcs_load(vmx->loaded_vmcs->vmcs); @@ -5524,10 +5505,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return false; case EXIT_REASON_TRIPLE_FAULT: return true; - case EXIT_REASON_PENDING_INTERRUPT: - return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); + case EXIT_REASON_INTERRUPT_WINDOW: + return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING); case EXIT_REASON_NMI_WINDOW: - return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); + return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING); case EXIT_REASON_TASK_SWITCH: return true; case EXIT_REASON_CPUID: @@ -6015,8 +5996,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; msrs->procbased_ctls_high &= - CPU_BASED_VIRTUAL_INTR_PENDING | - CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_INTR_WINDOW_EXITING | + CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING | diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7023138b1cb0..34a3a17bb6d7 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -86,10 +86,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -130,16 +134,20 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) - return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h index eb1ecd16fd22..cad128d1657b 100644 --- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h +++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h @@ -23,12 +23,12 @@ BUILD_BUG_ON(1) * * When adding or removing fields here, note that shadowed * fields must always be synced by prepare_vmcs02, not just - * prepare_vmcs02_full. + * prepare_vmcs02_rare. */ /* * Keeping the fields ordered by size is an attempt at improving - * branch prediction in vmcs_read_any and vmcs_write_any. + * branch prediction in vmcs12_read_any and vmcs12_write_any. */ /* 16-bits */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e3394c839dea..c475fa2aaae0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1057,6 +1057,12 @@ static unsigned long segment_base(u16 selector) } #endif +static inline bool pt_can_write_msr(struct vcpu_vmx *vmx) +{ + return (pt_mode == PT_MODE_HOST_GUEST) && + !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); +} + static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) { u32 i; @@ -1716,7 +1722,7 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) return vcpu->arch.tsc_offset - vmcs12->tsc_offset; return vcpu->arch.tsc_offset; @@ -1734,7 +1740,7 @@ static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) g_tsc_offset = vmcs12->tsc_offset; trace_kvm_write_tsc_offset(vcpu->vcpu_id, @@ -1773,8 +1779,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) default: return 1; } - - return 0; } /* @@ -1839,11 +1843,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if (!msr_info->host_initiated && !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) + FEAT_CTL_LMCE_ENABLED)) return 1; msr_info->data = vcpu->arch.mcg_ext_ctl; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: msr_info->data = vmx->msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: @@ -1916,7 +1920,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } /* - * Writes msr value into into the appropriate "register". + * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ @@ -1994,12 +1998,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + if (data & ~kvm_spec_ctrl_valid_bits(vcpu)) return 1; vmx->spec_ctrl = data; - if (!data) break; @@ -2010,7 +2012,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * * For nested: * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_merge_msr_bitmap. We should not touch the + * nested_vmx_prepare_msr_bitmap. We should not touch the * vmcs02.msr_bitmap here since it gets completely overwritten * in the merging. We update the vmcs01 here for L1 as well * since it will end up touching the MSR anyway now. @@ -2033,7 +2035,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~PRED_CMD_IBPB) return 1; - + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + return 1; if (!data) break; @@ -2046,7 +2049,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * * For nested: * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_merge_msr_bitmap. We should not touch the + * nested_vmx_prepare_msr_bitmap. We should not touch the * vmcs02.msr_bitmap here since it gets completely overwritten * in the merging. */ @@ -2074,15 +2077,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if ((!msr_info->host_initiated && !(to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) || + FEAT_CTL_LMCE_ENABLED)) || (data & ~MCG_EXT_CTL_LMCE_EN)) return 1; vcpu->arch.mcg_ext_ctl = data; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: if (!vmx_feature_control_msr_valid(vcpu, data) || (to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + FEAT_CTL_LOCKED && !msr_info->host_initiated)) return 1; vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) @@ -2104,47 +2107,50 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) pt_update_intercept_for_msr(vmx); break; case MSR_IA32_RTIT_STATUS: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (data & MSR_IA32_RTIT_STATUS_MASK)) + if (!pt_can_write_msr(vmx)) + return 1; + if (data & MSR_IA32_RTIT_STATUS_MASK) return 1; vmx->pt_desc.guest.status = data; break; case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_cr3_filtering)) return 1; vmx->pt_desc.guest.cr3_match = data; break; case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) || - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) + return 1; + if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK) return 1; vmx->pt_desc.guest.output_base = data; break; case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) return 1; vmx->pt_desc.guest.output_mask = data; break; case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: + if (!pt_can_write_msr(vmx)) + return 1; index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) + if (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges)) + return 1; + if (is_noncanonical_address(data, vcpu)) return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; @@ -2204,29 +2210,8 @@ static __init int cpu_has_kvm_support(void) static __init int vmx_disabled_by_bios(void) { - u64 msr; - - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - if (msr & FEATURE_CONTROL_LOCKED) { - /* launched w/ TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && tboot_enabled()) - return 1; - /* launched w/o TXT and VMX only enabled w/ TXT */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) - && !tboot_enabled()) { - printk(KERN_WARNING "kvm: disable TXT in the BIOS or " - "activate TXT before enabling KVM\n"); - return 1; - } - /* launched w/o TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && !tboot_enabled()) - return 1; - } - - return 0; + return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !boot_cpu_has(X86_FEATURE_VMX); } static void kvm_cpu_vmxon(u64 addr) @@ -2241,7 +2226,6 @@ static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - u64 old, test_bits; if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; @@ -2269,17 +2253,6 @@ static int hardware_enable(void) */ crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - - test_bits = FEATURE_CONTROL_LOCKED; - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (tboot_enabled()) - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; - - if ((old & test_bits) != test_bits) { - /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); - } kvm_cpu_vmxon(phys_addr); if (enable_ept) ept_sync_global(); @@ -2355,7 +2328,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, CPU_BASED_CR3_STORE_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_USE_TSC_OFFSETTING | CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | @@ -2690,8 +2663,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->rmode.vm86_active = 0; - vmx_segment_cache_clear(vmx); - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); @@ -3477,7 +3448,7 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); - int i, idx, r = 0; + int i, r = 0; kvm_pfn_t identity_map_pfn; u32 tmp; @@ -3485,7 +3456,7 @@ static int init_rmode_identity_map(struct kvm *kvm) mutex_lock(&kvm->slots_lock); if (likely(kvm_vmx->ept_identity_pagetable_done)) - goto out2; + goto out; if (!kvm_vmx->ept_identity_map_addr) kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; @@ -3494,9 +3465,8 @@ static int init_rmode_identity_map(struct kvm *kvm) r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, kvm_vmx->ept_identity_map_addr, PAGE_SIZE); if (r < 0) - goto out2; + goto out; - idx = srcu_read_lock(&kvm->srcu); r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -3512,9 +3482,6 @@ static int init_rmode_identity_map(struct kvm *kvm) kvm_vmx->ept_identity_pagetable_done = true; out: - srcu_read_unlock(&kvm->srcu, idx); - -out2: mutex_unlock(&kvm->slots_lock); return r; } @@ -4042,6 +4009,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (vmx_xsaves_supported()) { /* Exposing XSAVES only when XSAVE is exposed */ bool xsaves_enabled = + boot_cpu_has(X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); @@ -4352,7 +4320,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static void enable_irq_window(struct kvm_vcpu *vcpu) { - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); } static void enable_nmi_window(struct kvm_vcpu *vcpu) @@ -4363,7 +4331,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; } - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -4488,8 +4456,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) if (enable_unrestricted_guest) return 0; - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, - PAGE_SIZE * 3); + mutex_lock(&kvm->slots_lock); + ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, + PAGE_SIZE * 3); + mutex_unlock(&kvm->slots_lock); + if (ret) return ret; to_kvm_vmx(kvm)->tss_addr = addr; @@ -4971,7 +4942,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) static int handle_interrupt_window(struct kvm_vcpu *vcpu) { - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5184,7 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(!enable_vnmi); - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5205,7 +5176,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); intr_window_requested = exec_controls_get(vmx) & - CPU_BASED_VIRTUAL_INTR_PENDING; + CPU_BASED_INTR_WINDOW_EXITING; while (vmx->emulation_required && count-- != 0) { if (intr_window_requested && vmx_interrupt_allowed(vcpu)) @@ -5529,7 +5500,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_CPUID] = kvm_emulate_cpuid, [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, - [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, + [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, [EXIT_REASON_HLT] = kvm_emulate_halt, [EXIT_REASON_INVD] = handle_invd, [EXIT_REASON_INVLPG] = handle_invlpg, @@ -5816,7 +5787,8 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu) +static int vmx_handle_exit(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason = vmx->exit_reason; @@ -5902,34 +5874,44 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } } - if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { + if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { + kvm_skip_emulated_instruction(vcpu); + return 1; + } + + if (exit_reason >= kvm_vmx_max_exit_handlers) + goto unexpected_vmexit; #ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); #endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = + + exit_reason = array_index_nospec(exit_reason, + kvm_vmx_max_exit_handlers); + if (!kvm_vmx_exit_handlers[exit_reason]) + goto unexpected_vmexit; + + return kvm_vmx_exit_handlers[exit_reason](vcpu); + +unexpected_vmexit: + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; } /* @@ -6250,7 +6232,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6258,6 +6241,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) handle_exception_nmi_irqoff(vmx); + else if (!is_guest_mode(vcpu) && + vmx->exit_reason == EXIT_REASON_MSR_WRITE) + *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static bool vmx_has_emulated_msr(int index) @@ -6666,60 +6652,31 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_vpid(vmx->vpid); nested_vmx_free_vcpu(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, vmx); } -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) +static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { - int err; struct vcpu_vmx *vmx; unsigned long *msr_bitmap; - int i, cpu; - - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vmx) - return ERR_PTR(-ENOMEM); + int i, cpu, err; - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_vcpu; - } + BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); + vmx = to_vmx(vcpu); - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } + err = -ENOMEM; vmx->vpid = allocate_vpid(); - err = kvm_vcpu_init(&vmx->vcpu, kvm, id); - if (err) - goto free_vcpu; - - err = -ENOMEM; - /* * If PML is turned on, failure on enabling PML just results in failure * of creating the vcpu, therefore we can simplify PML logic (by * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest, etc. + * for the guest), etc. */ if (enable_pml) { vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vmx->pml_pg) - goto uninit_vcpu; + goto free_vpid; } BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); @@ -6764,7 +6721,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(kvm)) { + if (kvm_cstate_in_guest(vcpu->kvm)) { vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); @@ -6774,19 +6731,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; init_vmcs(vmx); - vmx_vcpu_put(&vmx->vcpu); + vmx_vcpu_put(vcpu); put_cpu(); - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - err = alloc_apic_access_page(kvm); + if (cpu_need_virtualize_apic_accesses(vcpu)) { + err = alloc_apic_access_page(vcpu->kvm); if (err) goto free_vmcs; } if (enable_ept && !enable_unrestricted_guest) { - err = init_rmode_identity_map(kvm); + err = init_rmode_identity_map(vcpu->kvm); if (err) goto free_vmcs; } @@ -6794,14 +6751,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); + kvm_vcpu_apicv_active(vcpu)); else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR @@ -6812,22 +6769,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->ept_pointer = INVALID_PAGE; - return &vmx->vcpu; + return 0; free_vmcs: free_loaded_vmcs(vmx->loaded_vmcs); free_pml: vmx_destroy_pml_buffer(vmx); -uninit_vcpu: - kvm_vcpu_uninit(&vmx->vcpu); -free_vcpu: +free_vpid: free_vpid(vmx->vpid); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); -free_partial_vcpu: - kmem_cache_free(kvm_vcpu_cache, vmx); - return ERR_PTR(err); + return err; } #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" @@ -6871,6 +6821,12 @@ static int __init vmx_check_processor_compat(void) struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; + if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !this_cpu_has(X86_FEATURE_VMX)) { + pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id()); + return -EIO; + } + if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) return -EIO; if (nested) @@ -6973,28 +6929,28 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) } while (0) entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); + cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME)); + cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME)); + cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC)); + cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE)); + cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE)); + cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE)); + cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE)); + cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE)); + cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR)); + cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM)); + cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX)); + cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX)); + cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID)); + cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE)); entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); + cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE)); + cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP)); + cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP)); + cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); + cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); #undef cr4_fixed1_update } @@ -7099,12 +7055,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); + ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); @@ -7128,7 +7084,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { if (func == 1 && nested) - entry->ecx |= bit(X86_FEATURE_VMX); + entry->ecx |= feature_bit(VMX); } static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -7523,10 +7479,10 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; + FEAT_CTL_LMCE_ENABLED; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; + ~FEAT_CTL_LMCE_ENABLED; } static int vmx_smi_allowed(struct kvm_vcpu *vcpu) @@ -7870,6 +7826,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, .pt_supported = vmx_pt_supported, + .pku_supported = vmx_pku_supported, .request_immediate_exit = vmx_request_immediate_exit, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a4f7f737c5d4..7f42cf3dcd70 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -289,7 +289,7 @@ struct vcpu_vmx { /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in - * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included + * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included * in msr_ia32_feature_control_valid_bits. */ u64 msr_ia32_feature_control; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf917139de6b..2d3be7f3ad67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,6 +93,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif +static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ @@ -879,30 +881,44 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - if (cr4 & CR4_RESERVED_BITS) - return -EINVAL; +#define __cr4_reserved_bits(__cpu_has, __c) \ +({ \ + u64 __reserved_bits = CR4_RESERVED_BITS; \ + \ + if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \ + __reserved_bits |= X86_CR4_OSXSAVE; \ + if (!__cpu_has(__c, X86_FEATURE_SMEP)) \ + __reserved_bits |= X86_CR4_SMEP; \ + if (!__cpu_has(__c, X86_FEATURE_SMAP)) \ + __reserved_bits |= X86_CR4_SMAP; \ + if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \ + __reserved_bits |= X86_CR4_FSGSBASE; \ + if (!__cpu_has(__c, X86_FEATURE_PKU)) \ + __reserved_bits |= X86_CR4_PKE; \ + if (!__cpu_has(__c, X86_FEATURE_LA57)) \ + __reserved_bits |= X86_CR4_LA57; \ + __reserved_bits; \ +}) - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return -EINVAL; +static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +{ + u64 reserved_bits = __cr4_reserved_bits(cpu_has, c); - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return -EINVAL; + if (cpuid_ecx(0x7) & feature_bit(LA57)) + reserved_bits &= ~X86_CR4_LA57; - if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return -EINVAL; + if (kvm_x86_ops->umip_emulated()) + reserved_bits &= ~X86_CR4_UMIP; - if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return -EINVAL; + return reserved_bits; +} - if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + if (cr4 & cr4_reserved_bits) return -EINVAL; - if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu)) return -EINVAL; return 0; @@ -1047,9 +1063,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -1064,7 +1082,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) case 5: /* fall through */ default: /* 7 */ - if (val & 0xffffffff00000000ULL) + if (!kvm_dr7_valid(val)) return -1; /* #GP */ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; kvm_update_dr7(vcpu); @@ -1086,9 +1104,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -1142,7 +1162,7 @@ static const u32 msrs_to_save_all[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, @@ -1212,6 +1232,7 @@ static const u32 emulated_msrs_all[] = { MSR_MISC_FEATURES_ENABLES, MSR_AMD64_VIRT_SPEC_CTRL, MSR_IA32_POWER_CTL, + MSR_IA32_UCODE_REV, /* * The following list leaves out MSRs whose values are determined @@ -1526,6 +1547,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); /* + * The fast path for frequent and performance sensitive wrmsr emulation, + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces + * the latency of virtual IPI by avoiding the expensive bits of transitioning + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the + * other cases which must be called after interrupts are enabled on the host. + */ +static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) +{ + if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) && + ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && + ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) { + + kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32)); + return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data); + } + + return 1; +} + +enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +{ + u32 msr = kvm_rcx_read(vcpu); + u64 data = kvm_read_edx_eax(vcpu); + int ret = 0; + + switch (msr) { + case APIC_BASE_MSR + (APIC_ICR >> 4): + ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + break; + default: + return EXIT_FASTPATH_NONE; + } + + if (!ret) { + trace_kvm_msr_write(msr, data); + return EXIT_FASTPATH_SKIP_EMUL_INS; + } + + return EXIT_FASTPATH_NONE; +} +EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); + +/* * Adapt set_msr() to msr_io()'s calling convention */ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) @@ -2485,7 +2549,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2581,45 +2648,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) static void record_steal_time(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) + /* -EAGAIN is returned in atomic context so we can just return. */ + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, + &map, &vcpu->arch.st.cache, false)) return; + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + st->preempted & KVM_VCPU_FLUSH_TLB); + if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); - if (vcpu->arch.st.steal.version & 1) - vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + vcpu->arch.st.preempted = 0; - vcpu->arch.st.steal.version += 1; + if (st->version & 1) + st->version += 1; /* first time write, random junk */ - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + st->version += 1; smp_wmb(); - vcpu->arch.st.steal.steal += current->sched_info.run_delay - + st->steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); - smp_wmb(); - vcpu->arch.st.steal.version += 1; + st->version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -2786,11 +2855,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS, - sizeof(struct kvm_steal_time))) - return 1; - vcpu->arch.st.msr_val = data; if (!(data & KVM_MSR_ENABLED)) @@ -2926,7 +2990,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -3458,10 +3525,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); - /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3501,15 +3564,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; + if (vcpu->arch.st.preempted) + return; + + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, + &vcpu->arch.st.cache, true)) + return; + + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); - kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal.preempted, - offsetof(struct kvm_steal_time, preempted), - sizeof(vcpu->arch.st.steal.preempted)); + st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -4660,9 +4733,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, { struct kvm_pit *pit = kvm->arch.vpit; - if (!pit) - return -ENXIO; - /* pit->pit_state.lock was overloaded to prevent userspace from getting * an inconsistent state after running multiple KVM_REINJECT_CONTROL * ioctls in parallel. Use a separate lock if that ioctl isn't rare. @@ -5029,6 +5099,9 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&control, argp, sizeof(control))) goto out; + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; r = kvm_vm_ioctl_reinject(kvm, &control); break; } @@ -6186,6 +6259,21 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); } +static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM); +} + +static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE); +} + +static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read(emul_to_vcpu(ctxt), reg); @@ -6263,6 +6351,9 @@ static const struct x86_emulate_ops emulate_ops = { .fix_hypercall = emulator_fix_hypercall, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, + .guest_has_long_mode = emulator_guest_has_long_mode, + .guest_has_movbe = emulator_guest_has_movbe, + .guest_has_fxsr = emulator_guest_has_fxsr, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, @@ -6379,11 +6470,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool write_fault_to_shadow_pgtable, int emulation_type) { - gpa_t gpa = cr2; + gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) @@ -6397,7 +6488,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * Write permission should be allowed since only * write access need to be emulated. */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); /* * If the mapping is invalid in guest, let cpu retry @@ -6454,10 +6545,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - unsigned long cr2, int emulation_type) + gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2; + unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; @@ -6486,14 +6577,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) + if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2; + vcpu->arch.last_retry_addr = cr2_or_gpa; if (!vcpu->arch.mmu->direct_map) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6639,11 +6730,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - int emulation_type, - void *insn, - int insn_len) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len) { int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -6689,8 +6777,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, - emulation_type)) + if (reexecute_instruction(vcpu, cr2_or_gpa, + write_fault_to_spt, + emulation_type)) return 1; if (ctxt->have_exception) { /* @@ -6724,7 +6813,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } - if (retry_instruction(ctxt, cr2, emulation_type)) + if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) return 1; /* this is needed for vmware backdoor interface to work since it @@ -6736,7 +6825,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: /* Save the faulting GPA (cr2) in the address field */ - ctxt->exception.address = cr2; + ctxt->exception.address = cr2_or_gpa; r = x86_emulate_insn(ctxt); @@ -6744,7 +6833,7 @@ restart: return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; @@ -7357,8 +7446,8 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) { struct kvm_lapic_irq lapic_irq; - lapic_irq.shorthand = 0; - lapic_irq.dest_mode = 0; + lapic_irq.shorthand = APIC_DEST_NOSHORT; + lapic_irq.dest_mode = APIC_DEST_PHYSICAL; lapic_irq.level = 0; lapic_irq.dest_id = apicid; lapic_irq.msi_redir_hint = false; @@ -7997,6 +8086,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); + enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE; bool req_immediate_exit = false; @@ -8198,8 +8288,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - /* The preempt notifier should have taken care of the FPU already. */ - WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8243,7 +8334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_x86_ops->handle_exit_irqoff(vcpu); + kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_fastpath); /* * Consume any pending interrupts, including the possible source of @@ -8287,7 +8378,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_from_vapic(vcpu); vcpu->arch.gpa_available = false; - r = kvm_x86_ops->handle_exit(vcpu); + r = kvm_x86_ops->handle_exit(vcpu, exit_fastpath); return r; cancel_injection: @@ -8471,12 +8562,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +static void kvm_save_current_fpu(struct fpu *fpu) +{ + /* + * If the target FPU state is not resident in the CPU registers, just + * memcpy() from current, else save CPU state directly to the target. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&fpu->state, ¤t->thread.fpu.state, + fpu_kernel_xstate_size); + else + copy_fpregs_to_fpstate(fpu); +} + /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.user_fpu); + kvm_save_current_fpu(vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8492,7 +8597,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); + kvm_save_current_fpu(vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); @@ -8714,6 +8820,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu_load(vcpu); + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && @@ -8722,6 +8830,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return 0; } @@ -9082,33 +9192,90 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { - void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; - - kvmclock_reset(vcpu); + if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + pr_warn_once("kvm: SMP vm created on host with unstable TSC; " + "guest TSC will not be reliable\n"); - kvm_x86_ops->vcpu_free(vcpu); - free_cpumask_var(wbinvd_dirty_mask); + return 0; } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; + struct page *page; + int r; - if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) - printk_once(KERN_WARNING - "kvm: SMP vm created on host with unstable TSC; " - "guest TSC will not be reliable\n"); + vcpu->arch.emulate_ctxt.ops = &emulate_ops; + if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - vcpu = kvm_x86_ops->vcpu_create(kvm, id); + kvm_set_tsc_khz(vcpu, max_tsc_khz); - return vcpu; -} + r = kvm_mmu_create(vcpu); + if (r < 0) + return r; + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); + r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); + if (r < 0) + goto fail_mmu_destroy; + } else + static_key_slow_inc(&kvm_no_apic_vcpu); + + r = -ENOMEM; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto fail_free_lapic; + vcpu->arch.pio_data = page_address(page); + + vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.mce_banks) + goto fail_free_pio_data; + vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; + + if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, + GFP_KERNEL_ACCOUNT)) + goto fail_free_mce_banks; + + vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.user_fpu) { + pr_err("kvm: failed to allocate userspace's fpu\n"); + goto free_wbinvd_dirty_mask; + } + + vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.guest_fpu) { + pr_err("kvm: failed to allocate vcpu's fpu\n"); + goto free_user_fpu; + } + fx_init(vcpu); + + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + + kvm_async_pf_hash_reset(vcpu); + kvm_pmu_init(vcpu); + + vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; + + kvm_hv_vcpu_init(vcpu); + + r = kvm_x86_ops->vcpu_create(vcpu); + if (r) + goto free_guest_fpu; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_vcpu_mtrr_init(vcpu); @@ -9117,6 +9284,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_init_mmu(vcpu, false); vcpu_put(vcpu); return 0; + +free_guest_fpu: + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); +free_wbinvd_dirty_mask: + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); +fail_free_mce_banks: + kfree(vcpu->arch.mce_banks); +fail_free_pio_data: + free_page((unsigned long)vcpu->arch.pio_data); +fail_free_lapic: + kvm_free_lapic(vcpu); +fail_mmu_destroy: + kvm_mmu_destroy(vcpu); + return r; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -9149,13 +9332,29 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - vcpu->arch.apf.msr_val = 0; + struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + int idx; - vcpu_load(vcpu); - kvm_mmu_unload(vcpu); - vcpu_put(vcpu); + kvm_release_pfn(cache->pfn, cache->dirty, cache); + + kvmclock_reset(vcpu); kvm_x86_ops->vcpu_free(vcpu); + + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); + kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + + kvm_hv_vcpu_uninit(vcpu); + kvm_pmu_destroy(vcpu); + kfree(vcpu->arch.mce_banks); + kvm_free_lapic(vcpu); + idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_mmu_destroy(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + free_page((unsigned long)vcpu->arch.pio_data); + if (!lapic_in_kernel(vcpu)) + static_key_slow_dec(&kvm_no_apic_vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9171,7 +9370,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.nmi_injected = false; kvm_clear_interrupt_queue(vcpu); kvm_clear_exception_queue(vcpu); - vcpu->arch.exception.pending = false; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); @@ -9347,6 +9545,8 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); + if (kvm_has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9375,6 +9575,13 @@ void kvm_arch_hardware_unsetup(void) int kvm_arch_check_processor_compat(void) { + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + + WARN_ON(!irqs_disabled()); + + if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits) + return -EIO; + return kvm_x86_ops->check_processor_compatibility(); } @@ -9392,98 +9599,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) struct static_key kvm_no_apic_vcpu __read_mostly; EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct page *page; - int r; - - vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - else - vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - r = -ENOMEM; - goto fail; - } - vcpu->arch.pio_data = page_address(page); - - kvm_set_tsc_khz(vcpu, max_tsc_khz); - - r = kvm_mmu_create(vcpu); - if (r < 0) - goto fail_free_pio_data; - - if (irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); - r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); - if (r < 0) - goto fail_mmu_destroy; - } else - static_key_slow_inc(&kvm_no_apic_vcpu); - - vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, - GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.mce_banks) { - r = -ENOMEM; - goto fail_free_lapic; - } - vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; - - if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, - GFP_KERNEL_ACCOUNT)) { - r = -ENOMEM; - goto fail_free_mce_banks; - } - - fx_init(vcpu); - - vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; - - vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); - - vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; - - kvm_async_pf_hash_reset(vcpu); - kvm_pmu_init(vcpu); - - vcpu->arch.pending_external_vector = -1; - vcpu->arch.preempted_in_kernel = false; - - kvm_hv_vcpu_init(vcpu); - - return 0; - -fail_free_mce_banks: - kfree(vcpu->arch.mce_banks); -fail_free_lapic: - kvm_free_lapic(vcpu); -fail_mmu_destroy: - kvm_mmu_destroy(vcpu); -fail_free_pio_data: - free_page((unsigned long)vcpu->arch.pio_data); -fail: - return r; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - int idx; - - kvm_hv_vcpu_uninit(vcpu); - kvm_pmu_destroy(vcpu); - kfree(vcpu->arch.mce_banks); - kvm_free_lapic(vcpu); - idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_mmu_destroy(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - free_page((unsigned long)vcpu->arch.pio_data); - if (!lapic_in_kernel(vcpu)) - static_key_slow_dec(&kvm_no_apic_vcpu); -} - void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -9558,7 +9673,7 @@ static void kvm_free_vcpus(struct kvm *kvm) kvm_unload_vcpu_mmu(vcpu); } kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -9627,18 +9742,6 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(__x86_set_memory_region); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) -{ - int r; - - mutex_lock(&kvm->slots_lock); - r = __x86_set_memory_region(kvm, id, gpa, size); - mutex_unlock(&kvm->slots_lock); - - return r; -} -EXPORT_SYMBOL_GPL(x86_set_memory_region); - void kvm_arch_pre_destroy_vm(struct kvm *kvm) { kvm_mmu_pre_destroy_vm(kvm); @@ -9652,9 +9755,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm) * unless the the memory map has changed due to process exit * or fd copying. */ - x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); - x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); - x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_lock(&kvm->slots_lock); + __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + 0, 0); + __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + 0, 0); + __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_unlock(&kvm->slots_lock); } if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); @@ -9758,11 +9865,18 @@ out_free: void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { + struct kvm_vcpu *vcpu; + int i; + /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ kvm_mmu_invalidate_mmio_sptes(kvm, gen); + + /* Force re-initialization of steal_time cache */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -9792,7 +9906,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * * The reason is, in case of PML, we need to set D-bit for any slots * with dirty logging disabled in order to eliminate unnecessary GPA - * logging in PML buffer (and potential PML buffer full VMEXT). This + * logging in PML buffer (and potential PML buffer full VMEXIT). This * guarantees leaving PML enabled during guest's lifetime won't have * any additional overhead from PML when guest is running with dirty * logging disabled for memory slots. @@ -10014,7 +10128,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -10127,7 +10241,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_not_present(work->arch.token, work->gva); + trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (kvm_can_deliver_async_pf(vcpu) && @@ -10162,7 +10276,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - trace_kvm_async_pf_ready(work->arch.token, work->gva); + trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && !apf_get_user(vcpu, &val)) { @@ -10284,7 +10398,6 @@ bool kvm_vector_hashing_enabled(void) { return vector_hashing; } -EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { @@ -10292,6 +10405,28 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu) +{ + uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) && + !boot_cpu_has(X86_FEATURE_AMD_IBRS)) + bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + bits &= ~SPEC_CTRL_SSBD; + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) + bits &= ~SPEC_CTRL_SSBD; + + return bits; +} +EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 29391af8871d..2d2ff855773b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -144,11 +144,6 @@ static inline bool is_pae_paging(struct kvm_vcpu *vcpu) return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; @@ -166,21 +161,13 @@ static inline u64 get_canonical(u64 la, u8 vaddr_bits) static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) { -#ifdef CONFIG_X86_64 return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; -#else - return false; -#endif } static inline bool emul_is_noncanonical_address(u64 la, struct x86_emulate_ctxt *ctxt) { -#ifdef CONFIG_X86_64 return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; -#else - return false; -#endif } static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, @@ -289,8 +276,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); +enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ @@ -369,7 +357,14 @@ static inline bool kvm_pat_valid(u64 data) return (data | ((data & 0x0202020202020202ull) << 1)) == data; } +static inline bool kvm_dr7_valid(unsigned long data) +{ + /* Bits [63:32] are reserved */ + return !(data >> 32); +} + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); +u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 306c3a0902ba..31600d851fd8 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff) */ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; /* * Resolve the default segment register as described in Section 3.7.4 @@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * which may be invalid at this point. */ if (regoff == offsetof(struct pt_regs, ip)) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; else return INAT_SEG_REG_CS; @@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * In long mode, segment override prefixes are ignored, except for * overrides for FS and GS. */ - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { if (idx != INAT_SEG_REG_FS && idx != INAT_SEG_REG_GS) idx = INAT_SEG_REG_IGNORE; @@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) */ return (unsigned long)(sel << 4); - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { /* * Only FS or GS will have a base address, the rest of * the segments' bases are forced to 0. */ unsigned long base; - if (seg_reg_idx == INAT_SEG_REG_FS) + if (seg_reg_idx == INAT_SEG_REG_FS) { rdmsrl(MSR_FS_BASE, base); - else if (seg_reg_idx == INAT_SEG_REG_GS) + } else if (seg_reg_idx == INAT_SEG_REG_GS) { /* * swapgs was called at the kernel entry point. Thus, * MSR_KERNEL_GS_BASE will have the user-space GS base. */ - rdmsrl(MSR_KERNEL_GS_BASE, base); - else + if (user_mode(regs)) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + rdmsrl(MSR_GS_BASE, base); + } else { base = 0; + } return base; } @@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) if (sel < 0) return 0; - if (user_64bit_mode(regs) || v8086_mode(regs)) + if (any_64bit_mode(regs) || v8086_mode(regs)) return -1L; if (!sel) @@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, * following instruction. */ if (*regoff == -EDOM) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) tmp = regs->ip + insn->length; else tmp = 0; @@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) * After computed, the effective address is treated as an unsigned * quantity. */ - if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) + if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) goto out; /* diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 337830d7a59c..7ff00ea64e4f 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -29,10 +29,7 @@ SYM_FUNC_START_ALIAS(memmove) SYM_FUNC_START(__memmove) - /* Handle more 32 bytes in loop */ mov %rdi, %rax - cmp $0x20, %rdx - jb 1f /* Decide forward/backward copy mode */ cmp %rdi, %rsi @@ -42,7 +39,9 @@ SYM_FUNC_START(__memmove) cmp %rdi, %r8 jg 2f + /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS /* @@ -114,6 +113,8 @@ SYM_FUNC_START(__memmove) */ .p2align 4 2: + cmp $0x20, %rdx + jb 1f cmp $680, %rdx jb 6f cmp %dil, %sil diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 8908c58bd6cd..53adc1762ec0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -929,7 +929,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3b89c201ac26..98f7c6fa2eaa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -12,8 +12,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o +obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ + pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o + +obj-y += pat/ # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -23,13 +25,11 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_interval.o - obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o -obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o @@ -45,7 +45,6 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 39001a401eff..4a3b62f780b4 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level_debugfs(m, NULL, false); + ptdump_walk_pgd_level_debugfs(m, &init_mm, false); return 0; } @@ -15,11 +15,8 @@ DEFINE_SHOW_ATTRIBUTE(ptdump); static int ptdump_curknl_show(struct seq_file *m, void *v) { - if (current->mm->pgd) { - down_read(¤t->mm->mmap_sem); - ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); - up_read(¤t->mm->mmap_sem); - } + if (current->mm->pgd) + ptdump_walk_pgd_level_debugfs(m, current->mm, false); return 0; } @@ -28,11 +25,8 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curknl); #ifdef CONFIG_PAGE_TABLE_ISOLATION static int ptdump_curusr_show(struct seq_file *m, void *v) { - if (current->mm->pgd) { - down_read(¤t->mm->mmap_sem); - ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); - up_read(¤t->mm->mmap_sem); - } + if (current->mm->pgd) + ptdump_walk_pgd_level_debugfs(m, current->mm, true); return 0; } @@ -43,7 +37,7 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr); static int ptdump_efi_show(struct seq_file *m, void *v) { if (efi_mm.pgd) - ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false); + ptdump_walk_pgd_level_debugfs(m, &efi_mm, false); return 0; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ab67822fd2f4..64229dad7eab 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -16,6 +16,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/ptdump.h> #include <asm/e820/types.h> #include <asm/pgtable.h> @@ -26,16 +27,18 @@ * when a "break" in the continuity is found. */ struct pg_state { + struct ptdump_state ptdump; int level; - pgprot_t current_prot; + pgprotval_t current_prot; pgprotval_t effective_prot; + pgprotval_t prot_levels[5]; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; unsigned long lines; bool to_dmesg; bool check_wx; unsigned long wx_pages; + struct seq_file *seq; }; struct addr_marker { @@ -174,11 +177,10 @@ static struct addr_marker address_markers[] = { /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) +static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) { - pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = - { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; + { "pgd", "p4d", "pud", "pmd", "pte" }; if (!(pr & _PAGE_PRESENT)) { /* Not present */ @@ -202,12 +204,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, " "); /* Bit 7 has a different meaning on level 3 vs 4 */ - if (level <= 4 && pr & _PAGE_PSE) + if (level <= 3 && pr & _PAGE_PSE) pt_dump_cont_printf(m, dmsg, "PSE "); else pt_dump_cont_printf(m, dmsg, " "); - if ((level == 5 && pr & _PAGE_PAT) || - ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE)) + if ((level == 4 && pr & _PAGE_PAT) || + ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) pt_dump_cont_printf(m, dmsg, "PAT "); else pt_dump_cont_printf(m, dmsg, " "); @@ -223,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } -/* - * On 64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ - int shift; - if (!IS_ENABLED(CONFIG_X86_64)) - return u; - - shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); - return (signed long)(u << shift) >> shift; -} - -static void note_wx(struct pg_state *st) +static void note_wx(struct pg_state *st, unsigned long addr) { unsigned long npages; - npages = (st->current_address - st->start_address) / PAGE_SIZE; + npages = (addr - st->start_address) / PAGE_SIZE; #ifdef CONFIG_PCI_BIOS /* @@ -248,7 +237,7 @@ static void note_wx(struct pg_state *st) * Inform about it, but avoid the warning. */ if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && - st->current_address <= PAGE_OFFSET + BIOS_END) { + addr <= PAGE_OFFSET + BIOS_END) { pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); return; } @@ -260,27 +249,47 @@ static void note_wx(struct pg_state *st) (void *)st->start_address); } +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to * print what we collected so far. */ -static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, pgprotval_t new_eff, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { - pgprotval_t prot, cur, eff; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t new_prot, new_eff; + pgprotval_t cur, eff; static const char units[] = "BKMGTPE"; + struct seq_file *m = st->seq; + + new_prot = val & PTE_FLAGS_MASK; + + if (level > 0) { + new_eff = effective_prot(st->prot_levels[level - 1], + new_prot); + } else { + new_eff = new_prot; + } + + if (level >= 0) + st->prot_levels[level] = new_eff; /* * If we have a "break" in the series, we need to flush the state that * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot); - cur = pgprot_val(st->current_prot); + cur = st->current_prot; eff = st->effective_prot; - if (!st->level) { + if (st->level == -1) { /* First entry */ st->current_prot = new_prot; st->effective_prot = new_eff; @@ -289,14 +298,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || new_eff != eff || level != st->level || - st->current_address >= st->marker[1].start_address) { + } else if (new_prot != cur || new_eff != eff || level != st->level || + addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) - note_wx(st); + note_wx(st, addr); /* * Now print the actual finished series @@ -306,9 +315,9 @@ static void note_page(struct seq_file *m, struct pg_state *st, pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); + width, addr); - delta = st->current_address - st->start_address; + delta = addr - st->start_address; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; @@ -325,7 +334,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, * such as the start of vmalloc space etc. * This helps in the interpretation. */ - if (st->current_address >= st->marker[1].start_address) { + if (addr >= st->marker[1].start_address) { if (st->marker->max_lines && st->lines > st->marker->max_lines) { unsigned long nskip = @@ -341,222 +350,45 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->marker->name); } - st->start_address = st->current_address; + st->start_address = addr; st->current_prot = new_prot; st->effective_prot = new_eff; st->level = level; } } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) -{ - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); -} - -static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pte_t *pte; - pgprotval_t prot, eff; - - for (i = 0; i < PTRS_PER_PTE; i++) { - st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - pte = pte_offset_map(&addr, st->current_address); - prot = pte_flags(*pte); - eff = effective_prot(eff_in, prot); - note_page(m, st, __pgprot(prot), eff, 5); - pte_unmap(pte); - } -} -#ifdef CONFIG_KASAN - -/* - * This is an optimization for KASAN=y case. Since all kasan page tables - * eventually point to the kasan_early_shadow_page we could call note_page() - * right away without walking through lower level page tables. This saves - * us dozens of seconds (minutes for 5-level config) while checking for - * W+X mapping or reading kernel_page_tables debugfs file. - */ -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - if (__pa(pt) == __pa(kasan_early_shadow_pmd) || - (pgtable_l5_enabled() && - __pa(pt) == __pa(kasan_early_shadow_p4d)) || - __pa(pt) == __pa(kasan_early_shadow_pud)) { - pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); - note_page(m, st, __pgprot(prot), 0, 5); - return true; - } - return false; -} -#else -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - return false; -} -#endif - -#if PTRS_PER_PMD > 1 - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pmd_t *start, *pmd_start; - pgprotval_t prot, eff; - - pmd_start = start = (pmd_t *)pud_page_vaddr(addr); - for (i = 0; i < PTRS_PER_PMD; i++) { - st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); - if (!pmd_none(*start)) { - prot = pmd_flags(*start); - eff = effective_prot(eff_in, prot); - if (pmd_large(*start) || !pmd_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 4); - } else if (!kasan_page_table(m, st, pmd_start)) { - walk_pte_level(m, st, *start, eff, - P + i * PMD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 4); - start++; - } -} - -#else -#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) -#define pud_large(a) pmd_large(__pmd(pud_val(a))) -#define pud_none(a) pmd_none(__pmd(pud_val(a))) -#endif - -#if PTRS_PER_PUD > 1 - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pud_t *start, *pud_start; - pgprotval_t prot, eff; - - pud_start = start = (pud_t *)p4d_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_PUD; i++) { - st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { - prot = pud_flags(*start); - eff = effective_prot(eff_in, prot); - if (pud_large(*start) || !pud_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 3); - } else if (!kasan_page_table(m, st, pud_start)) { - walk_pmd_level(m, st, *start, eff, - P + i * PUD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 3); - - start++; - } -} - -#else -#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) -#define p4d_large(a) pud_large(__pud(p4d_val(a))) -#define p4d_none(a) pud_none(__pud(p4d_val(a))) -#endif - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - p4d_t *start, *p4d_start; - pgprotval_t prot, eff; - - if (PTRS_PER_P4D == 1) - return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); - - p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_P4D; i++) { - st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); - if (!p4d_none(*start)) { - prot = p4d_flags(*start); - eff = effective_prot(eff_in, prot); - if (p4d_large(*start) || !p4d_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 2); - } else if (!kasan_page_table(m, st, p4d_start)) { - walk_pud_level(m, st, *start, eff, - P + i * P4D_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 2); - - start++; - } -} - -#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) -#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) - -static inline bool is_hypervisor_range(int idx) -{ -#ifdef CONFIG_X86_64 - /* - * A hole in the beginning of kernel address space reserved - * for a hypervisor. - */ - return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) && - (idx < pgd_index(GUARD_HOLE_END_ADDR)); -#else - return false; -#endif -} - -static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, +static void ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, bool checkwx, bool dmesg) { - pgd_t *start = INIT_PGD; - pgprotval_t prot, eff; - int i; - struct pg_state st = {}; - - if (pgd) { - start = pgd; - st.to_dmesg = dmesg; - } + const struct ptdump_range ptdump_ranges[] = { +#ifdef CONFIG_X86_64 - st.check_wx = checkwx; - if (checkwx) - st.wx_pages = 0; +#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) +#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \ + normalize_addr_shift) - for (i = 0; i < PTRS_PER_PGD; i++) { - st.current_address = normalize_addr(i * PGD_LEVEL_MULT); - if (!pgd_none(*start) && !is_hypervisor_range(i)) { - prot = pgd_flags(*start); -#ifdef CONFIG_X86_PAE - eff = _PAGE_USER | _PAGE_RW; + {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, + {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, #else - eff = prot; + {0, ~0UL}, #endif - if (pgd_large(*start) || !pgd_present(*start)) { - note_page(m, &st, __pgprot(prot), eff, 1); - } else { - walk_p4d_level(m, &st, *start, eff, - i * PGD_LEVEL_MULT); - } - } else - note_page(m, &st, __pgprot(0), 0, 1); + {0, 0} +}; - cond_resched(); - start++; - } + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = ptdump_ranges + }, + .level = -1, + .to_dmesg = dmesg, + .check_wx = checkwx, + .seq = m + }; + + ptdump_walk_pgd(&st.ptdump, mm, pgd); - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(m, &st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) @@ -566,18 +398,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); } -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm) { - ptdump_walk_pgd_level_core(m, pgd, false, true); + ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true); } -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user) { + pgd_t *pgd = mm->pgd; #ifdef CONFIG_PAGE_TABLE_ISOLATION if (user && boot_cpu_has(X86_FEATURE_PTI)) pgd = kernel_to_user_pgdp(pgd); #endif - ptdump_walk_pgd_level_core(m, pgd, false, false); + ptdump_walk_pgd_level_core(m, mm, pgd, false, false); } EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); @@ -592,13 +426,13 @@ void ptdump_walk_user_pgd_level_checkwx(void) pr_info("x86/mm: Checking user space page tables\n"); pgd = kernel_to_user_pgdp(pgd); - ptdump_walk_pgd_level_core(NULL, pgd, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false); #endif } void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); } static int __init pt_dump_init(void) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 304d31d8cbbc..fa4ea09593ab 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -29,6 +29,7 @@ #include <asm/efi.h> /* efi_recover_from_page_fault()*/ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ +#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -1486,27 +1487,6 @@ good_area: } NOKPROBE_SYMBOL(do_user_addr_fault); -/* - * Explicitly marked noinline such that the function tracer sees this as the - * page_fault entry point. - */ -static noinline void -__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, - unsigned long address) -{ - prefetchw(¤t->mm->mmap_sem); - - if (unlikely(kmmio_fault(regs, address))) - return; - - /* Was the fault on kernel-controlled part of the address space? */ - if (unlikely(fault_in_kernel_space(address))) - do_kern_addr_fault(regs, hw_error_code, address); - else - do_user_addr_fault(regs, hw_error_code, address); -} -NOKPROBE_SYMBOL(__do_page_fault); - static __always_inline void trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, unsigned long address) @@ -1521,13 +1501,19 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, } dotraplinkage void -do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, + unsigned long address) { - enum ctx_state prev_state; + prefetchw(¤t->mm->mmap_sem); + trace_page_fault_entries(regs, hw_error_code, address); - prev_state = exception_enter(); - trace_page_fault_entries(regs, error_code, address); - __do_page_fault(regs, error_code, address); - exception_exit(prev_state); + if (unlikely(kmmio_fault(regs, address))) + return; + + /* Was the fault on kernel-controlled part of the address space? */ + if (unlikely(fault_in_kernel_space(address))) + do_kern_addr_fault(regs, hw_error_code, address); + else + do_user_addr_fault(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index fab095362c50..5bfd5aef5378 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -19,7 +19,6 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> #include <asm/elf.h> -#include <asm/mpx.h> #if 0 /* This is just for testing */ struct page * @@ -151,10 +150,6 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (len & ~huge_page_mask(h)) return -EINVAL; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - if (len > TASK_SIZE) return -ENOMEM; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 930edeb41ec3..23df4885bbed 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -52,6 +52,7 @@ #include <asm/page_types.h> #include <asm/cpu_entry_area.h> #include <asm/init.h> +#include <asm/pgtable_areas.h> #include "mm_internal.h" @@ -865,43 +866,13 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif int kernel_set_to_readonly __read_mostly; -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, start+size); - - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, start+size); - - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); -} - static void mark_nxdata_nx(void) { /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dcb9bc961b39..abbdecb75fad 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); - struct zone *zone = page_zone(page); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -1260,42 +1258,6 @@ void __init mem_init(void) int kernel_set_to_readonly; -void set_kernel_text_rw(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(_etext); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read write\n", - start, end); - - /* - * Make the kernel identity mapping for text RW. Kernel text - * mapping will always be RO. Refer to the comment in - * static_protections() in pageattr.c - */ - set_memory_rw(start, (end - start) >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(_etext); - - if (!kernel_set_to_readonly) - return; - - pr_debug("Set kernel text: %lx - %lx for read only\n", - start, end); - - /* - * Set the kernel identity mapping for text RO. - */ - set_memory_ro(start, (end - start) >> PAGE_SHIFT); -} - void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6748b4c2baff..f60398aeb644 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -4,7 +4,7 @@ */ #include <asm/iomap.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <linux/export.h> #include <linux/highmem.h> @@ -26,7 +26,7 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) if (!is_io_mapping_possible(base, size)) return -EINVAL; - ret = io_reserve_memtype(base, base + size, &pcm); + ret = memtype_reserve_io(base, base + size, &pcm); if (ret) return ret; @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(iomap_create_wc); void iomap_free(resource_size_t base, unsigned long size) { - io_free_memtype(base, base + size); + memtype_free_io(base, base + size); } EXPORT_SYMBOL_GPL(iomap_free); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b3a2936377b5..44e4beb4239f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -24,7 +24,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/setup.h> #include "physaddr.h" @@ -196,10 +196,10 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, phys_addr &= PHYSICAL_PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, (u64)phys_addr + size, + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { - printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); + printk(KERN_ERR "ioremap memtype_reserve failed %d\n", retval); return NULL; } @@ -255,7 +255,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, pcm)) + if (memtype_kernel_map_sync(phys_addr, size, pcm)) goto err_free_area; if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) @@ -275,7 +275,7 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, err_free_area: free_vm_area(area); err_free_memtype: - free_memtype(phys_addr, phys_addr + size); + memtype_free(phys_addr, phys_addr + size); return NULL; } @@ -451,7 +451,7 @@ void iounmap(volatile void __iomem *addr) return; } - free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); + memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p)); /* Finally remove it */ o = remove_vm_area((void __force *)addr); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index cf5bc37c90ac..763e71abc0fe 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end) } while (pgd++, addr = next, addr != (unsigned long)end); } -#ifdef CONFIG_KASAN_INLINE -static int kasan_die_handler(struct notifier_block *self, - unsigned long val, - void *data) -{ - if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled\n"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); - } - return NOTIFY_OK; -} - -static struct notifier_block kasan_die_notifier = { - .notifier_call = kasan_die_handler, -}; -#endif - void __init kasan_early_init(void) { int i; @@ -341,10 +324,6 @@ void __init kasan_init(void) int i; void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; -#ifdef CONFIG_KASAN_INLINE - register_die_notifier(&kasan_die_notifier); -#endif - memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); /* diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index aae9a933dfd4..cb91eccc4960 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -163,8 +163,6 @@ unsigned long get_mmap_base(int is_legacy) const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_flags & VM_MPX) - return "[mpx]"; return NULL; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c deleted file mode 100644 index 895fb7a9294d..000000000000 --- a/arch/x86/mm/mpx.c +++ /dev/null @@ -1,938 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * mpx.c - Memory Protection eXtensions - * - * Copyright (c) 2014, Intel Corporation. - * Qiaowei Ren <qiaowei.ren@intel.com> - * Dave Hansen <dave.hansen@intel.com> - */ -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/mm_types.h> -#include <linux/mman.h> -#include <linux/syscalls.h> -#include <linux/sched/sysctl.h> - -#include <asm/insn.h> -#include <asm/insn-eval.h> -#include <asm/mmu_context.h> -#include <asm/mpx.h> -#include <asm/processor.h> -#include <asm/fpu/internal.h> - -#define CREATE_TRACE_POINTS -#include <asm/trace/mpx.h> - -static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BD_SIZE_BYTES_64; - else - return MPX_BD_SIZE_BYTES_32; -} - -static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BT_SIZE_BYTES_64; - else - return MPX_BT_SIZE_BYTES_32; -} - -/* - * This is really a simplified "vm_mmap". it only handles MPX - * bounds tables (the bounds directory is user-allocated). - */ -static unsigned long mpx_mmap(unsigned long len) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, populate; - - /* Only bounds table can be allocated here */ - if (len != mpx_bt_size_bytes(mm)) - return -EINVAL; - - down_write(&mm->mmap_sem); - addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL); - up_write(&mm->mmap_sem); - if (populate) - mm_populate(addr, populate); - - return addr; -} - -static int mpx_insn_decode(struct insn *insn, - struct pt_regs *regs) -{ - unsigned char buf[MAX_INSN_SIZE]; - int x86_64 = !test_thread_flag(TIF_IA32); - int not_copied; - int nr_copied; - - not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf)); - nr_copied = sizeof(buf) - not_copied; - /* - * The decoder _should_ fail nicely if we pass it a short buffer. - * But, let's not depend on that implementation detail. If we - * did not get anything, just error out now. - */ - if (!nr_copied) - return -EFAULT; - insn_init(insn, buf, nr_copied, x86_64); - insn_get_length(insn); - /* - * copy_from_user() tries to get as many bytes as we could see in - * the largest possible instruction. If the instruction we are - * after is shorter than that _and_ we attempt to copy from - * something unreadable, we might get a short read. This is OK - * as long as the read did not stop in the middle of the - * instruction. Check to see if we got a partial instruction. - */ - if (nr_copied < insn->length) - return -EFAULT; - - insn_get_opcode(insn); - /* - * We only _really_ need to decode bndcl/bndcn/bndcu - * Error out on anything else. - */ - if (insn->opcode.bytes[0] != 0x0f) - goto bad_opcode; - if ((insn->opcode.bytes[1] != 0x1a) && - (insn->opcode.bytes[1] != 0x1b)) - goto bad_opcode; - - return 0; -bad_opcode: - return -EINVAL; -} - -/* - * If a bounds overflow occurs then a #BR is generated. This - * function decodes MPX instructions to get violation address - * and set this address into extended struct siginfo. - * - * Note that this is not a super precise way of doing this. - * Userspace could have, by the time we get here, written - * anything it wants in to the instructions. We can not - * trust anything about it. They might not be valid - * instructions or might encode invalid registers, etc... - */ -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - const struct mpx_bndreg_state *bndregs; - const struct mpx_bndreg *bndreg; - struct insn insn; - uint8_t bndregno; - int err; - - err = mpx_insn_decode(&insn, regs); - if (err) - goto err_out; - - /* - * We know at this point that we are only dealing with - * MPX instructions. - */ - insn_get_modrm(&insn); - bndregno = X86_MODRM_REG(insn.modrm.value); - if (bndregno > 3) { - err = -EINVAL; - goto err_out; - } - /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); - if (!bndregs) { - err = -EINVAL; - goto err_out; - } - /* now go select the individual register in the set of 4 */ - bndreg = &bndregs->bndreg[bndregno]; - - /* - * The registers are always 64-bit, but the upper 32 - * bits are ignored in 32-bit mode. Also, note that the - * upper bounds are architecturally represented in 1's - * complement form. - * - * The 'unsigned long' cast is because the compiler - * complains when casting from integers to different-size - * pointers. - */ - info->lower = (void __user *)(unsigned long)bndreg->lower_bound; - info->upper = (void __user *)(unsigned long)~bndreg->upper_bound; - info->addr = insn_get_addr_ref(&insn, regs); - - /* - * We were not able to extract an address from the instruction, - * probably because there was something invalid in it. - */ - if (info->addr == (void __user *)-1) { - err = -EINVAL; - goto err_out; - } - trace_mpx_bounds_register_exception(info->addr, bndreg); - return 0; -err_out: - /* info might be NULL, but kfree() handles that */ - return err; -} - -static __user void *mpx_get_bounds_dir(void) -{ - const struct mpx_bndcsr *bndcsr; - - if (!cpu_feature_enabled(X86_FEATURE_MPX)) - return MPX_INVALID_BOUNDS_DIR; - - /* - * The bounds directory pointer is stored in a register - * only accessible if we first do an xsave. - */ - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - return MPX_INVALID_BOUNDS_DIR; - - /* - * Make sure the register looks valid by checking the - * enable bit. - */ - if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG)) - return MPX_INVALID_BOUNDS_DIR; - - /* - * Lastly, mask off the low bits used for configuration - * flags, and return the address of the bounds table. - */ - return (void __user *)(unsigned long) - (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); -} - -int mpx_enable_management(void) -{ - void __user *bd_base = MPX_INVALID_BOUNDS_DIR; - struct mm_struct *mm = current->mm; - int ret = 0; - - /* - * runtime in the userspace will be responsible for allocation of - * the bounds directory. Then, it will save the base of the bounds - * directory into XSAVE/XRSTOR Save Area and enable MPX through - * XRSTOR instruction. - * - * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is - * expected to be relatively expensive. Storing the bounds - * directory here means that we do not have to do xsave in the - * unmap path; we can just use mm->context.bd_addr instead. - */ - bd_base = mpx_get_bounds_dir(); - down_write(&mm->mmap_sem); - - /* MPX doesn't support addresses above 47 bits yet. */ - if (find_vma(mm, DEFAULT_MAP_WINDOW)) { - pr_warn_once("%s (%d): MPX cannot handle addresses " - "above 47-bits. Disabling.", - current->comm, current->pid); - ret = -ENXIO; - goto out; - } - mm->context.bd_addr = bd_base; - if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR) - ret = -ENXIO; -out: - up_write(&mm->mmap_sem); - return ret; -} - -int mpx_disable_management(void) -{ - struct mm_struct *mm = current->mm; - - if (!cpu_feature_enabled(X86_FEATURE_MPX)) - return -ENXIO; - - down_write(&mm->mmap_sem); - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; - up_write(&mm->mmap_sem); - return 0; -} - -static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, - unsigned long *curval, - unsigned long __user *addr, - unsigned long old_val, unsigned long new_val) -{ - int ret; - /* - * user_atomic_cmpxchg_inatomic() actually uses sizeof() - * the pointer that we pass to it to figure out how much - * data to cmpxchg. We have to be careful here not to - * pass a pointer to a 64-bit data type when we only want - * a 32-bit copy. - */ - if (is_64bit_mm(mm)) { - ret = user_atomic_cmpxchg_inatomic(curval, - addr, old_val, new_val); - } else { - u32 uninitialized_var(curval_32); - u32 old_val_32 = old_val; - u32 new_val_32 = new_val; - u32 __user *addr_32 = (u32 __user *)addr; - - ret = user_atomic_cmpxchg_inatomic(&curval_32, - addr_32, old_val_32, new_val_32); - *curval = curval_32; - } - return ret; -} - -/* - * With 32-bit mode, a bounds directory is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, - * and the size of each bounds table is 4MB. - */ -static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) -{ - unsigned long expected_old_val = 0; - unsigned long actual_old_val = 0; - unsigned long bt_addr; - unsigned long bd_new_entry; - int ret = 0; - - /* - * Carve the virtual space out of userspace for the new - * bounds table: - */ - bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); - if (IS_ERR((void *)bt_addr)) - return PTR_ERR((void *)bt_addr); - /* - * Set the valid flag (kinda like _PAGE_PRESENT in a pte) - */ - bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - - /* - * Go poke the address of the new bounds table in to the - * bounds directory entry out in userspace memory. Note: - * we may race with another CPU instantiating the same table. - * In that case the cmpxchg will see an unexpected - * 'actual_old_val'. - * - * This can fault, but that's OK because we do not hold - * mmap_sem at this point, unlike some of the other part - * of the MPX code that have to pagefault_disable(). - */ - ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, - expected_old_val, bd_new_entry); - if (ret) - goto out_unmap; - - /* - * The user_atomic_cmpxchg_inatomic() will only return nonzero - * for faults, *not* if the cmpxchg itself fails. Now we must - * verify that the cmpxchg itself completed successfully. - */ - /* - * We expected an empty 'expected_old_val', but instead found - * an apparently valid entry. Assume we raced with another - * thread to instantiate this table and desclare succecss. - */ - if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) { - ret = 0; - goto out_unmap; - } - /* - * We found a non-empty bd_entry but it did not have the - * VALID_FLAG set. Return an error which will result in - * a SEGV since this probably means that somebody scribbled - * some invalid data in to a bounds table. - */ - if (expected_old_val != actual_old_val) { - ret = -EINVAL; - goto out_unmap; - } - trace_mpx_new_bounds_table(bt_addr); - return 0; -out_unmap: - vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); - return ret; -} - -/* - * When a BNDSTX instruction attempts to save bounds to a bounds - * table, it will first attempt to look up the table in the - * first-level bounds directory. If it does not find a table in - * the directory, a #BR is generated and we get here in order to - * allocate a new table. - * - * With 32-bit mode, the size of BD is 4MB, and the size of each - * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, - * and the size of each bound table is 4MB. - */ -static int do_mpx_bt_fault(void) -{ - unsigned long bd_entry, bd_base; - const struct mpx_bndcsr *bndcsr; - struct mm_struct *mm = current->mm; - - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - return -EINVAL; - /* - * Mask off the preserve and enable bits - */ - bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK; - /* - * The hardware provides the address of the missing or invalid - * entry via BNDSTATUS, so we don't have to go look it up. - */ - bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK; - /* - * Make sure the directory entry is within where we think - * the directory is. - */ - if ((bd_entry < bd_base) || - (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) - return -EINVAL; - - return allocate_bt(mm, (long __user *)bd_entry); -} - -int mpx_handle_bd_fault(void) -{ - /* - * Userspace never asked us to manage the bounds tables, - * so refuse to help. - */ - if (!kernel_managing_mpx_tables(current->mm)) - return -EINVAL; - - return do_mpx_bt_fault(); -} - -/* - * A thin wrapper around get_user_pages(). Returns 0 if the - * fault was resolved or -errno if not. - */ -static int mpx_resolve_fault(long __user *addr, int write) -{ - long gup_ret; - int nr_pages = 1; - - gup_ret = get_user_pages((unsigned long)addr, nr_pages, - write ? FOLL_WRITE : 0, NULL, NULL); - /* - * get_user_pages() returns number of pages gotten. - * 0 means we failed to fault in and get anything, - * probably because 'addr' is bad. - */ - if (!gup_ret) - return -EFAULT; - /* Other error, return it */ - if (gup_ret < 0) - return gup_ret; - /* must have gup'd a page and gup_ret>0, success */ - return 0; -} - -static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, - unsigned long bd_entry) -{ - unsigned long bt_addr = bd_entry; - int align_to_bytes; - /* - * Bit 0 in a bt_entry is always the valid bit. - */ - bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG; - /* - * Tables are naturally aligned at 8-byte boundaries - * on 64-bit and 4-byte boundaries on 32-bit. The - * documentation makes it appear that the low bits - * are ignored by the hardware, so we do the same. - */ - if (is_64bit_mm(mm)) - align_to_bytes = 8; - else - align_to_bytes = 4; - bt_addr &= ~(align_to_bytes-1); - return bt_addr; -} - -/* - * We only want to do a 4-byte get_user() on 32-bit. Otherwise, - * we might run off the end of the bounds table if we are on - * a 64-bit kernel and try to get 8 bytes. - */ -static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, - long __user *bd_entry_ptr) -{ - u32 bd_entry_32; - int ret; - - if (is_64bit_mm(mm)) - return get_user(*bd_entry_ret, bd_entry_ptr); - - /* - * Note that get_user() uses the type of the *pointer* to - * establish the size of the get, not the destination. - */ - ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr); - *bd_entry_ret = bd_entry_32; - return ret; -} - -/* - * Get the base of bounds tables pointed by specific bounds - * directory entry. - */ -static int get_bt_addr(struct mm_struct *mm, - long __user *bd_entry_ptr, - unsigned long *bt_addr_result) -{ - int ret; - int valid_bit; - unsigned long bd_entry; - unsigned long bt_addr; - - if (!access_ok((bd_entry_ptr), sizeof(*bd_entry_ptr))) - return -EFAULT; - - while (1) { - int need_write = 0; - - pagefault_disable(); - ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr); - pagefault_enable(); - if (!ret) - break; - if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry_ptr, need_write); - /* - * If we could not resolve the fault, consider it - * userspace's fault and error out. - */ - if (ret) - return ret; - } - - valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG; - bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry); - - /* - * When the kernel is managing bounds tables, a bounds directory - * entry will either have a valid address (plus the valid bit) - * *OR* be completely empty. If we see a !valid entry *and* some - * data in the address field, we know something is wrong. This - * -EINVAL return will cause a SIGSEGV. - */ - if (!valid_bit && bt_addr) - return -EINVAL; - /* - * Do we have an completely zeroed bt entry? That is OK. It - * just means there was no bounds table for this memory. Make - * sure to distinguish this from -EINVAL, which will cause - * a SEGV. - */ - if (!valid_bit) - return -ENOENT; - - *bt_addr_result = bt_addr; - return 0; -} - -static inline int bt_entry_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BT_ENTRY_BYTES_64; - else - return MPX_BT_ENTRY_BYTES_32; -} - -/* - * Take a virtual address and turns it in to the offset in bytes - * inside of the bounds table where the bounds table entry - * controlling 'addr' can be found. - */ -static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, - unsigned long addr) -{ - unsigned long bt_table_nr_entries; - unsigned long offset = addr; - - if (is_64bit_mm(mm)) { - /* Bottom 3 bits are ignored on 64-bit */ - offset >>= 3; - bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; - } else { - /* Bottom 2 bits are ignored on 32-bit */ - offset >>= 2; - bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; - } - /* - * We know the size of the table in to which we are - * indexing, and we have eliminated all the low bits - * which are ignored for indexing. - * - * Mask out all the high bits which we do not need - * to index in to the table. Note that the tables - * are always powers of two so this gives us a proper - * mask. - */ - offset &= (bt_table_nr_entries-1); - /* - * We now have an entry offset in terms of *entries* in - * the table. We need to scale it back up to bytes. - */ - offset *= bt_entry_size_bytes(mm); - return offset; -} - -/* - * How much virtual address space does a single bounds - * directory entry cover? - * - * Note, we need a long long because 4GB doesn't fit in - * to a long on 32-bit. - */ -static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) -{ - unsigned long long virt_space; - unsigned long long GB = (1ULL << 30); - - /* - * This covers 32-bit emulation as well as 32-bit kernels - * running on 64-bit hardware. - */ - if (!is_64bit_mm(mm)) - return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; - - /* - * 'x86_virt_bits' returns what the hardware is capable - * of, and returns the full >32-bit address space when - * running 32-bit kernels on 64-bit hardware. - */ - virt_space = (1ULL << boot_cpu_data.x86_virt_bits); - return virt_space / MPX_BD_NR_ENTRIES_64; -} - -/* - * Free the backing physical pages of bounds table 'bt_addr'. - * Assume start...end is within that bounds table. - */ -static noinline int zap_bt_entries_mapping(struct mm_struct *mm, - unsigned long bt_addr, - unsigned long start_mapping, unsigned long end_mapping) -{ - struct vm_area_struct *vma; - unsigned long addr, len; - unsigned long start; - unsigned long end; - - /* - * if we 'end' on a boundary, the offset will be 0 which - * is not what we want. Back it up a byte to get the - * last bt entry. Then once we have the entry itself, - * move 'end' back up by the table entry size. - */ - start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); - end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); - /* - * Move end back up by one entry. Among other things - * this ensures that it remains page-aligned and does - * not screw up zap_page_range() - */ - end += bt_entry_size_bytes(mm); - - /* - * Find the first overlapping vma. If vma->vm_start > start, there - * will be a hole in the bounds table. This -EINVAL return will - * cause a SIGSEGV. - */ - vma = find_vma(mm, start); - if (!vma || vma->vm_start > start) - return -EINVAL; - - /* - * A NUMA policy on a VM_MPX VMA could cause this bounds table to - * be split. So we need to look across the entire 'start -> end' - * range of this bounds table, find all of the VM_MPX VMAs, and - * zap only those. - */ - addr = start; - while (vma && vma->vm_start < end) { - /* - * We followed a bounds directory entry down - * here. If we find a non-MPX VMA, that's bad, - * so stop immediately and return an error. This - * probably results in a SIGSEGV. - */ - if (!(vma->vm_flags & VM_MPX)) - return -EINVAL; - - len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len); - trace_mpx_unmap_zap(addr, addr+len); - - vma = vma->vm_next; - addr = vma->vm_start; - } - return 0; -} - -static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, - unsigned long addr) -{ - /* - * There are several ways to derive the bd offsets. We - * use the following approach here: - * 1. We know the size of the virtual address space - * 2. We know the number of entries in a bounds table - * 3. We know that each entry covers a fixed amount of - * virtual address space. - * So, we can just divide the virtual address by the - * virtual space used by one entry to determine which - * entry "controls" the given virtual address. - */ - if (is_64bit_mm(mm)) { - int bd_entry_size = 8; /* 64-bit pointer */ - /* - * Take the 64-bit addressing hole in to account. - */ - addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); - return (addr / bd_entry_virt_space(mm)) * bd_entry_size; - } else { - int bd_entry_size = 4; /* 32-bit pointer */ - /* - * 32-bit has no hole so this case needs no mask - */ - return (addr / bd_entry_virt_space(mm)) * bd_entry_size; - } - /* - * The two return calls above are exact copies. If we - * pull out a single copy and put it in here, gcc won't - * realize that we're doing a power-of-2 divide and use - * shifts. It uses a real divide. If we put them up - * there, it manages to figure it out (gcc 4.8.3). - */ -} - -static int unmap_entire_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long bt_addr) -{ - unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long uninitialized_var(actual_old_val); - int ret; - - while (1) { - int need_write = 1; - unsigned long cleared_bd_entry = 0; - - pagefault_disable(); - ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, - bd_entry, expected_old_val, cleared_bd_entry); - pagefault_enable(); - if (!ret) - break; - if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry, need_write); - /* - * If we could not resolve the fault, consider it - * userspace's fault and error out. - */ - if (ret) - return ret; - } - /* - * The cmpxchg was performed, check the results. - */ - if (actual_old_val != expected_old_val) { - /* - * Someone else raced with us to unmap the table. - * That is OK, since we were both trying to do - * the same thing. Declare success. - */ - if (!actual_old_val) - return 0; - /* - * Something messed with the bounds directory - * entry. We hold mmap_sem for read or write - * here, so it could not be a _new_ bounds table - * that someone just allocated. Something is - * wrong, so pass up the error and SIGSEGV. - */ - return -EINVAL; - } - /* - * Note, we are likely being called under do_munmap() already. To - * avoid recursion, do_munmap() will check whether it comes - * from one bounds table through VM_MPX flag. - */ - return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL); -} - -static int try_unmap_single_bt(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct vm_area_struct *next; - struct vm_area_struct *prev; - /* - * "bta" == Bounds Table Area: the area controlled by the - * bounds table that we are unmapping. - */ - unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); - unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); - unsigned long uninitialized_var(bt_addr); - void __user *bde_vaddr; - int ret; - /* - * We already unlinked the VMAs from the mm's rbtree so 'start' - * is guaranteed to be in a hole. This gets us the first VMA - * before the hole in to 'prev' and the next VMA after the hole - * in to 'next'. - */ - next = find_vma_prev(mm, start, &prev); - /* - * Do not count other MPX bounds table VMAs as neighbors. - * Although theoretically possible, we do not allow bounds - * tables for bounds tables so our heads do not explode. - * If we count them as neighbors here, we may end up with - * lots of tables even though we have no actual table - * entries in use. - */ - while (next && (next->vm_flags & VM_MPX)) - next = next->vm_next; - while (prev && (prev->vm_flags & VM_MPX)) - prev = prev->vm_prev; - /* - * We know 'start' and 'end' lie within an area controlled - * by a single bounds table. See if there are any other - * VMAs controlled by that bounds table. If there are not - * then we can "expand" the are we are unmapping to possibly - * cover the entire table. - */ - next = find_vma_prev(mm, start, &prev); - if ((!prev || prev->vm_end <= bta_start_vaddr) && - (!next || next->vm_start >= bta_end_vaddr)) { - /* - * No neighbor VMAs controlled by same bounds - * table. Try to unmap the whole thing - */ - start = bta_start_vaddr; - end = bta_end_vaddr; - } - - bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start); - ret = get_bt_addr(mm, bde_vaddr, &bt_addr); - /* - * No bounds table there, so nothing to unmap. - */ - if (ret == -ENOENT) { - ret = 0; - return 0; - } - if (ret) - return ret; - /* - * We are unmapping an entire table. Either because the - * unmap that started this whole process was large enough - * to cover an entire table, or that the unmap was small - * but was the area covered by a bounds table. - */ - if ((start == bta_start_vaddr) && - (end == bta_end_vaddr)) - return unmap_entire_bt(mm, bde_vaddr, bt_addr); - return zap_bt_entries_mapping(mm, bt_addr, start, end); -} - -static int mpx_unmap_tables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - unsigned long one_unmap_start; - trace_mpx_unmap_search(start, end); - - one_unmap_start = start; - while (one_unmap_start < end) { - int ret; - unsigned long next_unmap_start = ALIGN(one_unmap_start+1, - bd_entry_virt_space(mm)); - unsigned long one_unmap_end = end; - /* - * if the end is beyond the current bounds table, - * move it back so we only deal with a single one - * at a time - */ - if (one_unmap_end > next_unmap_start) - one_unmap_end = next_unmap_start; - ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); - if (ret) - return ret; - - one_unmap_start = next_unmap_start; - } - return 0; -} - -/* - * Free unused bounds tables covered in a virtual address region being - * munmap()ed. Assume end > start. - * - * This function will be called by do_munmap(), and the VMAs covering - * the virtual address region start...end have already been split if - * necessary, and the 'vma' is the first vma in this range (start -> end). - */ -void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - struct vm_area_struct *vma; - int ret; - - /* - * Refuse to do anything unless userspace has asked - * the kernel to help manage the bounds tables, - */ - if (!kernel_managing_mpx_tables(current->mm)) - return; - /* - * This will look across the entire 'start -> end' range, - * and find all of the non-VM_MPX VMAs. - * - * To avoid recursion, if a VM_MPX vma is found in the range - * (start->end), we will not continue follow-up work. This - * recursion represents having bounds tables for bounds tables, - * which should not occur normally. Being strict about it here - * helps ensure that we do not have an exploitable stack overflow. - */ - vma = find_vma(mm, start); - while (vma && vma->vm_start < end) { - if (vma->vm_flags & VM_MPX) - return; - vma = vma->vm_next; - } - - ret = mpx_unmap_tables(mm, start, end); - if (ret) - force_sig(SIGSEGV); -} - -/* MPX cannot handle addresses above 47 bits yet. */ -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags) -{ - if (!kernel_managing_mpx_tables(current->mm)) - return addr; - if (addr + len <= DEFAULT_MAP_WINDOW) - return addr; - if (flags & MAP_FIXED) - return -ENOMEM; - - /* - * Requested len is larger than the whole area we're allowed to map in. - * Resetting hinting address wouldn't do much good -- fail early. - */ - if (len > DEFAULT_MAP_WINDOW) - return -ENOMEM; - - /* Look for unmap area within DEFAULT_MAP_WINDOW */ - return 0; -} diff --git a/arch/x86/mm/pat/Makefile b/arch/x86/mm/pat/Makefile new file mode 100644 index 000000000000..ea464c995161 --- /dev/null +++ b/arch/x86/mm/pat/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := set_memory.o memtype.o + +obj-$(CONFIG_X86_PAT) += memtype_interval.o diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pat/cpa-test.c index facce271e8b9..facce271e8b9 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pat/cpa-test.c diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat/memtype.c index 2d758e19ef22..394be8611748 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat/memtype.c @@ -1,11 +1,34 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle caching attributes in page tables (PAT) + * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. * * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> * Suresh B Siddha <suresh.b.siddha@intel.com> * * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + * + * Basic principles: + * + * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and + * the kernel to set one of a handful of 'caching type' attributes for physical + * memory ranges: uncached, write-combining, write-through, write-protected, + * and the most commonly used and default attribute: write-back caching. + * + * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is + * a hardware interface to enumerate a limited number of physical memory ranges + * and set their caching attributes explicitly, programmed into the CPU via MSRs. + * Even modern CPUs have MTRRs enabled - but these are typically not touched + * by the kernel or by user-space (such as the X server), we rely on PAT for any + * additional cache attribute logic. + * + * PAT doesn't work via explicit memory ranges, but uses page table entries to add + * cache attribute information to the mapped memory range: there's 3 bits used, + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the + * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). + * + * ( There's a metric ton of finer details, such as compatibility with CPU quirks + * that only support 4 types of PAT entries, and interaction with MTRRs, see + * below for details. ) */ #include <linux/seq_file.h> @@ -29,44 +52,48 @@ #include <asm/mtrr.h> #include <asm/page.h> #include <asm/msr.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/io.h> -#include "pat_internal.h" -#include "mm_internal.h" +#include "memtype.h" +#include "../mm_internal.h" #undef pr_fmt #define pr_fmt(fmt) "" fmt -static bool __read_mostly boot_cpu_done; +static bool __read_mostly pat_bp_initialized; static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); -static bool __read_mostly pat_initialized; -static bool __read_mostly init_cm_done; +static bool __read_mostly pat_bp_enabled; +static bool __read_mostly pat_cm_initialized; -void pat_disable(const char *reason) +/* + * PAT support is enabled by default, but can be disabled for + * various user-requested or hardware-forced reasons: + */ +void pat_disable(const char *msg_reason) { if (pat_disabled) return; - if (boot_cpu_done) { + if (pat_bp_initialized) { WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); return; } pat_disabled = true; - pr_info("x86/PAT: %s\n", reason); + pr_info("x86/PAT: %s\n", msg_reason); } static int __init nopat(char *str) { - pat_disable("PAT support disabled."); + pat_disable("PAT support disabled via boot option."); return 0; } early_param("nopat", nopat); bool pat_enabled(void) { - return pat_initialized; + return pat_bp_enabled; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -197,6 +224,8 @@ static void __init_cache_modes(u64 pat) char pat_msg[33]; int i; + WARN_ON_ONCE(pat_cm_initialized); + pat_msg[32] = 0; for (i = 7; i >= 0; i--) { cache = pat_get_cache_mode((pat >> (i * 8)) & 7, @@ -205,28 +234,28 @@ static void __init_cache_modes(u64 pat) } pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); - init_cm_done = true; + pat_cm_initialized = true; } #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) -static void pat_bsp_init(u64 pat) +static void pat_bp_init(u64 pat) { u64 tmp_pat; if (!boot_cpu_has(X86_FEATURE_PAT)) { - pat_disable("PAT not supported by CPU."); + pat_disable("PAT not supported by the CPU."); return; } rdmsrl(MSR_IA32_CR_PAT, tmp_pat); if (!tmp_pat) { - pat_disable("PAT MSR is 0, disabled."); + pat_disable("PAT support disabled by the firmware."); return; } wrmsrl(MSR_IA32_CR_PAT, pat); - pat_initialized = true; + pat_bp_enabled = true; __init_cache_modes(pat); } @@ -248,7 +277,7 @@ void init_cache_modes(void) { u64 pat = 0; - if (init_cm_done) + if (pat_cm_initialized) return; if (boot_cpu_has(X86_FEATURE_PAT)) { @@ -291,7 +320,7 @@ void init_cache_modes(void) } /** - * pat_init - Initialize PAT MSR and PAT table + * pat_init - Initialize the PAT MSR and PAT table on the current CPU * * This function initializes PAT MSR and PAT table with an OS-defined value * to enable additional cache attributes, WC, WT and WP. @@ -305,6 +334,10 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; +#ifndef CONFIG_X86_PAT + pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); +#endif + if (pat_disabled) return; @@ -364,9 +397,9 @@ void pat_init(void) PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); } - if (!boot_cpu_done) { - pat_bsp_init(pat); - boot_cpu_done = true; + if (!pat_bp_initialized) { + pat_bp_init(pat); + pat_bp_initialized = true; } else { pat_ap_init(pat); } @@ -542,10 +575,10 @@ static u64 sanitize_phys(u64 address) * available type in new_type in case of no error. In case of any error * it will return a negative return value. */ -int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, +int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, enum page_cache_mode *new_type) { - struct memtype *new; + struct memtype *entry_new; enum page_cache_mode actual_type; int is_range_ram; int err = 0; @@ -593,22 +626,22 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, return -EINVAL; } - new = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!new) + entry_new = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_new) return -ENOMEM; - new->start = start; - new->end = end; - new->type = actual_type; + entry_new->start = start; + entry_new->end = end; + entry_new->type = actual_type; spin_lock(&memtype_lock); - err = memtype_check_insert(new, new_type); + err = memtype_check_insert(entry_new, new_type); if (err) { - pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", + pr_info("x86/PAT: memtype_reserve failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, - cattr_name(new->type), cattr_name(req_type)); - kfree(new); + cattr_name(entry_new->type), cattr_name(req_type)); + kfree(entry_new); spin_unlock(&memtype_lock); return err; @@ -616,18 +649,17 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_unlock(&memtype_lock); - dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", - start, end - 1, cattr_name(new->type), cattr_name(req_type), + dprintk("memtype_reserve added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", + start, end - 1, cattr_name(entry_new->type), cattr_name(req_type), new_type ? cattr_name(*new_type) : "-"); return err; } -int free_memtype(u64 start, u64 end) +int memtype_free(u64 start, u64 end) { - int err = -EINVAL; int is_range_ram; - struct memtype *entry; + struct memtype *entry_old; if (!pat_enabled()) return 0; @@ -640,28 +672,24 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) { - - err = free_ram_pages_type(start, end); - - return err; - } else if (is_range_ram < 0) { + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + if (is_range_ram < 0) return -EINVAL; - } spin_lock(&memtype_lock); - entry = memtype_erase(start, end); + entry_old = memtype_erase(start, end); spin_unlock(&memtype_lock); - if (IS_ERR(entry)) { + if (IS_ERR(entry_old)) { pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", current->comm, current->pid, start, end - 1); return -EINVAL; } - kfree(entry); + kfree(entry_old); - dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); + dprintk("memtype_free request [mem %#010Lx-%#010Lx]\n", start, end - 1); return 0; } @@ -700,6 +728,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) rettype = _PAGE_CACHE_MODE_UC_MINUS; spin_unlock(&memtype_lock); + return rettype; } @@ -723,7 +752,7 @@ bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); /** - * io_reserve_memtype - Request a memory type mapping for a region of memory + * memtype_reserve_io - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region * @type: A pointer to memtype, with requested type. On success, requested @@ -732,7 +761,7 @@ EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); * On success, returns 0 * On failure, returns non-zero */ -int io_reserve_memtype(resource_size_t start, resource_size_t end, +int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *type) { resource_size_t size = end - start; @@ -742,47 +771,47 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, WARN_ON_ONCE(iomem_map_sanity_check(start, size)); - ret = reserve_memtype(start, end, req_type, &new_type); + ret = memtype_reserve(start, end, req_type, &new_type); if (ret) goto out_err; if (!is_new_memtype_allowed(start, size, req_type, new_type)) goto out_free; - if (kernel_map_sync_memtype(start, size, new_type) < 0) + if (memtype_kernel_map_sync(start, size, new_type) < 0) goto out_free; *type = new_type; return 0; out_free: - free_memtype(start, end); + memtype_free(start, end); ret = -EBUSY; out_err: return ret; } /** - * io_free_memtype - Release a memory type mapping for a region of memory + * memtype_free_io - Release a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region */ -void io_free_memtype(resource_size_t start, resource_size_t end) +void memtype_free_io(resource_size_t start, resource_size_t end) { - free_memtype(start, end); + memtype_free(start, end); } int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) { enum page_cache_mode type = _PAGE_CACHE_MODE_WC; - return io_reserve_memtype(start, start + size, &type); + return memtype_reserve_io(start, start + size, &type); } EXPORT_SYMBOL(arch_io_reserve_memtype_wc); void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) { - io_free_memtype(start, start + size); + memtype_free_io(start, start + size); } EXPORT_SYMBOL(arch_io_free_memtype_wc); @@ -839,10 +868,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, } /* - * Change the memory type for the physial address range in kernel identity + * Change the memory type for the physical address range in kernel identity * mapping space if that range is a part of identity map. */ -int kernel_map_sync_memtype(u64 base, unsigned long size, +int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm) { unsigned long id_sz; @@ -851,15 +880,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, return 0; /* - * some areas in the middle of the kernel identity range - * are not mapped, like the PCI space. + * Some areas in the middle of the kernel identity range + * are not mapped, for example the PCI space. */ if (!page_is_ram(base >> PAGE_SHIFT)) return 0; id_sz = (__pa(high_memory-1) <= base + size) ? - __pa(high_memory) - base : - size; + __pa(high_memory) - base : size; if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) { pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n", @@ -873,7 +901,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, /* * Internal interface to reserve a range of physical memory with prot. - * Reserved non RAM regions only and after successful reserve_memtype, + * Reserved non RAM regions only and after successful memtype_reserve, * this func also keeps identity mapping (if any) in sync with this new prot. */ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, @@ -910,14 +938,14 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, return 0; } - ret = reserve_memtype(paddr, paddr + size, want_pcm, &pcm); + ret = memtype_reserve(paddr, paddr + size, want_pcm, &pcm); if (ret) return ret; if (pcm != want_pcm) { if (strict_prot || !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) { - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n", current->comm, current->pid, cattr_name(want_pcm), @@ -935,8 +963,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, cachemode2protval(pcm)); } - if (kernel_map_sync_memtype(paddr, size, pcm) < 0) { - free_memtype(paddr, paddr + size); + if (memtype_kernel_map_sync(paddr, size, pcm) < 0) { + memtype_free(paddr, paddr + size); return -EINVAL; } return 0; @@ -952,7 +980,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) - free_memtype(paddr, paddr + size); + memtype_free(paddr, paddr + size); } /* @@ -1099,25 +1127,30 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) +/* + * We are allocating a temporary printout-entry to be passed + * between seq_start()/next() and seq_show(): + */ static struct memtype *memtype_get_idx(loff_t pos) { - struct memtype *print_entry; + struct memtype *entry_print; int ret; - print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); - if (!print_entry) + entry_print = kzalloc(sizeof(struct memtype), GFP_KERNEL); + if (!entry_print) return NULL; spin_lock(&memtype_lock); - ret = memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(entry_print, pos); spin_unlock(&memtype_lock); - if (!ret) { - return print_entry; - } else { - kfree(print_entry); + /* Free it on error: */ + if (ret) { + kfree(entry_print); return NULL; } + + return entry_print; } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) @@ -1142,11 +1175,14 @@ static void memtype_seq_stop(struct seq_file *seq, void *v) static int memtype_seq_show(struct seq_file *seq, void *v) { - struct memtype *print_entry = (struct memtype *)v; + struct memtype *entry_print = (struct memtype *)v; + + seq_printf(seq, "PAT: [mem 0x%016Lx-0x%016Lx] %s\n", + entry_print->start, + entry_print->end, + cattr_name(entry_print->type)); - seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), - print_entry->start, print_entry->end); - kfree(print_entry); + kfree(entry_print); return 0; } @@ -1178,7 +1214,6 @@ static int __init pat_memtype_list_init(void) } return 0; } - late_initcall(pat_memtype_list_init); #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat/memtype.h index 79a06684349e..cacecdbceb55 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat/memtype.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PAT_INTERNAL_H_ -#define __PAT_INTERNAL_H_ +#ifndef __MEMTYPE_H_ +#define __MEMTYPE_H_ extern int pat_debug_enable; @@ -29,13 +29,13 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int memtype_check_insert(struct memtype *new, +extern int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type); extern struct memtype *memtype_erase(u64 start, u64 end); extern struct memtype *memtype_lookup(u64 addr); -extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos); #else -static inline int memtype_check_insert(struct memtype *new, +static inline int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *new_type) { return 0; } static inline struct memtype *memtype_erase(u64 start, u64 end) @@ -46,4 +46,4 @@ static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) { return 0; } #endif -#endif /* __PAT_INTERNAL_H_ */ +#endif /* __MEMTYPE_H_ */ diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c new file mode 100644 index 000000000000..a07e4882bf36 --- /dev/null +++ b/arch/x86/mm/pat/memtype_interval.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * Suresh B Siddha <suresh.b.siddha@intel.com> + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/interval_tree_generic.h> +#include <linux/sched.h> +#include <linux/gfp.h> + +#include <asm/pgtable.h> +#include <asm/memtype.h> + +#include "memtype.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) which tree is ordered + * by the starting address. The tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course, as enforced by the PAT logic. + * + * memtype_lock protects the rbtree. + */ + +static inline u64 interval_start(struct memtype *entry) +{ + return entry->start; +} + +static inline u64 interval_end(struct memtype *entry) +{ + return entry->end - 1; +} + +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + interval_start, interval_end, + static, interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *entry_match; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + + while (entry_match != NULL && entry_match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (entry_match->start == start) && (entry_match->end == end)) + return entry_match; + + if ((match_type == MEMTYPE_END_MATCH) && + (entry_match->start < start) && (entry_match->end == end)) + return entry_match; + + entry_match = interval_iter_next(entry_match, start, end-1); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *entry_match; + enum page_cache_mode found_type = reqtype; + + entry_match = interval_iter_first(&memtype_rbroot, start, end-1); + if (entry_match == NULL) + goto success; + + if (entry_match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", entry_match->start, entry_match->end); + found_type = entry_match->type; + + entry_match = interval_iter_next(entry_match, start, end-1); + while (entry_match) { + if (entry_match->type != found_type) + goto failure; + + entry_match = interval_iter_next(entry_match, start, end-1); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(entry_match->type)); + + return -EBUSY; +} + +int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(entry_new->start, entry_new->end, entry_new->type, ret_type); + if (err) + return err; + + if (ret_type) + entry_new->type = *ret_type; + + interval_insert(entry_new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *entry_old; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!entry_old) { + entry_old = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!entry_old) + return ERR_PTR(-EINVAL); + } + + if (entry_old->start == start) { + /* munmap: erase this node */ + interval_remove(entry_old, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + interval_remove(entry_old, &memtype_rbroot); + entry_old->end = start; + interval_insert(entry_old, &memtype_rbroot); + + return NULL; + } + + return entry_old; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1); +} + +/* + * Debugging helper, copy the Nth entry of the tree into a + * a copy for printout. This allows us to print out the tree + * via debugfs, without holding the memtype_lock too long: + */ +#ifdef CONFIG_DEBUG_FS +int memtype_copy_nth_element(struct memtype *entry_out, loff_t pos) +{ + struct memtype *entry_match; + int i = 1; + + entry_match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + + while (entry_match && pos != i) { + entry_match = interval_iter_next(entry_match, 0, ULONG_MAX); + i++; + } + + if (entry_match) { /* pos == i */ + *entry_out = *entry_match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pat/set_memory.c index 1b99ad05b117..c4aedd00c1ba 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pat/set_memory.c @@ -24,10 +24,10 @@ #include <linux/uaccess.h> #include <asm/pgalloc.h> #include <asm/proto.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/set_memory.h> -#include "mm_internal.h" +#include "../mm_internal.h" /* * The current flushing context - we pass it instead of 5 arguments: @@ -331,7 +331,7 @@ static void cpa_flush_all(unsigned long cache) on_each_cpu(__cpa_flush_all, (void *) cache, 1); } -void __cpa_flush_tlb(void *data) +static void __cpa_flush_tlb(void *data) { struct cpa_data *cpa = data; unsigned int i; @@ -618,6 +618,17 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) } EXPORT_SYMBOL_GPL(lookup_address); +/* + * Lookup the page table entry for a virtual address in a given mm. Return a + * pointer to the entry and the level of the mapping. + */ +pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, + unsigned int *level) +{ + return lookup_address_in_pgd(pgd_offset(mm, address), address, level); +} +EXPORT_SYMBOL_GPL(lookup_address_in_mm); + static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, unsigned int *level) { @@ -1801,7 +1812,7 @@ int set_memory_uc(unsigned long addr, int numpages) /* * for now UC MINUS. see comments in ioremap() */ - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_UC_MINUS, NULL); if (ret) goto out_err; @@ -1813,7 +1824,7 @@ int set_memory_uc(unsigned long addr, int numpages) return 0; out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); out_err: return ret; } @@ -1839,14 +1850,14 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) return ret; ret = _set_memory_wc(addr, numpages); if (ret) - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return ret; } @@ -1873,7 +1884,7 @@ int set_memory_wb(unsigned long addr, int numpages) if (ret) return ret; - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return 0; } EXPORT_SYMBOL(set_memory_wb); @@ -2014,7 +2025,7 @@ static int _set_pages_array(struct page **pages, int numpages, continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - if (reserve_memtype(start, end, new_type, NULL)) + if (memtype_reserve(start, end, new_type, NULL)) goto err_out; } @@ -2040,7 +2051,7 @@ err_out: continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return -EINVAL; } @@ -2089,7 +2100,7 @@ int set_pages_array_wb(struct page **pages, int numpages) continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - free_memtype(start, end); + memtype_free(start, end); } return 0; @@ -2215,7 +2226,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; @@ -2224,12 +2235,6 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); @@ -2281,5 +2286,5 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, * be exposed to the rest of the kernel. Include these directly here. */ #ifdef CONFIG_CPA_DEBUG -#include "pageattr-test.c" +#include "cpa-test.c" #endif diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c deleted file mode 100644 index 6855362eaf21..000000000000 --- a/arch/x86/mm/pat_interval.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Interval tree used to store the PAT memory type reservations. - */ - -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/interval_tree_generic.h> -#include <linux/sched.h> -#include <linux/gfp.h> - -#include <asm/pgtable.h> -#include <asm/pat.h> - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. - */ -static inline u64 memtype_interval_start(struct memtype *memtype) -{ - return memtype->start; -} - -static inline u64 memtype_interval_end(struct memtype *memtype) -{ - return memtype->end - 1; -} -INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, - memtype_interval_start, memtype_interval_end, - static, memtype_interval) - -static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_match(u64 start, u64 end, int match_type) -{ - struct memtype *match; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); - while (match != NULL && match->start < end) { - if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; - - if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; - - match = memtype_interval_iter_next(match, start, end-1); - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_check_conflict(u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct memtype *match; - enum page_cache_mode found_type = reqtype; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - match = memtype_interval_iter_next(match, start, end-1); - while (match) { - if (match->type != found_type) - goto failure; - - match = memtype_interval_iter_next(match, start, end-1); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -int memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_check_conflict(new->start, new->end, new->type, ret_type); - if (err) - return err; - - if (ret_type) - new->type = *ret_type; - - memtype_interval_insert(new, &memtype_rbroot); - return 0; -} - -struct memtype *memtype_erase(u64 start, u64 end) -{ - struct memtype *data; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. - */ - data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!data) - return ERR_PTR(-EINVAL); - } - - if (data->start == start) { - /* munmap: erase this node */ - memtype_interval_remove(data, &memtype_rbroot); - } else { - /* mremap: update the end value of this node */ - memtype_interval_remove(data, &memtype_rbroot); - data->end = start; - memtype_interval_insert(data, &memtype_rbroot); - return NULL; - } - - return data; -} - -struct memtype *memtype_lookup(u64 addr) -{ - return memtype_interval_iter_first(&memtype_rbroot, addr, - addr + PAGE_SIZE-1); -} - -#if defined(CONFIG_DEBUG_FS) -int memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct memtype *match; - int i = 1; - - match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); - while (match && pos != i) { - match = memtype_interval_iter_next(match, 0, ULONG_MAX); - i++; - } - - if (match) { /* pos == i */ - *out = *match; - return 0; - } else { - return 1; - } -} -#endif diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 9bb7f0ab9fe6..0e6700eaa4f9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -18,6 +18,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/io.h> +#include <linux/vmalloc.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index bdc98150d4db..fc3f3d3e2ef2 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <asm/page.h> +#include <linux/vmalloc.h> #include "physaddr.h" diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 92153d054d6c..bda73cb7a044 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -79,7 +79,7 @@ static void do_read_far_test(void __iomem *p) static void do_test(unsigned long size) { - void __iomem *p = ioremap_nocache(mmio_address, size); + void __iomem *p = ioremap(mmio_address, size); if (!p) { pr_err("could not ioremap, aborting.\n"); return; @@ -104,7 +104,7 @@ static void do_test_bulk_ioremapping(void) int i; for (i = 0; i < 10; ++i) { - p = ioremap_nocache(mmio_address, PAGE_SIZE); + p = ioremap(mmio_address, PAGE_SIZE); if (p) iounmap(p); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e6a9edc5baaf..66f96f21a7b6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -708,7 +708,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, (void *)info, 1); else on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote, - (void *)info, 1, GFP_ATOMIC, cpumask); + (void *)info, 1, cpumask); } /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b8be18427277..9ba08e9abc09 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -10,10 +10,12 @@ #include <linux/if_vlan.h> #include <linux/bpf.h> #include <linux/memory.h> +#include <linux/sort.h> #include <asm/extable.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> #include <asm/text-patching.h> +#include <asm/asm-prototypes.h> static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -1326,7 +1328,7 @@ emit_jmp: return proglen; } -static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, +static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { int i; @@ -1342,7 +1344,7 @@ static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, -(stack_size - i * 8)); } -static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, +static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { int i; @@ -1359,7 +1361,7 @@ static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, -(stack_size - i * 8)); } -static int invoke_bpf(struct btf_func_model *m, u8 **pprog, +static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_prog **progs, int prog_cnt, int stack_size) { u8 *prog = *pprog; @@ -1454,7 +1456,8 @@ static int invoke_bpf(struct btf_func_model *m, u8 **pprog, * add rsp, 8 // skip eth_type_trans's frame * ret // return to its caller */ -int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, +int arch_prepare_bpf_trampoline(void *image, void *image_end, + const struct btf_func_model *m, u32 flags, struct bpf_prog **fentry_progs, int fentry_cnt, struct bpf_prog **fexit_progs, int fexit_cnt, void *orig_call) @@ -1521,15 +1524,160 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT1(0xC3); /* ret */ - /* One half of the page has active running trampoline. - * Another half is an area for next trampoline. - * Make sure the trampoline generation logic doesn't overflow. - */ - if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY)) + /* Make sure the trampoline generation logic doesn't overflow */ + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) return -EFAULT; + return prog - (u8 *)image; +} + +static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + 2 + 4); + if (!is_simm32(offset)) { + pr_err("Target %p is out of range\n", func); + return -EINVAL; + } + EMIT2_off32(0x0F, jmp_cond + 0x10, offset); + *pprog = prog; + return 0; +} + +static void emit_nops(u8 **pprog, unsigned int len) +{ + unsigned int i, noplen; + u8 *prog = *pprog; + int cnt = 0; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(ideal_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; +} + +static int emit_fallback_jump(u8 **pprog) +{ + u8 *prog = *pprog; + int err = 0; + +#ifdef CONFIG_RETPOLINE + /* Note that this assumes the the compiler uses external + * thunks for indirect calls. Both clang and GCC use the same + * naming convention for external thunks. + */ + err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog); +#else + int cnt = 0; + + EMIT2(0xFF, 0xE2); /* jmp rdx */ +#endif + *pprog = prog; + return err; +} + +static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) +{ + u8 *jg_reloc, *jg_target, *prog = *pprog; + int pivot, err, jg_bytes = 1, cnt = 0; + s64 jg_offset; + + if (a == b) { + /* Leaf node of recursion, i.e. not a range of indices + * anymore. + */ + EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ + if (!is_simm32(progs[a])) + return -1; + EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), + progs[a]); + err = emit_cond_near_jump(&prog, /* je func */ + (void *)progs[a], prog, + X86_JE); + if (err) + return err; + + err = emit_fallback_jump(&prog); /* jmp thunk/indirect */ + if (err) + return err; + + *pprog = prog; + return 0; + } + + /* Not a leaf node, so we pivot, and recursively descend into + * the lower and upper ranges. + */ + pivot = (b - a) / 2; + EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ + if (!is_simm32(progs[a + pivot])) + return -1; + EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]); + + if (pivot > 2) { /* jg upper_part */ + /* Require near jump. */ + jg_bytes = 4; + EMIT2_off32(0x0F, X86_JG + 0x10, 0); + } else { + EMIT2(X86_JG, 0); + } + jg_reloc = prog; + + err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */ + progs); + if (err) + return err; + + /* From Intel 64 and IA-32 Architectures Optimization + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler + * Coding Rule 11: All branch targets should be 16-byte + * aligned. + */ + jg_target = PTR_ALIGN(prog, 16); + if (jg_target != prog) + emit_nops(&prog, jg_target - prog); + jg_offset = prog - jg_reloc; + emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); + + err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */ + b, progs); + if (err) + return err; + + *pprog = prog; return 0; } +static int cmp_ips(const void *a, const void *b) +{ + const s64 *ipa = a; + const s64 *ipb = b; + + if (*ipa > *ipb) + return 1; + if (*ipa < *ipb) + return -1; + return 0; +} + +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +{ + u8 *prog = image; + + sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL); + return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs); +} + struct x64_jit_data { struct bpf_binary_header *header; int *addrs; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1e59df041456..df1d95913d4e 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -625,43 +625,6 @@ unsigned int pcibios_assign_all_busses(void) return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; } -#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) -static LIST_HEAD(dma_domain_list); -static DEFINE_SPINLOCK(dma_domain_list_lock); - -void add_dma_domain(struct dma_domain *domain) -{ - spin_lock(&dma_domain_list_lock); - list_add(&domain->node, &dma_domain_list); - spin_unlock(&dma_domain_list_lock); -} -EXPORT_SYMBOL_GPL(add_dma_domain); - -void del_dma_domain(struct dma_domain *domain) -{ - spin_lock(&dma_domain_list_lock); - list_del(&domain->node); - spin_unlock(&dma_domain_list_lock); -} -EXPORT_SYMBOL_GPL(del_dma_domain); - -static void set_dma_domain_ops(struct pci_dev *pdev) -{ - struct dma_domain *domain; - - spin_lock(&dma_domain_list_lock); - list_for_each_entry(domain, &dma_domain_list, node) { - if (pci_domain_nr(pdev->bus) == domain->domain_nr) { - pdev->dev.dma_ops = domain->dma_ops; - break; - } - } - spin_unlock(&dma_domain_list_lock); -} -#else -static void set_dma_domain_ops(struct pci_dev *pdev) {} -#endif - static void set_dev_domain_options(struct pci_dev *pdev) { if (is_vmd(pdev->bus)) @@ -697,7 +660,6 @@ int pcibios_add_device(struct pci_dev *dev) pa_data = data->next; memunmap(data); } - set_dma_domain_ops(dev); set_dev_domain_options(dev); return 0; } @@ -736,3 +698,13 @@ int pci_ext_cfg_avail(void) else return 0; } + +#if IS_ENABLED(CONFIG_VMD) +struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) +{ + if (is_vmd(dev->bus)) + return to_pci_sysdata(dev->bus)->vmd_dev; + + return dev; +} +#endif diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 9df652d3d927..fa855bbaebaf 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,7 +34,7 @@ #include <linux/errno.h> #include <linux/memblock.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/e820/api.h> #include <asm/pci_x86.h> #include <asm/io_apic.h> diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 887d181b769b..0c7b6e66c644 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -105,7 +105,7 @@ static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg) start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); - addr = ioremap_nocache(start, size); + addr = ioremap(start, size); if (addr) addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); return addr; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index fe29f3f5d384..84b09c230cbd 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y +KASAN_SANITIZE := n +GCOV_PROFILE := n obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 38d44f36d5ed..59f7f6d60cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,8 +54,8 @@ #include <asm/x86_init.h> #include <asm/uv/uv.h> -static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_systab_phys __initdata; static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV @@ -97,32 +97,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - unsigned long flags; - pgd_t *save_pgd; - - save_pgd = efi_call_phys_prolog(); - if (!save_pgd) - return EFI_ABORTED; - - /* Disable interrupts around EFI calls: */ - local_irq_save(flags); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - local_irq_restore(flags); - - efi_call_phys_epilog(save_pgd); - - return status; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; @@ -330,10 +304,16 @@ static void __init efi_clean_memmap(void) } if (n_removal > 0) { - u64 size = efi.memmap.nr_map - n_removal; + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = data.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; pr_warn("Removing %d invalid memory map entries.\n", n_removal); - efi_memmap_install(efi.memmap.phys_map, size); + efi_memmap_install(&data); } } @@ -353,89 +333,90 @@ void __init efi_print_memmap(void) } } -static int __init efi_systab_init(void *phys) +static int __init efi_systab_init(u64 phys) { + int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + bool over4g = false; + void *p; + + p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + if (efi_enabled(EFI_64BIT)) { - efi_system_table_64_t *systab64; - struct efi_setup_data *data = NULL; - u64 tmp = 0; + const efi_system_table_64_t *systab64 = p; + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + efi_systab.runtime = (void *)(unsigned long)systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = systab64->tables; + + over4g = systab64->con_in_handle > U32_MAX || + systab64->con_in > U32_MAX || + systab64->con_out_handle > U32_MAX || + systab64->con_out > U32_MAX || + systab64->stderr_handle > U32_MAX || + systab64->stderr > U32_MAX || + systab64->boottime > U32_MAX; if (efi_setup) { - data = early_memremap(efi_setup, sizeof(*data)); - if (!data) + struct efi_setup_data *data; + + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); return -ENOMEM; - } - systab64 = early_memremap((unsigned long)phys, - sizeof(*systab64)); - if (systab64 == NULL) { - pr_err("Couldn't map the system table!\n"); - if (data) - early_memunmap(data, sizeof(*data)); - return -ENOMEM; - } + } + + efi_systab.fw_vendor = (unsigned long)data->fw_vendor; + efi_systab.runtime = (void *)(unsigned long)data->runtime; + efi_systab.tables = (unsigned long)data->tables; + + over4g |= data->fw_vendor > U32_MAX || + data->runtime > U32_MAX || + data->tables > U32_MAX; - efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : - systab64->fw_vendor; - tmp |= data ? data->fw_vendor : systab64->fw_vendor; - efi_systab.fw_revision = systab64->fw_revision; - efi_systab.con_in_handle = systab64->con_in_handle; - tmp |= systab64->con_in_handle; - efi_systab.con_in = systab64->con_in; - tmp |= systab64->con_in; - efi_systab.con_out_handle = systab64->con_out_handle; - tmp |= systab64->con_out_handle; - efi_systab.con_out = systab64->con_out; - tmp |= systab64->con_out; - efi_systab.stderr_handle = systab64->stderr_handle; - tmp |= systab64->stderr_handle; - efi_systab.stderr = systab64->stderr; - tmp |= systab64->stderr; - efi_systab.runtime = data ? - (void *)(unsigned long)data->runtime : - (void *)(unsigned long)systab64->runtime; - tmp |= data ? data->runtime : systab64->runtime; - efi_systab.boottime = (void *)(unsigned long)systab64->boottime; - tmp |= systab64->boottime; - efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = data ? (unsigned long)data->tables : - systab64->tables; - tmp |= data ? data->tables : systab64->tables; - - early_memunmap(systab64, sizeof(*systab64)); - if (data) early_memunmap(data, sizeof(*data)); -#ifdef CONFIG_X86_32 - if (tmp >> 32) { - pr_err("EFI data located above 4GB, disabling EFI.\n"); - return -EINVAL; + } else { + over4g |= systab64->fw_vendor > U32_MAX || + systab64->runtime > U32_MAX || + systab64->tables > U32_MAX; } -#endif } else { - efi_system_table_32_t *systab32; - - systab32 = early_memremap((unsigned long)phys, - sizeof(*systab32)); - if (systab32 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; - } - - efi_systab.hdr = systab32->hdr; - efi_systab.fw_vendor = systab32->fw_vendor; - efi_systab.fw_revision = systab32->fw_revision; - efi_systab.con_in_handle = systab32->con_in_handle; - efi_systab.con_in = systab32->con_in; - efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = systab32->con_out; - efi_systab.stderr_handle = systab32->stderr_handle; - efi_systab.stderr = systab32->stderr; - efi_systab.runtime = (void *)(unsigned long)systab32->runtime; - efi_systab.boottime = (void *)(unsigned long)systab32->boottime; - efi_systab.nr_tables = systab32->nr_tables; - efi_systab.tables = systab32->tables; - - early_memunmap(systab32, sizeof(*systab32)); + const efi_system_table_32_t *systab32 = p; + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + } + + early_memunmap(p, size); + + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; } efi.systab = &efi_systab; @@ -455,108 +436,23 @@ static int __init efi_systab_init(void *phys) return 0; } -static int __init efi_runtime_init32(void) -{ - efi_runtime_services_32_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_32_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); - - return 0; -} - -static int __init efi_runtime_init64(void) -{ - efi_runtime_services_64_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_64_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); - - return 0; -} - -static int __init efi_runtime_init(void) -{ - int rv; - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - * - * When EFI_PARAVIRT is in force then we could not map runtime - * service memory region because we do not have direct access to it. - * However, runtime services are available through proxy functions - * (e.g. in case of Xen dom0 EFI implementation they call special - * hypercall which executes relevant EFI functions) and that is why - * they are always enabled. - */ - - if (!efi_enabled(EFI_PARAVIRT)) { - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - - if (rv) - return rv; - } - - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; -#ifdef CONFIG_X86_32 - if (boot_params.efi_info.efi_systab_hi || - boot_params.efi_info.efi_memmap_hi) { + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { pr_info("Table located above 4GB, disabling EFI.\n"); return; } - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - if (efi_systab_init(efi_phys.systab)) + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); + + if (efi_systab_init(efi_systab_phys)) return; efi.config_table = (unsigned long)efi.systab->tables; @@ -566,14 +462,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -592,19 +490,21 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else { - if (efi_runtime_disabled() || efi_runtime_init()) { - efi_memmap_unmap(); - return; - } + + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); + return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); if (efi_enabled(EFI_DBG)) efi_print_memmap(); } +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV) + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -669,6 +569,8 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } +#endif + /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -707,7 +609,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - systab = (u64)(unsigned long)efi_phys.systab; + systab = efi_systab_phys; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; efi.systab = (efi_system_table_t *)(unsigned long)systab; @@ -767,7 +669,7 @@ static inline void *efi_map_next_entry_reverse(void *entry) */ static void *efi_map_next_entry(void *entry) { - if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) { /* * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE * config table feature requires us to map all entries @@ -828,7 +730,7 @@ static bool should_map_region(efi_memory_desc_t *md) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (efi_is_mixed()) { if (md->type == EFI_CONVENTIONAL_MEMORY || md->type == EFI_LOADER_DATA || md->type == EFI_LOADER_CODE) @@ -899,11 +801,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI. With efi=old_map, we don't do runtime services in + * non-native EFI. With the UV1 memmap, we don't do runtime services in * kexec kernel because in the initial boot something else might * have been mapped at these virtual addresses. */ - if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_is_mixed() || efi_have_uv1_memmap()) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -958,11 +860,6 @@ static void __init kexec_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; efi_native_runtime_setup(); - - efi.set_virtual_address_map = NULL; - - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) - runtime_code_page_mkexec(); #endif } @@ -974,9 +871,9 @@ static void __init kexec_enter_virtual_mode(void) * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the - * kernel is booted with efi=old_map on its command line. Same old - * method enabled the runtime services to be called without having to - * thunk back into physical mode for every invocation. + * kernel is booted on SG1 UV1 hardware. Same old method enabled the + * runtime services to be called without having to thunk back into + * physical mode for every invocation. * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime @@ -999,16 +896,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -1022,8 +917,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -1031,34 +925,22 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); - if (efi_is_native()) { - status = phys_efi_set_virtual_address_map( - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } else { - status = efi_thunk_set_virtual_address_map( - efi_phys.set_virtual_address_map, - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } - + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -1071,13 +953,11 @@ static void __init __efi_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - if (efi_is_native()) + if (!efi_is_mixed()) efi_native_runtime_setup(); else efi_thunk_runtime_setup(); - efi.set_virtual_address_map = NULL; - /* * Apply more restrictive page table mapping attributes now that * SVAM() has been called and the firmware has performed all @@ -1087,6 +967,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) @@ -1102,20 +986,6 @@ void __init efi_enter_virtual_mode(void) efi_dump_pagetable(); } -static int __init arch_parse_efi_cmdline(char *str) -{ - if (!str) { - pr_warn("need at least one option\n"); - return -EINVAL; - } - - if (parse_option_str(str, "old_map")) - set_bit(EFI_OLD_MEMMAP, &efi.flags); - - return 0; -} -early_param("efi", arch_parse_efi_cmdline); - bool efi_is_table_address(unsigned long phys_addr) { unsigned int i; diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9959657127f4..081d466002c9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,7 +49,7 @@ void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - ptdump_walk_pgd_level(NULL, swapper_pg_dir); + ptdump_walk_pgd_level(NULL, &init_mm); #endif } @@ -66,9 +66,17 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -pgd_t * __init efi_call_phys_prolog(void) +efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *, + u32, u32, u32, void *); + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; pgd_t *save_pgd; /* Current pgd is swapper_pg_dir, we'll restore it later: */ @@ -80,14 +88,18 @@ pgd_t * __init efi_call_phys_prolog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - return save_pgd; -} + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ load_fixmap_gdt(0); load_cr3(save_pgd); __flush_tlb_all(); + + return status; } void __init efi_runtime_update_mappings(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..fa8506e76bbe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -57,142 +57,6 @@ static u64 efi_va = EFI_VA_START; struct efi_scratch efi_scratch; -static void __init early_code_mapping_set_exec(int executable) -{ - efi_memory_desc_t *md; - - if (!(__supported_pte_mask & _PAGE_NX)) - return; - - /* Make EFI service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type == EFI_RUNTIME_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_CODE) - efi_set_executable(md, executable); - } -} - -pgd_t * __init efi_call_phys_prolog(void) -{ - unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; - pgd_t *save_pgd, *pgd_k, *pgd_efi; - p4d_t *p4d, *p4d_k, *p4d_efi; - pud_t *pud; - - int pgd; - int n_pgds, i, j; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(&efi_mm); - return efi_mm.pgd; - } - - early_code_mapping_set_exec(1); - - n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); - if (!save_pgd) - return NULL; - - /* - * Build 1:1 identity mapping for efi=old_map usage. Note that - * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while - * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical - * address X, the pud_index(X) != pud_index(__va(X)), we can only copy - * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. - * This means here we can only reuse the PMD tables of the direct mapping. - */ - for (pgd = 0; pgd < n_pgds; pgd++) { - addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); - vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); - pgd_efi = pgd_offset_k(addr_pgd); - save_pgd[pgd] = *pgd_efi; - - p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); - if (!p4d) { - pr_err("Failed to allocate p4d table!\n"); - goto out; - } - - for (i = 0; i < PTRS_PER_P4D; i++) { - addr_p4d = addr_pgd + i * P4D_SIZE; - p4d_efi = p4d + p4d_index(addr_p4d); - - pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); - if (!pud) { - pr_err("Failed to allocate pud table!\n"); - goto out; - } - - for (j = 0; j < PTRS_PER_PUD; j++) { - addr_pud = addr_p4d + j * PUD_SIZE; - - if (addr_pud > (max_pfn << PAGE_SHIFT)) - break; - - vaddr = (unsigned long)__va(addr_pud); - - pgd_k = pgd_offset_k(vaddr); - p4d_k = p4d_offset(pgd_k, vaddr); - pud[j] = *pud_offset(p4d_k, vaddr); - } - } - pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; - } - - __flush_tlb_all(); - return save_pgd; -out: - efi_call_phys_epilog(save_pgd); - return NULL; -} - -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ - /* - * After the lock is released, the original page table is restored. - */ - int pgd_idx, i; - int nr_pgds; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(efi_scratch.prev_mm); - return; - } - - nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { - pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - - if (!pgd_present(*pgd)) - continue; - - for (i = 0; i < PTRS_PER_P4D; i++) { - p4d = p4d_offset(pgd, - pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - - if (!p4d_present(*p4d)) - continue; - - pud = (pud_t *)p4d_page_vaddr(*p4d); - pud_free(&init_mm, pud); - } - - p4d = (p4d_t *)pgd_page_vaddr(*pgd); - p4d_free(&init_mm, p4d); - } - - kfree(save_pgd); - - __flush_tlb_all(); - early_code_mapping_set_exec(0); -} - EXPORT_SYMBOL_GPL(efi_mm); /* @@ -211,7 +75,7 @@ int __init efi_alloc_page_tables(void) pud_t *pud; gfp_t gfp_mask; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; @@ -252,7 +116,7 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -346,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) unsigned npages; pgd_t *pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; /* @@ -373,10 +237,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * as trim_bios_range() will reserve the first page and isolate it away * from memory allocators anyway. */ - pf = _PAGE_RW; - if (sev_active()) - pf |= _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { pr_err("Failed to create 1:1 mapping for the first page!\n"); return 1; @@ -388,21 +248,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) + if (!efi_is_mixed()) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (_etext - _text) >> PAGE_SHIFT; + npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; - pf = _PAGE_RW | _PAGE_ENC; + pf = _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { pr_err("Failed to map kernel text 1:1\n"); return 1; @@ -417,6 +278,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) unsigned long pfn; pgd_t *pgd = efi_mm.pgd; + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). + */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -434,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md) unsigned long size = md->num_pages << PAGE_SHIFT; u64 pa = md->phys_addr; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return old_map_region(md); /* @@ -449,7 +326,7 @@ void __init efi_map_region(efi_memory_desc_t *md) * booting in EFI mixed mode, because even though we may be * running a 64-bit kernel, the firmware may only be 32-bit. */ - if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + if (efi_is_mixed()) { md->virt_addr = md->phys_addr; return; } @@ -491,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md) __map_region(md, md->virt_addr); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type, u64 attribute) -{ - unsigned long last_map_pfn; - - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); - - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { - unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type, attribute); - } - - if (!(attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); - - return (void __iomem *)__va(phys_addr); -} - void __init parse_efi_setup(u64 phys_addr, u32 data_len) { efi_setup = phys_addr + sizeof(struct setup_data); @@ -559,7 +416,7 @@ void __init efi_runtime_update_mappings(void) { efi_memory_desc_t *md; - if (efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_have_uv1_memmap()) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); return; @@ -613,10 +470,10 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - if (efi_enabled(EFI_OLD_MEMMAP)) - ptdump_walk_pgd_level(NULL, swapper_pg_dir); + if (efi_have_uv1_memmap()) + ptdump_walk_pgd_level(NULL, &init_mm); else - ptdump_walk_pgd_level(NULL, efi_mm.pgd); + ptdump_walk_pgd_level(NULL, &efi_mm); #endif } @@ -634,63 +491,74 @@ void efi_switch_mm(struct mm_struct *mm) switch_mm(efi_scratch.prev_mm, mm, NULL); } -#ifdef CONFIG_EFI_MIXED -extern efi_status_t efi64_thunk(u32, ...); - static DEFINE_SPINLOCK(efi_runtime_lock); -#define runtime_service32(func) \ -({ \ - u32 table = (u32)(unsigned long)efi.systab; \ - u32 *rt, *___f; \ - \ - rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ - ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ - *___f; \ +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) \ +({ \ + efi_runtime_services_32_t *__rt; \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(__rt->func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ }) /* * Switch to the EFI page tables early so that we can access the 1:1 * runtime services mappings which are not mapped in any other page - * tables. This function must be called before runtime_service32(). + * tables. * * Also, disable interrupts because the IDT points to 64-bit handlers, * which aren't going to function correctly when we switch to 32-bit. */ -#define efi_thunk(f, ...) \ +#define efi_thunk(func...) \ ({ \ efi_status_t __s; \ - u32 __func; \ \ arch_efi_call_virt_setup(); \ \ - __func = runtime_service32(f); \ - __s = efi64_thunk(__func, __VA_ARGS__); \ + __s = __efi_thunk(func); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) +static efi_status_t __init __no_sanitize_address +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { efi_status_t status; unsigned long flags; - u32 func; efi_sync_low_kernel_mappings(); local_irq_save(flags); efi_switch_mm(&efi_mm); - func = (u32)(unsigned long)phys_set_virtual_address_map; - status = efi64_thunk(func, memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); efi_switch_mm(efi_scratch.prev_mm); local_irq_restore(flags); @@ -993,8 +861,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, return EFI_UNSUPPORTED; } -void efi_thunk_runtime_setup(void) +void __init efi_thunk_runtime_setup(void) { + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + efi.get_time = efi_thunk_get_time; efi.set_time = efi_thunk_set_time; efi.get_wakeup_time = efi_thunk_get_wakeup_time; @@ -1010,4 +881,46 @@ void efi_thunk_runtime_setup(void) efi.update_capsule = efi_thunk_update_capsule; efi.query_capsule_caps = efi_thunk_query_capsule_caps; } -#endif /* CONFIG_EFI_MIXED */ + +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd = NULL; + + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + + if (efi_have_uv1_memmap()) { + save_pgd = efi_uv1_memmap_phys_prolog(); + if (!save_pgd) + return EFI_ABORTED; + } else { + efi_switch_mm(&efi_mm); + } + + kernel_fpu_begin(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call(efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + kernel_fpu_end(); + + if (save_pgd) + efi_uv1_memmap_phys_epilog(save_pgd); + else + efi_switch_mm(efi_scratch.prev_mm); + + return status; +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index eed8b5b441f8..75c46e7a809f 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -7,118 +7,43 @@ */ #include <linux/linkage.h> +#include <linux/init.h> #include <asm/page_types.h> -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ + __INIT +SYM_FUNC_START(efi_call_svam) + push 8(%esp) + push 8(%esp) + push %ecx + push %edx -.text -SYM_FUNC_START(efi_call_phys) /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prolog and epilog. - */ - - /* - * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prolog and - * epilog. + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. */ movl $1f, %edx subl $__PAGE_OFFSET, %edx jmp *%edx 1: - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ + /* disable paging */ movl %cr0, %edx andl $0x7fffffff, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 4. Adjust stack pointer. - */ + /* convert the stack pointer to a flat mapped address */ subl $__PAGE_OFFSET, %esp - /* - * 5. Call the physical function. - */ - jmp *%ecx + /* call the EFI routine */ + call *(%eax) -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp + /* convert ESP back to a kernel VA, and pop the outgoing args */ + addl $__PAGE_OFFSET + 16, %esp - /* - * 7. Restore PG bit - */ + /* re-enable paging */ movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 +SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index b1d2313fe3bf..15da118f04f0 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -8,41 +8,12 @@ */ #include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/msr.h> -#include <asm/processor-flags.h> -#include <asm/page_types.h> +#include <asm/nospec-branch.h> -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp - -SYM_FUNC_START(efi_call) +SYM_FUNC_START(__efi_call) pushq %rbp movq %rsp, %rbp - SAVE_XMM + and $~0xf, %rsp mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) @@ -50,9 +21,7 @@ SYM_FUNC_START(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - popq %rbp + CALL_NOSPEC %rdi + leave ret -SYM_FUNC_END(efi_call) +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 3189f1394701..26f0da238c1c 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,15 +25,16 @@ .text .code64 -SYM_FUNC_START(efi64_thunk) +SYM_CODE_START(__efi64_thunk) push %rbp push %rbx /* * Switch to 1:1 mapped 32-bit stack pointer. */ - movq %rsp, efi_saved_sp(%rip) + movq %rsp, %rax movq efi_scratch(%rip), %rsp + push %rax /* * Calculate the physical address of the kernel text. @@ -41,113 +42,31 @@ SYM_FUNC_START(efi64_thunk) movq $__START_KERNEL_map, %rax subq phys_base(%rip), %rax - /* - * Push some physical addresses onto the stack. This is easier - * to do now in a code64 section while the assembler can address - * 64-bit values. Note that all the addresses on the stack are - * 32-bit. - */ - subq $16, %rsp - leaq efi_exit32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 8(%rsp) - - leaq __efi64_thunk(%rip), %rbx + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp subq %rax, %rbx - call *%rbx - - movq efi_saved_sp(%rip), %rsp - pop %rbx - pop %rbp - retq -SYM_FUNC_END(efi64_thunk) -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. - */ -SYM_FUNC_START_LOCAL(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + subq $28, %rsp + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) /* Switch to 32-bit descriptor */ pushq $__KERNEL32_CS - leaq efi_enter32(%rip), %rax - pushq %rax + pushq %rdi /* EFI runtime service address */ lretq -1: addq $32, %rsp - +1: movq 24(%rsp), %rsp pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -SYM_FUNC_END(__efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) + pop %rbp + retq .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - +2: pushl $__KERNEL_CS + pushl %ebp lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -func_rt_ptr: .quad 0 -efi_saved_sp: .quad 0 +SYM_CODE_END(__efi64_thunk) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 7675cf754d90..88d32c06cffa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -16,6 +16,7 @@ #include <asm/efi.h> #include <asm/uv/uv.h> #include <asm/cpu_device_id.h> +#include <asm/realmode.h> #include <asm/reboot.h> #define EFI_MIN_RESERVE 5120 @@ -243,7 +244,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store); */ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; struct efi_mem_range mr; efi_memory_desc_t md; int num_entries; @@ -260,10 +261,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - /* No need to reserve regions that will never be freed. */ - if (md.attribute & EFI_MEMORY_RUNTIME) - return; - size += addr % EFI_PAGE_SIZE; size = round_up(size, EFI_PAGE_SIZE); addr = round_down(addr, EFI_PAGE_SIZE); @@ -275,24 +272,23 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) num_entries = efi_memmap_split_count(&md, &mr.range); num_entries += efi.memmap.nr_map; - new_size = efi.memmap.desc_size * num_entries; - - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Could not allocate boot services memmap\n"); return; } - new = early_memremap(new_phys, new_size); + new = early_memremap(data.phys_map, data.size); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; } efi_memmap_insert(&efi.memmap, new, &mr); - early_memunmap(new, new_size); + early_memunmap(new, data.size); - efi_memmap_install(new_phys, num_entries); + efi_memmap_install(&data); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); } /* @@ -386,10 +382,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) /* * To Do: Remove this check after adding functionality to unmap EFI boot - * services code/data regions from direct mapping area because - * "efi=old_map" maps EFI regions in swapper_pg_dir. + * services code/data regions from direct mapping area because the UV1 + * memory map maps EFI regions in swapper_pg_dir. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -397,7 +393,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) * EFI runtime calls, hence don't unmap EFI boot services code/data * regions. */ - if (!efi_is_native()) + if (efi_is_mixed()) return; if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) @@ -409,7 +405,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) void __init efi_free_boot_services(void) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; efi_memory_desc_t *md; int num_entries = 0; void *new, *new_md; @@ -464,14 +460,12 @@ void __init efi_free_boot_services(void) if (!num_entries) return; - new_size = efi.memmap.desc_size * num_entries; - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Failed to allocate new EFI memmap\n"); return; } - new = memremap(new_phys, new_size, MEMREMAP_WB); + new = memremap(data.phys_map, data.size, MEMREMAP_WB); if (!new) { pr_err("Failed to map new EFI memmap\n"); return; @@ -495,7 +489,7 @@ void __init efi_free_boot_services(void) memunmap(new); - if (efi_memmap_install(new_phys, num_entries)) { + if (efi_memmap_install(&data) != 0) { pr_err("Could not install new EFI memmap\n"); return; } @@ -560,7 +554,7 @@ out: return ret; } -static const struct dmi_system_id sgi_uv1_dmi[] = { +static const struct dmi_system_id sgi_uv1_dmi[] __initconst = { { NULL, "SGI UV1", { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), @@ -583,8 +577,15 @@ void __init efi_apply_memmap_quirks(void) } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ - if (dmi_check_system(sgi_uv1_dmi)) - set_bit(EFI_OLD_MEMMAP, &efi.flags); + if (dmi_check_system(sgi_uv1_dmi)) { + if (IS_ENABLED(CONFIG_X86_UV)) { + set_bit(EFI_UV1_MEMMAP, &efi.flags); + } else { + pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_memmap_unmap(); + } + } } /* @@ -722,7 +723,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) /* * Make sure that an efi runtime service caused the page fault. * "efi_mm" cannot be used to check if the page fault had occurred - * in the firmware context because efi=old_map doesn't use efi_pgd. + * in the firmware context because the UV1 memmap doesn't use efi_pgd. */ if (efi_rts_work.efi_rts_id == EFI_NONE) return; diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c index 44d1f884c3d3..139738bbdd36 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c @@ -6,21 +6,31 @@ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> */ -#include <linux/gpio.h> -#include <linux/platform_data/tc35876x.h> +#include <linux/gpio/machine.h> #include <asm/intel-mid.h> +static struct gpiod_lookup_table tc35876x_gpio_table = { + .dev_id = "i2c_disp_brig", + .table = { + GPIO_LOOKUP("0000:00:0c.0", -1, "bridge-reset", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("0000:00:0c.0", -1, "bl-en", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("0000:00:0c.0", -1, "vadd", GPIO_ACTIVE_HIGH), + { }, + }, +}; + /*tc35876x DSI_LVDS bridge chip and panel platform data*/ static void *tc35876x_platform_data(void *data) { - static struct tc35876x_platform_data pdata; + struct gpiod_lookup_table *table = &tc35876x_gpio_table; + struct gpiod_lookup *lookup = table->table; - /* gpio pins set to -1 will not be used by the driver */ - pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN"); - pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN"); - pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3"); + lookup[0].chip_hwnum = get_gpio_by_name("LCMB_RXEN"); + lookup[1].chip_hwnum = get_gpio_by_name("6S6P_BL_EN"); + lookup[2].chip_hwnum = get_gpio_by_name("EN_VREG_LCD_V3P3"); + gpiod_add_lookup_table(table); - return &pdata; + return NULL; } static const struct devs_id tc35876x_dev_id __initconst = { diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index 6dd25dc5f027..e9d97d52475e 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -29,6 +29,8 @@ #include <asm/cpu_device_id.h> #include <asm/imr.h> #include <asm/iosf_mbi.h> +#include <asm/io.h> + #include <linux/debugfs.h> #include <linux/init.h> #include <linux/mm.h> diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c index 42f879b75f9b..4307830e1b6f 100644 --- a/arch/x86/platform/intel-quark/imr_selftest.c +++ b/arch/x86/platform/intel-quark/imr_selftest.c @@ -14,6 +14,8 @@ #include <asm-generic/sections.h> #include <asm/cpu_device_id.h> #include <asm/imr.h> +#include <asm/io.h> + #include <linux/init.h> #include <linux/mm.h> #include <linux/types.h> diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index ece9cb9c1189..607f58147311 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -31,13 +31,16 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return BIOS_STATUS_UNIMPLEMENTED; /* - * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI * callback method, which uses efi_call() directly, with the kernel page tables: */ - if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) + if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) { + kernel_fpu_begin(); ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); - else + kernel_fpu_end(); + } else { ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + } return ret; } @@ -214,3 +217,163 @@ int uv_bios_init(void) pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); return 0; } + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd) +{ + /* + * After the lock is released, the original page table is restored. + */ + int pgd_idx, i; + int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + + if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + + kfree(save_pgd); + + __flush_tlb_all(); + early_code_mapping_set_exec(0); +} + +pgd_t * __init efi_uv1_memmap_phys_prolog(void) +{ + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; + + int pgd; + int n_pgds, i, j; + + early_code_mapping_set_exec(1); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + if (!save_pgd) + return NULL; + + /* + * Build 1:1 identity mapping for UV1 memmap usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ + for (pgd = 0; pgd < n_pgds; pgd++) { + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; + } + + __flush_tlb_all(); + return save_pgd; +out: + efi_uv1_memmap_phys_epilog(save_pgd); + return NULL; +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type, u64 attribute) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type, attribute); + } + + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + + return (void __iomem *)__va(phys_addr); +} + +static int __init arch_parse_efi_cmdline(char *str) +{ + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + + if (!efi_is_mixed() && parse_option_str(str, "old_map")) + set_bit(EFI_UV1_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", arch_parse_efi_cmdline); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 5f0a96bf27a1..1fd321f37f1b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1668,12 +1668,12 @@ static int tunables_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations proc_uv_ptc_operations = { - .open = ptc_proc_open, - .read = seq_read, - .write = ptc_proc_write, - .llseek = seq_lseek, - .release = seq_release, +static const struct proc_ops uv_ptc_proc_ops = { + .proc_open = ptc_proc_open, + .proc_read = seq_read, + .proc_write = ptc_proc_write, + .proc_lseek = seq_lseek, + .proc_release = seq_release, }; static const struct file_operations tunables_fops = { @@ -1691,7 +1691,7 @@ static int __init uv_ptc_init(void) return 0; proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, - &proc_uv_ptc_operations); + &uv_ptc_proc_ops); if (!proc_uv_ptc) { pr_err("unable to create %s proc entry\n", UV_PTC_BASENAME); diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index 5bd949da7a4a..ac8eee093f9c 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -215,14 +215,12 @@ static int set_tls_entry(struct task_struct* task, struct user_desc *info, return 0; } -int arch_copy_tls(struct task_struct *new) +int arch_set_tls(struct task_struct *new, unsigned long tls) { struct user_desc info; int idx, ret = -EFAULT; - if (copy_from_user(&info, - (void __user *) UPT_SI(&new->thread.regs.regs), - sizeof(info))) + if (copy_from_user(&info, (void __user *) tls, sizeof(info))) goto out; ret = -EINVAL; diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c index 3a621e0d3925..ebd3855d9b13 100644 --- a/arch/x86/um/tls_64.c +++ b/arch/x86/um/tls_64.c @@ -6,14 +6,13 @@ void clear_flushed_tls(struct task_struct *task) { } -int arch_copy_tls(struct task_struct *t) +int arch_set_tls(struct task_struct *t, unsigned long tls) { /* * If CLONE_SETTLS is set, we need to save the thread id - * (which is argument 5, child_tid, of clone) so it can be set - * during context switches. + * so it can be set during context switches. */ - t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)]; + t->thread.arch.fs = tls; return 0; } diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index ba5a41828e9d..1aded63a95cb 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -62,10 +62,10 @@ config XEN_512GB boot parameter "xen_512gb_limit". config XEN_SAVE_RESTORE - bool - depends on XEN - select HIBERNATE_CALLBACKS - default y + bool + depends on XEN + select HIBERNATE_CALLBACKS + default y config XEN_DEBUG_FS bool "Enable Xen debug and tuning parameters in debugfs" diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a04551ee5568..1abe455d926a 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -31,7 +31,7 @@ static efi_system_table_t efi_systab_xen __initdata = { .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = NULL, /* Not used under Xen. */ .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c8dbee62ec2a..bbba8b17829a 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -67,7 +67,7 @@ #include <asm/linkage.h> #include <asm/page.h> #include <asm/init.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/smp.h> #include <asm/tlb.h> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 4a3fa295d8fe..1c645172b4b5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -11,7 +11,7 @@ config XTENSA select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION - select BUILDTIME_EXTABLE_SORT + select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select DMA_REMAP if MMU @@ -24,6 +24,7 @@ config XTENSA select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h new file mode 100644 index 000000000000..0eb94b70be55 --- /dev/null +++ b/arch/xtensa/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_XTENSA_VMALLOC_H +#define _ASM_XTENSA_VMALLOC_H + +#endif /* _ASM_XTENSA_VMALLOC_H */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index be897803834a..2c9e48566e48 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -520,7 +520,7 @@ common_exception_return: call4 schedule # void schedule (void) j 1b -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION 6: _bbci.l a4, TIF_NEED_RESCHED, 4f diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 9e1c49134c07..3edecc41ef8c 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -202,8 +202,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * involved. Much simpler to just not copy those live frames across. */ -int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, - unsigned long thread_fn_arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp_thread_fn, + unsigned long thread_fn_arg, struct task_struct *p, + unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -266,9 +267,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, childregs->syscall = regs->syscall; - /* The thread pointer is passed in the '4th argument' (= a5) */ if (clone_flags & CLONE_SETTLS) - childregs->threadptr = childregs->areg[5]; + childregs->threadptr = tls; } else { p->thread.ra = MAKE_RA_FOR_CALL( (unsigned long)ret_from_kernel_thread, 1); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 0f93b67c7a5a..adead45debe8 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -405,8 +405,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT # if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; -# elif defined(CONFIG_DUMMY_CONSOLE) - conswitchp = &dummy_con; # endif #endif } diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 25f4de729a6d..85a9ab1bc04d 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -406,3 +406,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 87bd68dd7687..0976e27b8d5d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -519,12 +519,15 @@ DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; + const char *pr = ""; + + if (IS_ENABLED(CONFIG_PREEMPTION)) + pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; console_verbose(); spin_lock_irq(&die_lock); - pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : ""); + pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr); show_regs(regs); if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); diff --git a/arch/xtensa/platforms/iss/include/platform/simcall.h b/arch/xtensa/platforms/iss/include/platform/simcall.h index 2ba45858e50a..4e2a48380dbf 100644 --- a/arch/xtensa/platforms/iss/include/platform/simcall.h +++ b/arch/xtensa/platforms/iss/include/platform/simcall.h @@ -113,9 +113,9 @@ static inline int simc_write(int fd, const void *buf, size_t count) static inline int simc_poll(int fd) { - struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; + long timeval[2] = { 0, 0 }; - return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&tv); + return __simc(SYS_select_one, fd, XTISS_SELECT_ONE_READ, (int)&timeval); } static inline int simc_lseek(int fd, uint32_t off, int whence) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index fa9f3893b002..4986226a5ab2 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -455,7 +455,7 @@ static void iss_net_set_multicast_list(struct net_device *dev) { } -static void iss_net_tx_timeout(struct net_device *dev) +static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { } diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index f9cd45860bee..833109880165 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -251,10 +251,10 @@ out_free: return err; } -static const struct file_operations fops = { - .read = proc_read_simdisk, - .write = proc_write_simdisk, - .llseek = default_llseek, +static const struct proc_ops simdisk_proc_ops = { + .proc_read = proc_read_simdisk, + .proc_write = proc_write_simdisk, + .proc_lseek = default_llseek, }; static int __init simdisk_setup(struct simdisk *dev, int which, @@ -290,7 +290,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, set_capacity(dev->gd, 0); add_disk(dev->gd); - dev->procfile = proc_create_data(tmp, 0644, procdir, &fops, dev); + dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev); return 0; out_alloc_disk: |