Diffstat (limited to 'arch')
220 files changed, 5200 insertions, 3966 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5f448201955b..ff2a393b635c 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -28,6 +28,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DEBUG_KMEMLEAK select HAVE_FUTEX_CMPXCHG if FUTEX @@ -350,9 +351,8 @@ config NODES_SHIFT Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory zones. -if ISA_ARCOMPACT - config ARC_COMPACT_IRQ_LEVELS + depends on ISA_ARCOMPACT bool "Setup Timer IRQ as high Priority" # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy depends on !SMP @@ -360,14 +360,10 @@ config ARC_COMPACT_IRQ_LEVELS config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" help - Double Precision Floating Point unit had dedicated regs which - need to be saved/restored across context-switch. - Note that ARC FPU is overly simplistic, unlike say x86, which has - hardware pieces to allow software to conditionally save/restore, - based on actual usage of FPU by a task. Thus our implemn does - this for all tasks in system. - -endif #ISA_ARCOMPACT + ARCompact FPU has internal registers to assist with Double precision + Floating Point operations. There are control and stauts registers + for floating point exceptions and rounding modes. These are + preserved across task context switch when enabled. config ARC_CANT_LLSC def_bool n diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index f9a5c9ddcae7..1d109b06e7d8 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -78,6 +78,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 5134f0baf33c..f7e432448e4b 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -39,6 +39,8 @@ #define ARC_REG_CLUSTER_BCR 0xcf #define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */ #define ARC_REG_LPB_CTRL 0x488 /* ARCv2 Loop Buffer control */ +#define ARC_REG_FPU_CTRL 0x300 +#define ARC_REG_FPU_STATUS 0x301 /* Common for ARCompact and ARCv2 status register */ #define ARC_REG_STATUS32 0x0A diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h new file mode 100644 index 000000000000..64347250fdf5 --- /dev/null +++ b/arch/arc/include/asm/fpu.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) + * + */ + +#ifndef _ASM_ARC_FPU_H +#define _ASM_ARC_FPU_H + +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE + +#include <asm/ptrace.h> + +#ifdef CONFIG_ISA_ARCOMPACT + +/* These DPFP regs need to be saved/restored across ctx-sw */ +struct arc_fpu { + struct { + unsigned int l, h; + } aux_dpfp[2]; +}; + +#define fpu_init_task(regs) + +#else + +/* + * ARCv2 FPU Control aux register + * - bits to enable Traps on Exceptions + * - Rounding mode + * + * ARCv2 FPU Status aux register + * - FPU exceptions flags (Inv, Div-by-Zero, overflow, underflow, inexact) + * - Flag Write Enable to clear flags explicitly (vs. 
by fpu instructions + * only + */ + +struct arc_fpu { + unsigned int ctrl, status; +}; + +extern void fpu_init_task(struct pt_regs *regs); + +#endif /* !CONFIG_ISA_ARCOMPACT */ + +extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); + +#else /* !CONFIG_ARC_FPU_SAVE_RESTORE */ + +#define fpu_save_restore(p, n) +#define fpu_init_task(regs) + +#endif /* CONFIG_ARC_FPU_SAVE_RESTORE */ + +#endif /* _ASM_ARC_FPU_H */ diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 706edeaa5583..ec532d1e0725 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -14,15 +14,7 @@ #ifndef __ASSEMBLY__ #include <asm/ptrace.h> - -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE -/* These DPFP regs need to be saved/restored across ctx-sw */ -struct arc_fpu { - struct { - unsigned int l, h; - } aux_dpfp[2]; -}; -#endif +#include <asm/fpu.h> #ifdef CONFIG_ARC_PLAT_EZNPS struct eznps_dp { diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 77f123385e96..aadf65b2b56c 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -9,19 +9,7 @@ #ifndef __ASSEMBLY__ #include <linux/sched.h> - -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE - -extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); -#define ARC_FPU_PREV(p, n) fpu_save_restore(p, n) -#define ARC_FPU_NEXT(t) - -#else - -#define ARC_FPU_PREV(p, n) -#define ARC_FPU_NEXT(n) - -#endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */ +#include <asm/fpu.h> #ifdef CONFIG_ARC_PLAT_EZNPS extern void dp_save_restore(struct task_struct *p, struct task_struct *n); @@ -36,9 +24,8 @@ struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n); #define switch_to(prev, next, last) \ do { \ ARC_EZNPS_DP_PREV(prev, next); \ - ARC_FPU_PREV(prev, next); \ + fpu_save_restore(prev, next); \ last = __switch_to(prev, next);\ - ARC_FPU_NEXT(next); \ mb(); \ } while (0) diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index 7ddba13e9b59..c3f4714a4f5c 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -11,6 +11,7 @@ #include <linux/types.h> int sys_clone_wrapper(int, int, int, int, int); +int sys_clone3_wrapper(void *, size_t); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 5eafa1115162..fa2713ae6bea 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index e784f5396dda..75539670431a 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -23,7 +23,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o +ifdef CONFIG_ISA_ARCOMPACT CFLAGS_fpu.o += -mdpfp +endif ifdef CONFIG_ARC_DW2_UNWIND CFLAGS_ctx_sw.o += -fno-omit-frame-pointer diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1f6bb184a44d..60406ec62eb8 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -35,6 +35,18 @@ ENTRY(sys_clone_wrapper) b .Lret_from_system_call END(sys_clone_wrapper) 
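
The sys_clone3_wrapper entry point added just below, together with the __ARCH_WANT_SYS_CLONE3 and copy_thread_tls() hunks elsewhere in this patch, is what a userspace clone3() call ends up exercising on ARC. A minimal sketch of such a call, purely illustrative and not part of the patch (it assumes a libc exposing syscall(2) and a uapi <linux/sched.h> that provides struct clone_args):

#define _GNU_SOURCE
#include <linux/sched.h>	/* uapi: struct clone_args, CLONE_* flags */
#include <sys/syscall.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical helper: raw clone3(2) until the libc grows a wrapper. */
static pid_t do_clone3(struct clone_args *args)
{
	return syscall(__NR_clone3, args, sizeof(*args));
}

int main(void)
{
	struct clone_args args = {
		.exit_signal = SIGCHLD,		/* plain fork-like child */
	};
	pid_t pid = do_clone3(&args);

	if (pid < 0) {
		perror("clone3");
		return 1;
	}
	if (pid == 0)				/* child */
		_exit(0);

	printf("parent: spawned %d via clone3()\n", (int)pid);
	return 0;
}

With CLONE_SETTLS and a non-zero args.tls, the value would reach the new tls parameter of copy_thread_tls() directly, rather than being fished out of regs->r3 as the old copy_thread() did.
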
+ENTRY(sys_clone3_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone3 + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone3_wrapper) + ENTRY(ret_from_fork) ; when the forked child comes here from the __switch_to function ; r0 has the last task pointer. diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index 07e22b563fbb..c67c0f0f5f77 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -6,7 +6,9 @@ */ #include <linux/sched.h> -#include <asm/switch_to.h> +#include <asm/fpu.h> + +#ifdef CONFIG_ISA_ARCOMPACT /* * To save/restore FPU regs, simplest scheme would use LR/SR insns. @@ -50,3 +52,28 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) : "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2)) ); } + +#else + +void fpu_init_task(struct pt_regs *regs) +{ + /* default rounding mode */ + write_aux_reg(ARC_REG_FPU_CTRL, 0x100); + + /* set "Write enable" to allow explicit write to exception flags */ + write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); +} + +void fpu_save_restore(struct task_struct *prev, struct task_struct *next) +{ + struct arc_fpu *save = &prev->thread.fpu; + struct arc_fpu *restore = &next->thread.fpu; + + save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); + save->status = read_aux_reg(ARC_REG_FPU_STATUS); + + write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); + write_aux_reg(ARC_REG_FPU_STATUS, restore->status); +} + +#endif diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e1889ce3faf9..315528f04bc1 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -20,6 +20,8 @@ #include <linux/elf.h> #include <linux/tick.h> +#include <asm/fpu.h> + SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr) { task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr; @@ -171,9 +173,8 @@ asmlinkage void ret_from_fork(void); * | user_r25 | * ------------------ <===== END of PAGE */ -int copy_thread(unsigned long clone_flags, - unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ @@ -231,7 +232,7 @@ int copy_thread(unsigned long clone_flags, * set task's userland tls data ptr from 4th arg * clone C-lib call is difft from clone sys-call */ - task_thread_info(p)->thr_ptr = regs->r3; + task_thread_info(p)->thr_ptr = tls; } else { /* Normal fork case: set parent's TLS ptr in child */ task_thread_info(p)->thr_ptr = @@ -264,7 +265,7 @@ int copy_thread(unsigned long clone_flags, /* * Do necessary setup to start up a new user task */ -void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) +void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) { regs->sp = usp; regs->ret = pc; @@ -280,6 +281,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) regs->eflags = 0; #endif + fpu_init_task(regs); + /* bogus seed values for debugging */ regs->lp_start = 0x10; regs->lp_end = 0x80; diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index fddecc76efb7..1069446bdc58 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -7,6 +7,7 @@ #include <asm/syscalls.h> #define sys_clone sys_clone_wrapper +#define sys_clone3 sys_clone3_wrapper #undef __SYSCALL #define 
__SYSCALL(nr, call) [nr] = (call), diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9b118516d2db..3944305e81df 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -9,18 +9,29 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/kvm_arm.h> #include <asm/cputype.h> /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +52,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -182,6 +198,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; @@ -198,7 +219,7 @@ static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu) } /* Get Access Size from a data abort */ -static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) { switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) { case 0: @@ -209,7 +230,7 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) return 4; default: kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); - return -EFAULT; + return 4; } } diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 556cd818eccf..c3314b286a61 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -14,7 +14,6 @@ #include <asm/cputype.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/fpstate.h> #include <kvm/arm_arch_timer.h> @@ -202,9 +201,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -284,8 +280,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -300,6 +294,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) {} +/* MMIO 
helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) @@ -363,9 +365,9 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); static inline bool kvm_arch_requires_vhe(void) { return false; } static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 40e9034db601..3c1b55ecc578 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -10,6 +10,7 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> #include <asm/cp15.h> +#include <asm/kvm_arm.h> #include <asm/vfp.h> #define __hyp_text __section(.hyp.text) notrace diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h deleted file mode 100644 index 7c0eddb0adb2..000000000000 --- a/arch/arm/include/asm/kvm_mmio.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#ifndef __ARM_KVM_MMIO_H__ -#define __ARM_KVM_MMIO_H__ - -#include <linux/kvm_host.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> - -struct kvm_decode { - unsigned long rt; - bool sign_extend; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM_KVM_MMIO_H__ */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 0e6f23504c26..9f7ae0d8690f 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -34,11 +34,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static u64 core_reg_offset_from_id(u64 id) { return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5efe5ca8fecf..688c63412cc2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,7 +17,6 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> -#include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -219,6 +218,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. 
This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -283,6 +314,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; @@ -304,7 +340,7 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f5acdde17f3b..d87aa609d2b6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmio.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -53,7 +52,7 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); @@ -325,9 +324,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -446,8 +442,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); @@ -491,6 +485,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run 
*run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h deleted file mode 100644 index 02b5c48fd467..000000000000 --- a/arch/arm64/include/asm/kvm_mmio.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#ifndef __ARM64_KVM_MMIO_H__ -#define __ARM64_KVM_MMIO_H__ - -#include <linux/kvm_host.h> -#include <asm/kvm_arm.h> - -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ -struct kvm_decode { - unsigned long rt; - bool sign_extend; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 820e5751ada7..ba85bb23f060 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -220,10 +220,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) #define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) -/* EL0 Virtual Timer Registers */ +/* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. 
+ */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) /* KVM-as-firmware specific pseudo-registers */ #define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f035385..7a7e425616b5 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2fff06114a8f..2bd92301d32f 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -47,11 +47,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static bool core_reg_offset_is_vreg(u64 off) { return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) && diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 0c6832ec52b1..d22d0534dd60 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -51,7 +51,7 @@ * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ -ENTRY(__guest_enter) +SYM_FUNC_START(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros @@ -100,9 +100,8 @@ alternative_else_nop_endif // Do not touch any register after this! 
eret sb -ENDPROC(__guest_enter) -ENTRY(__guest_exit) +SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs @@ -195,4 +194,4 @@ abort_guest_exit_end: msr spsr_el2, x4 orr x0, x0, x5 1: ret -ENDPROC(__guest_exit) +SYM_FUNC_END(__guest_enter) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index ccdb6a051ab2..6aafc2825c1c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
+ + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f4a8ae918827..30b7ea680f66 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -204,7 +204,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) return true; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { kfree(vcpu->arch.sve_state); } diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index dab1fea4752a..a4f48c1ac28c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -13,52 +13,46 @@ #include <asm/kvm_mmu.h> /* - * The LSB of the random hyp VA tag or 0 if no randomization is used. + * The LSB of the HYP VA tag */ static u8 tag_lsb; /* - * The random hyp VA tag value with the region bit if hyp randomization is used + * The HYP VA tag value with the region bit */ static u64 tag_val; static u64 va_mask; +/* + * We want to generate a hyp VA with the following format (with V == + * vabits_actual): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + * |--------- tag_val -----------|----- va_mask ---| + * + * which does not conflict with the idmap regions. + */ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; - int kva_msb; /* Where is my RAM region? */ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); hyp_va_msb ^= BIT(vabits_actual - 1); - kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (vabits_actual - 1)) { - /* - * No space in the address, let's compute the mask so - * that it covers (vabits_actual - 1) bits, and the region - * bit. The tag stays set to zero. - */ - va_mask = BIT(vabits_actual - 1) - 1; - va_mask |= hyp_va_msb; - } else { - /* - * We do have some free bits to insert a random tag. - * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == vabits_actual): - * - * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 
0 - * --------------------------------------------------------- - * | 0000000 | hyp_va_msb | random tag | kern linear VA | - */ - tag_lsb = kva_msb; - va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); - tag_val |= hyp_va_msb; - tag_val >>= tag_lsb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = hyp_va_msb; + + if (tag_lsb != (vabits_actual - 1)) { + /* We have some free bits to insert a random tag. */ + tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } + tag_val >>= tag_lsb; } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * VHE doesn't need any address translation, let's NOP * everything. * - * Alternatively, if we don't have any spare bits in - * the address, NOP everything after masking that - * kernel VA. + * Alternatively, if the tag is zero (because the layout + * dictates it and we don't have any spare bits in the + * address), NOP everything after masking the kernel VA. */ - if (has_vhe() || (!tag_lsb && i > 0)) { + if (has_vhe() || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a2739a34bb12..797d7f1ad5fe 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -5,9 +5,11 @@ config MIPS select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KCOV + select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_FORTIFY_SOURCE select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT @@ -47,7 +49,7 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 + select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT @@ -55,11 +57,14 @@ config MIPS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS + select HAVE_GENERIC_VDSO select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK @@ -78,18 +83,14 @@ config MIPS select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP - select HAVE_GENERIC_VDSO select IRQ_FORCED_THREADING select ISA if EISA - select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_REL if MODULES + select MODULES_USE_ELF_RELA if MODULES && 64BIT select PERF_USE_VMALLOC select RTC_LIB select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS - select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) - select ARCH_HAS_KCOV - select HAVE_GCC_PLUGINS menu "Machine selection" @@ -104,20 +105,18 @@ config MIPS_GENERIC select CEVT_R4K select CLKSRC_MIPS_GIC select COMMON_CLK - select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI + select CPU_MIPSR2_IRQ_VI select CSRC_R4K select DMA_PERDEV_COHERENT select HAVE_PCI select IRQ_MIPS_CPU - select LIBFDT select MIPS_AUTO_PFN_OFFSET select MIPS_CPU_SCACHE 
select MIPS_GIC select MIPS_L1_CACHE_SHIFT_7 select NO_EXCEPT_FILL select PCI_DRIVERS_GENERIC - select PINCTRL select SMP_UP if SMP select SWAP_IO_SPACE select SYS_HAS_CPU_MIPS32_R1 @@ -132,11 +131,12 @@ config MIPS_GENERIC select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_MICROMIPS - select SYS_SUPPORTS_MIPS_CPS select SYS_SUPPORTS_MIPS16 + select SYS_SUPPORTS_MIPS_CPS select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_RELOCATABLE select SYS_SUPPORTS_SMARTMIPS + select UHI_BOOT select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN @@ -144,7 +144,6 @@ config MIPS_GENERIC select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF - select UHI_BOOT help Select this to build a kernel which aims to support multiple boards, generally using a flattened device tree passed from the bootloader @@ -403,7 +402,6 @@ config MACH_INGENIC select GENERIC_IRQ_CHIP select BUILTIN_DTB if MIPS_NO_APPENDED_DTB select USE_OF - select LIBFDT config LANTIQ bool "Lantiq based platforms" @@ -510,7 +508,6 @@ config MACH_PISTACHIO select DMA_NONCOHERENT select GPIOLIB select IRQ_MIPS_CPU - select LIBFDT select MFD_SYSCON select MIPS_CPU_SCACHE select MIPS_GIC @@ -548,7 +545,6 @@ config MIPS_MALTA select I8253 select I8259 select IRQ_MIPS_CPU - select LIBFDT select MIPS_BONITO64 select MIPS_CPU_SCACHE select MIPS_GIC @@ -980,7 +976,6 @@ config CAVIUM_OCTEON_SOC select ZONE_DMA32 select HOLES_IN_ZONE select GPIOLIB - select LIBFDT select USE_OF select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SMP @@ -1223,8 +1218,7 @@ config NO_IOPORT_MAP def_bool n config GENERIC_CSUM - bool - default y if !CPU_HAS_LOAD_STORE_LR + def_bool CPU_NO_LOAD_STORE_LR config GENERIC_ISA_DMA bool @@ -1442,11 +1436,14 @@ config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA + select CPU_MIPSR2 + select CPU_HAS_PREFETCH select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select CPU_SUPPORTS_MSA - select CPU_HAS_LOAD_STORE_LR + select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT + select CPU_MIPSR2_IRQ_VI select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE @@ -1464,8 +1461,6 @@ config CPU_LOONGSON64 config LOONGSON3_ENHANCEMENT bool "New Loongson-3 CPU Enhancements" default n - select CPU_MIPSR2 - select CPU_HAS_PREFETCH depends on CPU_LOONGSON64 help New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A @@ -1542,7 +1537,6 @@ config CPU_MIPS32_R1 bool "MIPS32 Release 1" depends on SYS_HAS_CPU_MIPS32_R1 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM help @@ -1560,7 +1554,6 @@ config CPU_MIPS32_R2 bool "MIPS32 Release 2" depends on SYS_HAS_CPU_MIPS32_R2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA @@ -1576,6 +1569,7 @@ config CPU_MIPS32_R6 bool "MIPS32 Release 6" depends on SYS_HAS_CPU_MIPS32_R6 select CPU_HAS_PREFETCH + select CPU_NO_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA @@ -1591,7 +1585,6 @@ config CPU_MIPS64_R1 bool "MIPS64 Release 1" depends on SYS_HAS_CPU_MIPS64_R1 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select 
CPU_SUPPORTS_HIGHMEM @@ -1611,7 +1604,6 @@ config CPU_MIPS64_R2 bool "MIPS64 Release 2" depends on SYS_HAS_CPU_MIPS64_R2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1629,6 +1621,7 @@ config CPU_MIPS64_R6 bool "MIPS64 Release 6" depends on SYS_HAS_CPU_MIPS64_R6 select CPU_HAS_PREFETCH + select CPU_NO_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1646,7 +1639,6 @@ config CPU_R3000 bool "R3000" depends on SYS_HAS_CPU_R3000 select CPU_HAS_WB - select CPU_HAS_LOAD_STORE_LR select CPU_R3K_TLB select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1662,7 +1654,6 @@ config CPU_TX39XX bool "R39XX" depends on SYS_HAS_CPU_TX39XX select CPU_SUPPORTS_32BIT_KERNEL - select CPU_HAS_LOAD_STORE_LR select CPU_R3K_TLB config CPU_VR41XX @@ -1670,7 +1661,6 @@ config CPU_VR41XX depends on SYS_HAS_CPU_VR41XX select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL - select CPU_HAS_LOAD_STORE_LR help The options selects support for the NEC VR4100 series of processors. Only choose this option if you have one of these processors as a @@ -1683,7 +1673,6 @@ config CPU_R4X00 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R4000-series processors other than 4300, including the R4000, R4400, R4600, and 4700. @@ -1692,7 +1681,6 @@ config CPU_TX49XX bool "R49XX" depends on SYS_HAS_CPU_TX49XX select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES @@ -1703,7 +1691,6 @@ config CPU_R5000 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R5000-series processors other than the Nevada. @@ -1713,7 +1700,6 @@ config CPU_R5500 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV instruction set. @@ -1724,7 +1710,6 @@ config CPU_NEVADA select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES - select CPU_HAS_LOAD_STORE_LR help QED / PMC-Sierra RM52xx-series ("Nevada") processors. 
@@ -1732,7 +1717,6 @@ config CPU_R10000 bool "R10000" depends on SYS_HAS_CPU_R10000 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1744,7 +1728,6 @@ config CPU_RM7000 bool "RM7000" depends on SYS_HAS_CPU_RM7000 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1753,7 +1736,6 @@ config CPU_RM7000 config CPU_SB1 bool "SB1" depends on SYS_HAS_CPU_SB1 - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1764,7 +1746,6 @@ config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_64BIT_KERNEL select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM @@ -1794,7 +1775,6 @@ config CPU_BMIPS select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_CPUFREQ select MIPS_EXTERNAL_TIMER help @@ -1803,7 +1783,6 @@ config CPU_BMIPS config CPU_XLR bool "Netlogic XLR SoC" depends on SYS_HAS_CPU_XLR - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1822,7 +1801,6 @@ config CPU_XLP select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_MIPSR2 select CPU_SUPPORTS_HUGEPAGES select MIPS_ASID_BITS_VARIABLE @@ -1928,14 +1906,12 @@ config CPU_LOONGSON2EF select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select ARCH_HAS_PHYS_TO_DMA - select CPU_HAS_LOAD_STORE_LR config CPU_LOONGSON32 bool select CPU_MIPS32 select CPU_MIPSR2 select CPU_HAS_PREFETCH - select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_CPUFREQ @@ -2110,12 +2086,14 @@ config CPU_MIPSR2 bool default y if CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_CAVIUM_OCTEON select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select MIPS_SPRAM config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE select MIPS_CRC_SUPPORT @@ -2575,15 +2553,23 @@ config CPU_HAS_WB config XKS01 bool +config CPU_HAS_DIEI + depends on !CPU_DIEI_BROKEN + bool + +config CPU_DIEI_BROKEN + bool + config CPU_HAS_RIXI bool -config CPU_HAS_LOAD_STORE_LR +config CPU_NO_LOAD_STORE_LR bool help - CPU has support for unaligned load and store instructions: + CPU lacks support for unaligned load and store instructions: LWL, LWR, SWL, SWR (Load/store word left/right). - LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems). + LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit + systems). 
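
As background on the instructions named in the CPU_NO_LOAD_STORE_LR help text above: on pre-R6 cores the lwl/lwr (and swl/swr) pairs are how an unaligned word is loaded or stored without trapping. A purely illustrative sketch, not part of the patch, assuming a big-endian, pre-R6 MIPS target and GCC inline asm (a little-endian target swaps the 0/3 offsets):

#include <stdint.h>

static inline uint32_t load32_unaligned_be(const unsigned char *p)
{
	uint32_t v;

	/*
	 * lwl fills the most-significant bytes, lwr the least-significant
	 * ones; neither instruction faults on an unaligned address.
	 */
	__asm__("lwl	%0, 0(%1)\n\t"
		"lwr	%0, 3(%1)"
		: "=&r" (v)
		: "r" (p)
		: "memory");
	return v;
}

R6 removed these opcodes in favor of regular lw/sw handling misaligned addresses, which is why in this patch the R6 configs select the inverted symbol while the other CPU entries simply drop the old CPU_HAS_LOAD_STORE_LR select.
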
# # Vectored interrupt mode is an R2 feature @@ -2696,6 +2682,14 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + config RELOCATABLE bool "Relocatable kernel" depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC) diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index f03fdc95143e..4b1d3ba3a8a2 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -17,7 +17,7 @@ quiet_cmd_ls3_llsc = LLSCCHK $@ cmd_ls3_llsc = $(CMD_LS3_LLSC) $@ CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 528bd73d530a..4ed45ade32a1 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile index 9cc48441eb71..e1654291a7b0 100644 --- a/arch/mips/boot/dts/ingenic/Makefile +++ b/arch/mips/boot/dts/ingenic/Makefile @@ -2,5 +2,6 @@ dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb dtb-$(CONFIG_JZ4770_GCW0) += gcw0.dtb dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb +dtb-$(CONFIG_X1000_CU1000_NEO) += cu1000-neo.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/mips/boot/dts/ingenic/cu1000-neo.dts b/arch/mips/boot/dts/ingenic/cu1000-neo.dts new file mode 100644 index 000000000000..03abd94acd84 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/cu1000-neo.dts @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "x1000.dtsi" +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/clock/ingenic,tcu.h> +#include <dt-bindings/interrupt-controller/irq.h> + +/ { + compatible = "yna,cu1000-neo", "ingenic,x1000"; + model = "YSH & ATIL General Board CU Neo"; + + aliases { + serial2 = &uart2; + }; + + chosen { + stdout-path = "serial2:115200n8"; + }; + + memory { + device_type = "memory"; + reg = <0x0 0x04000000>; + }; + + wlan_pwrseq: msc1-pwrseq { + compatible = "mmc-pwrseq-simple"; + + clocks = <&lpoclk>; + clock-names = "ext_clock"; + + reset-gpios = <&gpc 17 GPIO_ACTIVE_LOW>; + post-power-on-delay-ms = <200>; + + lpoclk: ap6212a { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + }; +}; + +&exclk { + clock-frequency = <24000000>; +}; + +&tcu { + /* 1500 kHz for the system timer and clocksource */ + assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; + assigned-clock-rates = <1500000>, <1500000>; + + /* Use channel #0 for the system timer channel #2 for the clocksource */ + ingenic,pwm-channels-mask = <0xfa>; +}; + +&i2c0 { + status = "okay"; + + clock-frequency = <400000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_i2c0>; + + ads7830@48 { + compatible = "ti,ads7830"; + reg = <0x48>; + }; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + + status = "okay"; +}; + +&mac { + phy-mode = "rmii"; + phy-handle = <&lan8720a>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mac>; + + 
snps,reset-gpio = <&gpc 23 GPIO_ACTIVE_LOW>; /* PC23 */ + snps,reset-active-low; + snps,reset-delays-us = <0 10000 30000>; + + status = "okay"; +}; + +&mdio { + status = "okay"; + + lan8720a: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.c0f0", "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; +}; + +&msc0 { + bus-width = <8>; + max-frequency = <50000000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_msc0>; + + non-removable; + + status = "okay"; +}; + +&msc1 { + bus-width = <4>; + max-frequency = <50000000>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_msc1>; + + #address-cells = <1>; + #size-cells = <0>; + + non-removable; + + mmc-pwrseq = <&wlan_pwrseq>; + + status = "okay"; + + ap6212a: wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + + interrupt-parent = <&gpc>; + interrupts = <16 IRQ_TYPE_EDGE_FALLING>; + interrupt-names = "host-wake"; + + brcm,drive-strength = <10>; + }; +}; + +&pinctrl { + pins_i2c0: i2c0 { + function = "i2c0"; + groups = "i2c0-data"; + bias-disable; + }; + + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data-d"; + bias-disable; + }; + + pins_mac: mac { + function = "mac"; + groups = "mac"; + bias-disable; + }; + + pins_msc0: msc0 { + function = "mmc0"; + groups = "mmc0-1bit", "mmc0-4bit", "mmc0-8bit"; + bias-disable; + }; + + pins_msc1: msc1 { + function = "mmc1"; + groups = "mmc1-1bit", "mmc1-4bit"; + bias-disable; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi new file mode 100644 index 000000000000..4994c695a1a7 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <dt-bindings/clock/x1000-cgu.h> +#include <dt-bindings/dma/x1000-dma.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "ingenic,x1000", "ingenic,x1000e"; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + intc: interrupt-controller@10001000 { + compatible = "ingenic,x1000-intc", "ingenic,jz4780-intc"; + reg = <0x10001000 0x50>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + exclk: ext { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + rtclk: rtc { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + cgu: x1000-cgu@10000000 { + compatible = "ingenic,x1000-cgu"; + reg = <0x10000000 0x100>; + + #clock-cells = <1>; + + clocks = <&exclk>, <&rtclk>; + clock-names = "ext", "rtc"; + }; + + tcu: timer@10002000 { + compatible = "ingenic,x1000-tcu", + "ingenic,jz4770-tcu", + "simple-mfd"; + reg = <0x10002000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10002000 0x1000>; + + #clock-cells = <1>; + + clocks = <&cgu X1000_CLK_RTCLK + &cgu X1000_CLK_EXCLK + &cgu X1000_CLK_PCLK>; + clock-names = "rtc", "ext", "pclk"; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&intc>; + interrupts = <27 26 25>; + + wdt: watchdog@0 { + compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog"; + reg = <0x0 0x10>; + + clocks = <&cgu X1000_CLK_RTCLK>; + clock-names = "wdt"; + }; + }; + + rtc: rtc@10003000 { + compatible = "ingenic,x1000-rtc", "ingenic,jz4780-rtc"; + reg = <0x10003000 0x4c>; + + interrupt-parent = <&intc>; + interrupts = <32>; + + clocks = <&cgu X1000_CLK_RTCLK>; + clock-names = "rtc"; + }; + + pinctrl: pin-controller@10010000 { + compatible = 
"ingenic,x1000-pinctrl"; + reg = <0x10010000 0x800>; + #address-cells = <1>; + #size-cells = <0>; + + gpa: gpio@0 { + compatible = "ingenic,x1000-gpio"; + reg = <0>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 0 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <17>; + }; + + gpb: gpio@1 { + compatible = "ingenic,x1000-gpio"; + reg = <1>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 32 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <16>; + }; + + gpc: gpio@2 { + compatible = "ingenic,x1000-gpio"; + reg = <2>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 64 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <15>; + }; + + gpd: gpio@3 { + compatible = "ingenic,x1000-gpio"; + reg = <3>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 96 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <14>; + }; + }; + + i2c0: i2c-controller@10050000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10050000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <60>; + + clocks = <&cgu X1000_CLK_I2C0>; + + status = "disabled"; + }; + + i2c1: i2c-controller@10051000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10051000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <59>; + + clocks = <&cgu X1000_CLK_I2C1>; + + status = "disabled"; + }; + + i2c2: i2c-controller@10052000 { + compatible = "ingenic,x1000-i2c"; + reg = <0x10052000 0x1000>; + + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&intc>; + interrupts = <58>; + + clocks = <&cgu X1000_CLK_I2C2>; + + status = "disabled"; + }; + + uart0: serial@10030000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10030000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <51>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART0>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart1: serial@10031000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10031000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <50>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART1>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart2: serial@10032000 { + compatible = "ingenic,x1000-uart"; + reg = <0x10032000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <49>; + + clocks = <&exclk>, <&cgu X1000_CLK_UART2>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + pdma: dma-controller@13420000 { + compatible = "ingenic,x1000-dma"; + reg = <0x13420000 0x400 + 0x13421000 0x40>; + #dma-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <10>; + + clocks = <&cgu X1000_CLK_PDMA>; + }; + + mac: ethernet@134b0000 { + compatible = "ingenic,x1000-mac", "snps,dwmac"; + reg = <0x134b0000 0x2000>; + + interrupt-parent = <&intc>; + interrupts = <55>; + interrupt-names = "macirq"; + + clocks = <&cgu X1000_CLK_MAC>; + clock-names = "stmmaceth"; + + status = "disabled"; + + mdio: mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + status = "disabled"; + }; + }; + + msc0: mmc@13450000 { + compatible = "ingenic,x1000-mmc"; + reg = <0x13450000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <37>; + + clocks = <&cgu X1000_CLK_MSC0>; + clock-names = "mmc"; + + 
cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + dmas = <&pdma X1000_DMA_MSC0_RX 0xffffffff>, + <&pdma X1000_DMA_MSC0_TX 0xffffffff>; + dma-names = "rx", "tx"; + + status = "disabled"; + }; + + msc1: mmc@13460000 { + compatible = "ingenic,x1000-mmc"; + reg = <0x13460000 0x1000>; + + interrupt-parent = <&intc>; + interrupts = <36>; + + clocks = <&cgu X1000_CLK_MSC1>; + clock-names = "mmc"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + dmas = <&pdma X1000_DMA_MSC1_RX 0xffffffff>, + <&pdma X1000_DMA_MSC1_TX 0xffffffff>; + dma-names = "rx", "tx"; + + status = "disabled"; + }; +}; diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts index aa5caaa31104..6069b33cf09f 100644 --- a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -177,6 +177,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */ + fifo-size = <8>; + tx-threshold = <8>; + rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>; }; @@ -195,3 +198,8 @@ &watchdog { status = "okay"; }; + +&wmac { + status = "okay"; + mediatek,mtd-eeprom = <&factory 0x0000>; +}; diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 742bcc1dc2e0..892e8ab863c5 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -285,4 +285,14 @@ interrupt-parent = <&intc>; interrupts = <18>; }; + + wmac: wmac@10300000 { + compatible = "mediatek,mt7628-wmac"; + reg = <0x10300000 0x100000>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + status = "disabled"; + }; }; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f97be32bf699..6bd1e97effdf 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2193,7 +2193,7 @@ static int octeon_irq_cib_map(struct irq_domain *d, struct octeon_irq_cib_chip_data *cd; if (hw >= host_data->max_bits) { - pr_err("ERROR: %s mapping %u is to big!\n", + pr_err("ERROR: %s mapping %u is too big!\n", irq_domain_get_of_node(d)->name, (unsigned)hw); return -EINVAL; } diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig new file mode 100644 index 000000000000..9b05a8fdabe1 --- /dev/null +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -0,0 +1,117 @@ +CONFIG_LOCALVERSION_AUTO=y +CONFIG_KERNEL_GZIP=y +CONFIG_SYSVIPC=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MACH_INGENIC=y +CONFIG_X1000_CU1000_NEO=y +CONFIG_HIGHMEM=y +CONFIG_HZ_100=y +# CONFIG_SECCOMP is not set +# CONFIG_SUSPEND is not set +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_COMPACTION is not set +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_CFG80211=y +CONFIG_UEVENT_HELPER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set 
+CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_SMSC_PHY=y +CONFIG_BRCMFMAC=y +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_LEGACY_PTY_COUNT=2 +CONFIG_SERIAL_EARLYCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=3 +CONFIG_SERIAL_8250_RUNTIME_UARTS=3 +CONFIG_SERIAL_8250_INGENIC=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_JZ4780=y +CONFIG_GPIO_SYSFS=y +CONFIG_SENSORS_ADS7828=y +CONFIG_WATCHDOG=y +CONFIG_JZ4740_WDT=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_JZ4740=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_JZ4740=y +CONFIG_DMADEVICES=y +CONFIG_DMA_JZ4780=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_NVMEM=y +CONFIG_NVMEM_SYSFS=y +CONFIG_EXT4_FS=y +# CONFIG_DNOTIFY is not set +CONFIG_AUTOFS_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_PROC_PAGE_MONITOR is not set +CONFIG_TMPFS=y +CONFIG_CONFIGFS_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_LZO=y +CONFIG_PRINTK_TIME=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_CONSOLE_LOGLEVEL_QUIET=15 +CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7 +CONFIG_DEBUG_INFO=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +CONFIG_STACKTRACE=y +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="earlycon clk_ignore_unused" diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 1134fbb99fc2..7626f2a75b03 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -41,6 +41,7 @@ CONFIG_SPI_DESIGNWARE=y CONFIG_SPI_DW_MMIO=y CONFIG_SPI_SPIDEV=y +CONFIG_PINCTRL=y CONFIG_PINCTRL_OCELOT=y CONFIG_GPIO_SYSFS=y diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 61b0fc2026e6..16d1eb4c8fe6 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += sections.h +generic-y += serial.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index d41a5057bc69..61727785a247 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -81,6 +81,7 @@ enum loongson2ef_machine_type { #define MACH_INGENIC_JZ4770 2 /* JZ4770 SOC */ #define MACH_INGENIC_JZ4780 3 /* JZ4780 SOC */ #define MACH_INGENIC_X1000 4 /* X1000 SOC */ +#define MACH_INGENIC_X1830 5 /* X1830 SOC */ extern char *system_type; const char *get_system_type(void); diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 983a6a7f43a1..de44c92b1c1f 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -555,6 +555,10 @@ # define cpu_has_perf __opt(MIPS_CPU_PERF) #endif +#ifndef cpu_has_mac2008_only +# define cpu_has_mac2008_only __opt(MIPS_CPU_MAC_2008_ONLY) +#endif + #ifdef CONFIG_SMP /* * Some systems 
share FTLB RAMs between threads within a core (siblings in diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index ea830783d663..216a22916740 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 #define PRID_COMP_LOONGSON 0x140000 -#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750 */ +#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750, X1830 */ #define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775, X1000 */ #define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */ @@ -185,7 +185,8 @@ * These are the PRID's for when 23:16 == PRID_COMP_INGENIC_* */ -#define PRID_IMP_XBURST 0x0200 +#define PRID_IMP_XBURST_REV1 0x0200 /* XBurst with MXU SIMD ISA */ +#define PRID_IMP_XBURST_REV2 0x0100 /* XBurst with MXU2 SIMD ISA */ /* * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC @@ -415,6 +416,7 @@ enum cpu_type_enum { #define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \ BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */ #define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */ +#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU Only support MAC2008 Fused multiply-add instruction */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/gio_device.h b/arch/mips/include/asm/gio_device.h index c52948f9ca95..159087f5386e 100644 --- a/arch/mips/include/asm/gio_device.h +++ b/arch/mips/include/asm/gio_device.h @@ -32,8 +32,6 @@ struct gio_driver { }; #define to_gio_driver(drv) container_of(drv, struct gio_driver, driver) -extern const struct gio_device_id *gio_match_device(const struct gio_device_id *, - const struct gio_device *); extern struct gio_device *gio_dev_get(struct gio_device *); extern void gio_dev_put(struct gio_device *); diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index a4f48b0f5541..a0b92205f933 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -23,7 +23,7 @@ * TLB hazards */ #if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT) + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) /* * MIPSR2 defines ehb for hazard avoidance @@ -158,7 +158,7 @@ do { \ } while (0) #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ - defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ + defined(CONFIG_CPU_LOONGSON2EF) || defined(CONFIG_CPU_LOONGSON64) || \ defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) /* diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index c4728bbdf15b..47a8ffc0b413 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -18,7 +18,7 @@ #include <asm/compiler.h> #include <asm/hazards.h> -#if defined(CONFIG_CPU_MIPSR2) || defined (CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_HAS_DIEI) static inline void arch_local_irq_disable(void) { @@ -94,7 +94,7 @@ static inline void arch_local_irq_restore(unsigned long flags) void arch_local_irq_disable(void); unsigned long arch_local_irq_save(void); void arch_local_irq_restore(unsigned long flags); -#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */ +#endif /* CONFIG_CPU_HAS_DIEI */ static inline void arch_local_irq_enable(void) { @@ -102,7 +102,7 @@ static inline void arch_local_irq_enable(void) " .set push \n" " .set reorder \n" " .set noat \n" -#if 
defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_HAS_DIEI) " ei \n" #else " mfc0 $1,$12 \n" diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 02783e141c32..fef0fda8f82f 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -37,6 +37,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -52,6 +53,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -84,6 +86,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -99,6 +102,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" diff --git a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h index f992c1db876b..3e54f605a70b 100644 --- a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h @@ -10,20 +10,10 @@ #define __ASM_MACH_IP27_KERNEL_ENTRY_H #include <asm/sn/addrs.h> -#include <asm/sn/sn0/hubni.h> +#include <asm/sn/agent.h> #include <asm/sn/klkernvars.h> /* - * Returns the local nasid into res. 
- */ - .macro GET_NASID_ASM res - dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID) - ld \res, (\res) - and \res, NSRI_NODEID_MASK - dsrl \res, NSRI_NODEID_SHFT - .endm - -/* * TLB bits */ #define PAGE_GLOBAL (1 << 6) diff --git a/arch/mips/include/asm/mach-ip27/mangle-port.h b/arch/mips/include/asm/mach-ip27/mangle-port.h index f6e4912ea062..27c56efa519f 100644 --- a/arch/mips/include/asm/mach-ip27/mangle-port.h +++ b/arch/mips/include/asm/mach-ip27/mangle-port.h @@ -8,7 +8,7 @@ #ifndef __ASM_MACH_IP27_MANGLE_PORT_H #define __ASM_MACH_IP27_MANGLE_PORT_H -#define __swizzle_addr_b(port) (port) +#define __swizzle_addr_b(port) ((port) ^ 3) #define __swizzle_addr_w(port) ((port) ^ 2) #define __swizzle_addr_l(port) (port) #define __swizzle_addr_q(port) (port) @@ -20,6 +20,6 @@ # define ioswabl(a, x) (x) # define __mem_ioswabl(a, x) cpu_to_le32(x) # define ioswabq(a, x) (x) -# define __mem_ioswabq(a, x) cpu_to_le32(x) +# define __mem_ioswabq(a, x) cpu_to_le64(x) #endif /* __ASM_MACH_IP27_MANGLE_PORT_H */ diff --git a/arch/mips/include/asm/mach-ip27/mmzone.h b/arch/mips/include/asm/mach-ip27/mmzone.h index f463826515df..08c36e50a860 100644 --- a/arch/mips/include/asm/mach-ip27/mmzone.h +++ b/arch/mips/include/asm/mach-ip27/mmzone.h @@ -4,7 +4,8 @@ #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> +#include <asm/sn/klkernvars.h> #define pa_to_nid(addr) NASID_GET(addr) @@ -12,7 +13,6 @@ struct hub_data { kern_vars_t kern_vars; DECLARE_BITMAP(h_bigwin_used, HUB_NUM_BIG_WINDOW); cpumask_t h_cpus; - unsigned long slice_map; }; struct node_data { diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index be61ddcdacab..d66cc53feab8 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -2,12 +2,12 @@ #ifndef _ASM_MACH_TOPOLOGY_H #define _ASM_MACH_TOPOLOGY_H 1 -#include <asm/sn/hub.h> #include <asm/sn/types.h> #include <asm/mmzone.h> struct cpuinfo_ip27 { nasid_t p_nasid; /* my node ID in numa-as-id-space */ + unsigned short p_speed; /* cpu speed in MHz */ unsigned char p_slice; /* Physical position on node board */ }; diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 7dc8d75445a9..4fab38c743dd 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -46,5 +46,7 @@ #define cpu_has_wsbh 1 #define cpu_has_ic_fills_f_dc 1 #define cpu_hwrena_impl_bits 0xc0000000 +#define cpu_has_mac2008_only 1 +#define cpu_has_mips_r2_exec_hazard 0 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 0d5a30988697..796fe47cfd17 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1101,9 +1101,12 @@ /* * Bits 22:20 of the FPU Status Register will be read as 0, * and should be written as zero. + * MAC2008 was removed in Release 5 so we still treat it as + * reserved. 
*/ #define FPU_CSR_RSVD (_ULCAST_(7) << 20) +#define FPU_CSR_MAC2008 (_ULCAST_(1) << 20) #define FPU_CSR_ABS2008 (_ULCAST_(1) << 19) #define FPU_CSR_NAN2008 (_ULCAST_(1) << 18) diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h index 3bc630ff9ad4..9c476a0400e0 100644 --- a/arch/mips/include/asm/pci/bridge.h +++ b/arch/mips/include/asm/pci/bridge.h @@ -806,7 +806,8 @@ struct bridge_controller { unsigned long baddr; unsigned long intr_addr; struct irq_domain *domain; - unsigned int pci_int[8]; + unsigned int pci_int[8][2]; + unsigned int int_mapping[8][2]; u32 ioc3_sid[8]; nasid_t nasid; }; diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h deleted file mode 100644 index 2777148dbfc5..000000000000 --- a/arch/mips/include/asm/serial.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2017 MIPS Tech, LLC - */ -#ifndef __ASM__SERIAL_H -#define __ASM__SERIAL_H - -#ifdef CONFIG_MIPS_GENERIC -/* - * Generic kernels cannot know a correct value for all platforms at - * compile time. Set it to 0 to prevent 8250_early using it - */ -#define BASE_BAUD 0 -#else -#include <asm-generic/serial.h> -#endif - -#endif /* __ASM__SERIAL_H */ diff --git a/arch/mips/include/asm/sn/arch.h b/arch/mips/include/asm/sn/arch.h index f7d3273d9599..9a9682543e89 100644 --- a/arch/mips/include/asm/sn/arch.h +++ b/arch/mips/include/asm/sn/arch.h @@ -25,7 +25,4 @@ #define INVALID_MODULE (moduleid_t)-1 #define INVALID_PARTID (partid_t)-1 -extern nasid_t get_nasid(void); -extern int get_cpu_slice(cpuid_t); - #endif /* _ASM_SN_ARCH_H */ diff --git a/arch/mips/include/asm/sn/hub.h b/arch/mips/include/asm/sn/hub.h deleted file mode 100644 index 45878fbefbae..000000000000 --- a/arch/mips/include/asm/sn/hub.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SN_HUB_H -#define __ASM_SN_HUB_H - -#include <linux/types.h> -#include <linux/cpumask.h> -#include <asm/sn/types.h> -#include <asm/sn/io.h> -#include <asm/sn/klkernvars.h> -#include <asm/xtalk/xtalk.h> - -/* ip27-hubio.c */ -extern unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, - unsigned long xtalk_addr, size_t size); -extern void hub_pio_init(nasid_t nasid); - -#endif /* __ASM_SN_HUB_H */ diff --git a/arch/mips/include/asm/sn/intr.h b/arch/mips/include/asm/sn/intr.h index fc1348193957..3d6954d370dc 100644 --- a/arch/mips/include/asm/sn/intr.h +++ b/arch/mips/include/asm/sn/intr.h @@ -8,15 +8,6 @@ #ifndef __ASM_SN_INTR_H #define __ASM_SN_INTR_H -/* Number of interrupt levels associated with each interrupt register. */ -#define N_INTPEND_BITS 64 - -#define INT_PEND0_BASELVL 0 -#define INT_PEND1_BASELVL 64 - -#define N_INTPENDJUNK_BITS 8 -#define INTPENDJUNK_CLRBIT 0x80 - /* * Macros to manipulate the interrupt register on the calling hub chip. */ @@ -84,14 +75,6 @@ do { \ #define CPU_RESCHED_B_IRQ 8 #define CPU_CALL_A_IRQ 9 #define CPU_CALL_B_IRQ 10 -#define MSC_MESG_INTR 11 -#define BASE_PCI_IRQ 12 - -/* - * INT_PEND0 again, bits determined by hardware / hardcoded: - */ -#define SDISK_INTR 63 /* SABLE name */ -#define IP_PEND0_6_63 63 /* What is this bit? 
*/ /* * INT_PEND1 hard-coded bits: diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h index 78ef760ddde4..2c09c17cadcd 100644 --- a/arch/mips/include/asm/sn/ioc3.h +++ b/arch/mips/include/asm/sn/ioc3.h @@ -21,50 +21,50 @@ struct ioc3_serialregs { /* SUPERIO uart register map */ struct ioc3_uartregs { + u8 iu_lcr; union { - u8 iu_rbr; /* read only, DLAB == 0 */ - u8 iu_thr; /* write only, DLAB == 0 */ - u8 iu_dll; /* DLAB == 1 */ + u8 iu_iir; /* read only */ + u8 iu_fcr; /* write only */ }; union { u8 iu_ier; /* DLAB == 0 */ u8 iu_dlm; /* DLAB == 1 */ }; union { - u8 iu_iir; /* read only */ - u8 iu_fcr; /* write only */ + u8 iu_rbr; /* read only, DLAB == 0 */ + u8 iu_thr; /* write only, DLAB == 0 */ + u8 iu_dll; /* DLAB == 1 */ }; - u8 iu_lcr; - u8 iu_mcr; - u8 iu_lsr; - u8 iu_msr; u8 iu_scr; + u8 iu_msr; + u8 iu_lsr; + u8 iu_mcr; }; struct ioc3_sioregs { u8 fill[0x141]; /* starts at 0x141 */ - u8 uartc; u8 kbdcg; + u8 uartc; - u8 fill0[0x150 - 0x142 - 1]; + u8 fill0[0x151 - 0x142 - 1]; - u8 pp_data; - u8 pp_dsr; u8 pp_dcr; + u8 pp_dsr; + u8 pp_data; - u8 fill1[0x158 - 0x152 - 1]; + u8 fill1[0x159 - 0x153 - 1]; - u8 pp_fifa; - u8 pp_cfgb; u8 pp_ecr; + u8 pp_cfgb; + u8 pp_fifa; - u8 fill2[0x168 - 0x15a - 1]; + u8 fill2[0x16a - 0x15b - 1]; - u8 rtcad; u8 rtcdat; + u8 rtcad; - u8 fill3[0x170 - 0x169 - 1]; + u8 fill3[0x170 - 0x16b - 1]; struct ioc3_uartregs uartb; /* 0x20170 */ struct ioc3_uartregs uarta; /* 0x20178 */ @@ -598,5 +598,9 @@ struct ioc3_etxd { #define IOC3_SUBSYS_IP30_SYSBOARD 0xc304 #define IOC3_SUBSYS_MENET 0xc305 #define IOC3_SUBSYS_MENET4 0xc306 +#define IOC3_SUBSYS_IO7 0xc307 +#define IOC3_SUBSYS_IO8 0xc308 +#define IOC3_SUBSYS_IO9 0xc309 +#define IOC3_SUBSYS_IP34_SYSBOARD 0xc30A #endif /* MIPS_SN_IOC3_H */ diff --git a/arch/mips/include/asm/sn/klconfig.h b/arch/mips/include/asm/sn/klconfig.h index 467c313d5767..117f85e4bef5 100644 --- a/arch/mips/include/asm/sn/klconfig.h +++ b/arch/mips/include/asm/sn/klconfig.h @@ -889,10 +889,6 @@ typedef union { extern lboard_t *find_lboard(lboard_t *start, unsigned char type); extern klinfo_t *find_component(lboard_t *brd, klinfo_t *kli, unsigned char type); extern klinfo_t *find_first_component(lboard_t *brd, unsigned char type); -extern klcpu_t *nasid_slice_to_cpuinfo(nasid_t, int); extern lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_class); - -extern klcpu_t *sn_get_cpuinfo(cpuid_t cpu); - #endif /* _ASM_SN_KLCONFIG_H */ diff --git a/arch/mips/include/asm/sn/kldir.h b/arch/mips/include/asm/sn/kldir.h index bfb3aec94539..245f59bf3845 100644 --- a/arch/mips/include/asm/sn/kldir.h +++ b/arch/mips/include/asm/sn/kldir.h @@ -1,201 +1,16 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Derived from IRIX <sys/SN/kldir.h>, revision 1.21. - * - * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc. - * Copyright (C) 1999, 2000 by Ralf Baechle - */ +/* SPDX-License-Identifier: GPL-2.0 */ + #ifndef _ASM_SN_KLDIR_H #define _ASM_SN_KLDIR_H - -/* - * The kldir memory area resides at a fixed place in each node's memory and - * provides pointers to most other IP27 memory areas. This allows us to - * resize and/or relocate memory areas at a later time without breaking all - * firmware and kernels that use them. Indices in the array are - * permanently dedicated to areas listed below. 
Some memory areas (marked - * below) reside at a permanently fixed location, but are included in the - * directory for completeness. - */ - #define KLDIR_MAGIC 0x434d5f53505f5357 -/* - * The upper portion of the memory map applies during boot - * only and is overwritten by IRIX/SYMMON. - * - * MEMORY MAP PER NODE - * - * 0x2000000 (32M) +-----------------------------------------+ - * | IO6 BUFFERS FOR FLASH ENET IOC3 | - * 0x1F80000 (31.5M) +-----------------------------------------+ - * | IO6 TEXT/DATA/BSS/stack | - * 0x1C00000 (30M) +-----------------------------------------+ - * | IO6 PROM DEBUG TEXT/DATA/BSS/stack | - * 0x0800000 (28M) +-----------------------------------------+ - * | IP27 PROM TEXT/DATA/BSS/stack | - * 0x1B00000 (27M) +-----------------------------------------+ - * | IP27 CFG | - * 0x1A00000 (26M) +-----------------------------------------+ - * | Graphics PROM | - * 0x1800000 (24M) +-----------------------------------------+ - * | 3rd Party PROM drivers | - * 0x1600000 (22M) +-----------------------------------------+ - * | | - * | Free | - * | | - * +-----------------------------------------+ - * | UNIX DEBUG Version | - * 0x190000 (2M--) +-----------------------------------------+ - * | SYMMON | - * | (For UNIX Debug only) | - * 0x34000 (208K) +-----------------------------------------+ - * | SYMMON STACK [NUM_CPU_PER_NODE] | - * | (For UNIX Debug only) | - * 0x25000 (148K) +-----------------------------------------+ - * | KLCONFIG - II (temp) | - * | | - * | ---------------------------- | - * | | - * | UNIX NON-DEBUG Version | - * 0x19000 (100K) +-----------------------------------------+ - * - * - * The lower portion of the memory map contains information that is - * permanent and is used by the IP27PROM, IO6PROM and IRIX. 
- * - * 0x19000 (100K) +-----------------------------------------+ - * | | - * | PI Error Spools (32K) | - * | | - * 0x12000 (72K) +-----------------------------------------+ - * | Unused | - * 0x11c00 (71K) +-----------------------------------------+ - * | CPU 1 NMI Eframe area | - * 0x11a00 (70.5K) +-----------------------------------------+ - * | CPU 0 NMI Eframe area | - * 0x11800 (70K) +-----------------------------------------+ - * | CPU 1 NMI Register save area | - * 0x11600 (69.5K) +-----------------------------------------+ - * | CPU 0 NMI Register save area | - * 0x11400 (69K) +-----------------------------------------+ - * | GDA (1k) | - * 0x11000 (68K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x10800 (66k) +-----------------------------------------+ - * | cache error eframe | - * 0x10400 (65K) +-----------------------------------------+ - * | Exception Handlers (UALIAS copy) | - * 0x10000 (64K) +-----------------------------------------+ - * | | - * | | - * | KLCONFIG - I (permanent) (48K) | - * | | - * | | - * | | - * 0x4000 (16K) +-----------------------------------------+ - * | NMI Handler (Protected Page) | - * 0x3000 (12K) +-----------------------------------------+ - * | ARCS PVECTORS (master node only) | - * 0x2c00 (11K) +-----------------------------------------+ - * | ARCS TVECTORS (master node only) | - * 0x2800 (10K) +-----------------------------------------+ - * | LAUNCH [NUM_CPU] | - * 0x2400 (9K) +-----------------------------------------+ - * | Low memory directory (KLDIR) | - * 0x2000 (8K) +-----------------------------------------+ - * | ARCS SPB (1K) | - * 0x1000 (4K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x800 (2k) +-----------------------------------------+ - * | cache error eframe | - * 0x400 (1K) +-----------------------------------------+ - * | Exception Handlers | - * 0x0 (0K) +-----------------------------------------+ - */ - -#ifdef __ASSEMBLY__ #define KLDIR_OFF_MAGIC 0x00 #define KLDIR_OFF_OFFSET 0x08 #define KLDIR_OFF_POINTER 0x10 #define KLDIR_OFF_SIZE 0x18 #define KLDIR_OFF_COUNT 0x20 #define KLDIR_OFF_STRIDE 0x28 -#endif /* __ASSEMBLY__ */ - -/* - * This is defined here because IP27_SYMMON_STK_SIZE must be at least what - * we define here. Since it's set up in the prom. We can't redefine it later - * and expect more space to be allocated. The way to find out the true size - * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE - * for a particular node. - */ -#define SYMMON_STACK_SIZE 0x8000 - -#if defined(PROM) - -/* - * These defines are prom version dependent. No code other than the IP27 - * prom should attempt to use these values. 
- */ -#define IP27_LAUNCH_OFFSET 0x2400 -#define IP27_LAUNCH_SIZE 0x400 -#define IP27_LAUNCH_COUNT 2 -#define IP27_LAUNCH_STRIDE 0x200 - -#define IP27_KLCONFIG_OFFSET 0x4000 -#define IP27_KLCONFIG_SIZE 0xc000 -#define IP27_KLCONFIG_COUNT 1 -#define IP27_KLCONFIG_STRIDE 0 - -#define IP27_NMI_OFFSET 0x3000 -#define IP27_NMI_SIZE 0x40 -#define IP27_NMI_COUNT 2 -#define IP27_NMI_STRIDE 0x40 - -#define IP27_PI_ERROR_OFFSET 0x12000 -#define IP27_PI_ERROR_SIZE 0x4000 -#define IP27_PI_ERROR_COUNT 1 -#define IP27_PI_ERROR_STRIDE 0 - -#define IP27_SYMMON_STK_OFFSET 0x25000 -#define IP27_SYMMON_STK_SIZE 0xe000 -#define IP27_SYMMON_STK_COUNT 2 -/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ -#define IP27_SYMMON_STK_STRIDE 0x7000 - -#define IP27_FREEMEM_OFFSET 0x19000 -#define IP27_FREEMEM_SIZE -1 -#define IP27_FREEMEM_COUNT 1 -#define IP27_FREEMEM_STRIDE 0 - -#endif /* PROM */ -/* - * There will be only one of these in a partition so the IO6 must set it up. - */ -#define IO6_GDA_OFFSET 0x11000 -#define IO6_GDA_SIZE 0x400 -#define IO6_GDA_COUNT 1 -#define IO6_GDA_STRIDE 0 - -/* - * save area of kernel nmi regs in the prom format - */ -#define IP27_NMI_KREGS_OFFSET 0x11400 -#define IP27_NMI_KREGS_CPU_SIZE 0x200 -/* - * save area of kernel nmi regs in eframe format - */ -#define IP27_NMI_EFRAME_OFFSET 0x11800 -#define IP27_NMI_EFRAME_SIZE 0x200 #define KLDIR_ENT_SIZE 0x40 #define KLDIR_MAX_ENTRIES (0x400 / 0x40) @@ -214,4 +29,8 @@ typedef struct kldir_ent_s { } kldir_ent_t; #endif /* !__ASSEMBLY__ */ +#ifdef CONFIG_SGI_IP27 +#include <asm/sn/sn0/kldir.h> +#endif + #endif /* _ASM_SN_KLDIR_H */ diff --git a/arch/mips/include/asm/sn/sn0/hub.h b/arch/mips/include/asm/sn/sn0/hub.h index d78dd76d5dcf..c84adde36d41 100644 --- a/arch/mips/include/asm/sn/sn0/hub.h +++ b/arch/mips/include/asm/sn/sn0/hub.h @@ -37,4 +37,26 @@ #define UATTR_MSPEC 2 #define UATTR_UNCAC 3 +#ifdef __ASSEMBLY__ +/* + * Returns the local nasid into res. + */ + .macro GET_NASID_ASM res + dli \res, LOCAL_HUB_ADDR(NI_STATUS_REV_ID) + ld \res, (\res) + and \res, NSRI_NODEID_MASK + dsrl \res, NSRI_NODEID_SHFT + .endm +#else + +/* + * get_nasid() returns the physical node id number of the caller. + */ +static inline nasid_t get_nasid(void) +{ + return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK) + >> NSRI_NODEID_SHFT); +} +#endif + #endif /* _ASM_SN_SN0_HUB_H */ diff --git a/arch/mips/include/asm/sn/sn0/hubni.h b/arch/mips/include/asm/sn/sn0/hubni.h index b73c4bee65f2..b8253142cb83 100644 --- a/arch/mips/include/asm/sn/sn0/hubni.h +++ b/arch/mips/include/asm/sn/sn0/hubni.h @@ -250,6 +250,14 @@ typedef union hubni_port_error_u { #define NI_LLP_CB_MAX 0xff #define NI_LLP_SN_MAX 0xff +static inline int get_region_shift(void) +{ + if (LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) + return NASID_TO_FINEREG_SHFT; + + return NASID_TO_COARSEREG_SHFT; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_SGI_SN0_HUBNI_H */ diff --git a/arch/mips/include/asm/sn/sn0/ip27.h b/arch/mips/include/asm/sn/sn0/ip27.h deleted file mode 100644 index 3b5efeefcc3f..000000000000 --- a/arch/mips/include/asm/sn/sn0/ip27.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Derived from IRIX <sys/SN/SN0/IP27.h>. - * - * Copyright (C) 1992 - 1997, 1999 Silicon Graphics, Inc. 
- * Copyright (C) 1999, 2006 by Ralf Baechle - */ -#ifndef _ASM_SN_SN0_IP27_H -#define _ASM_SN_SN0_IP27_H - -#include <asm/mipsregs.h> - -/* - * Simple definitions for the masks which remove SW bits from pte. - */ - -#define TLBLO_HWBITSHIFT 0 /* Shift value, for masking */ - -#ifndef __ASSEMBLY__ - -#define CAUSE_BERRINTR IE_IRQ5 - -#define ECCF_CACHE_ERR 0 -#define ECCF_TAGLO 1 -#define ECCF_ECC 2 -#define ECCF_ERROREPC 3 -#define ECCF_PADDR 4 -#define ECCF_SIZE (5 * sizeof(long)) - -#endif /* !__ASSEMBLY__ */ - -#ifdef __ASSEMBLY__ - -/* - * KL_GET_CPUNUM (similar to EV_GET_SPNUM for EVEREST platform) reads - * the processor number of the calling processor. The proc parameters - * must be a register. - */ -#define KL_GET_CPUNUM(proc) \ - dli proc, LOCAL_HUB(0); \ - ld proc, PI_CPU_NUM(proc) - -#endif /* __ASSEMBLY__ */ - -/* - * R10000 status register interrupt bit mask usage for IP27. - */ -#define SRB_SWTIMO IE_SW0 /* 0x0100 */ -#define SRB_NET IE_SW1 /* 0x0200 */ -#define SRB_DEV0 IE_IRQ0 /* 0x0400 */ -#define SRB_DEV1 IE_IRQ1 /* 0x0800 */ -#define SRB_TIMOCLK IE_IRQ2 /* 0x1000 */ -#define SRB_PROFCLK IE_IRQ3 /* 0x2000 */ -#define SRB_ERR IE_IRQ4 /* 0x4000 */ -#define SRB_SCHEDCLK IE_IRQ5 /* 0x8000 */ - -#define SR_IBIT_HI SRB_DEV0 -#define SR_IBIT_PROF SRB_PROFCLK - -#define SRB_SWTIMO_IDX 0 -#define SRB_NET_IDX 1 -#define SRB_DEV0_IDX 2 -#define SRB_DEV1_IDX 3 -#define SRB_TIMOCLK_IDX 4 -#define SRB_PROFCLK_IDX 5 -#define SRB_ERR_IDX 6 -#define SRB_SCHEDCLK_IDX 7 - -#define NUM_CAUSE_INTRS 8 - -#define SCACHE_LINESIZE 128 -#define SCACHE_LINEMASK (SCACHE_LINESIZE - 1) - -#include <asm/sn/addrs.h> - -#define LED_CYCLE_MASK 0x0f -#define LED_CYCLE_SHFT 4 - -#define SEND_NMI(_nasid, _slice) \ - REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1) - -#endif /* _ASM_SN_SN0_IP27_H */ diff --git a/arch/mips/include/asm/sn/sn0/kldir.h b/arch/mips/include/asm/sn/sn0/kldir.h new file mode 100644 index 000000000000..1b10af6cbd5e --- /dev/null +++ b/arch/mips/include/asm/sn/sn0/kldir.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Derived from IRIX <sys/SN/kldir.h>, revision 1.21. + * + * Copyright (C) 1992 - 1997, 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 1999, 2000 by Ralf Baechle + */ +#ifndef _ASM_SN_SN0_KLDIR_H +#define _ASM_SN_SN0_KLDIR_H + + +/* + * The kldir memory area resides at a fixed place in each node's memory and + * provides pointers to most other IP27 memory areas. This allows us to + * resize and/or relocate memory areas at a later time without breaking all + * firmware and kernels that use them. Indices in the array are + * permanently dedicated to areas listed below. Some memory areas (marked + * below) reside at a permanently fixed location, but are included in the + * directory for completeness. + */ + +/* + * The upper portion of the memory map applies during boot + * only and is overwritten by IRIX/SYMMON. 
+ * + * MEMORY MAP PER NODE + * + * 0x2000000 (32M) +-----------------------------------------+ + * | IO6 BUFFERS FOR FLASH ENET IOC3 | + * 0x1F80000 (31.5M) +-----------------------------------------+ + * | IO6 TEXT/DATA/BSS/stack | + * 0x1C00000 (30M) +-----------------------------------------+ + * | IO6 PROM DEBUG TEXT/DATA/BSS/stack | + * 0x0800000 (28M) +-----------------------------------------+ + * | IP27 PROM TEXT/DATA/BSS/stack | + * 0x1B00000 (27M) +-----------------------------------------+ + * | IP27 CFG | + * 0x1A00000 (26M) +-----------------------------------------+ + * | Graphics PROM | + * 0x1800000 (24M) +-----------------------------------------+ + * | 3rd Party PROM drivers | + * 0x1600000 (22M) +-----------------------------------------+ + * | | + * | Free | + * | | + * +-----------------------------------------+ + * | UNIX DEBUG Version | + * 0x190000 (2M--) +-----------------------------------------+ + * | SYMMON | + * | (For UNIX Debug only) | + * 0x34000 (208K) +-----------------------------------------+ + * | SYMMON STACK [NUM_CPU_PER_NODE] | + * | (For UNIX Debug only) | + * 0x25000 (148K) +-----------------------------------------+ + * | KLCONFIG - II (temp) | + * | | + * | ---------------------------- | + * | | + * | UNIX NON-DEBUG Version | + * 0x19000 (100K) +-----------------------------------------+ + * + * + * The lower portion of the memory map contains information that is + * permanent and is used by the IP27PROM, IO6PROM and IRIX. + * + * 0x19000 (100K) +-----------------------------------------+ + * | | + * | PI Error Spools (32K) | + * | | + * 0x12000 (72K) +-----------------------------------------+ + * | Unused | + * 0x11c00 (71K) +-----------------------------------------+ + * | CPU 1 NMI Eframe area | + * 0x11a00 (70.5K) +-----------------------------------------+ + * | CPU 0 NMI Eframe area | + * 0x11800 (70K) +-----------------------------------------+ + * | CPU 1 NMI Register save area | + * 0x11600 (69.5K) +-----------------------------------------+ + * | CPU 0 NMI Register save area | + * 0x11400 (69K) +-----------------------------------------+ + * | GDA (1k) | + * 0x11000 (68K) +-----------------------------------------+ + * | Early cache Exception stack | + * | and/or | + * | kernel/io6prom nmi registers | + * 0x10800 (66k) +-----------------------------------------+ + * | cache error eframe | + * 0x10400 (65K) +-----------------------------------------+ + * | Exception Handlers (UALIAS copy) | + * 0x10000 (64K) +-----------------------------------------+ + * | | + * | | + * | KLCONFIG - I (permanent) (48K) | + * | | + * | | + * | | + * 0x4000 (16K) +-----------------------------------------+ + * | NMI Handler (Protected Page) | + * 0x3000 (12K) +-----------------------------------------+ + * | ARCS PVECTORS (master node only) | + * 0x2c00 (11K) +-----------------------------------------+ + * | ARCS TVECTORS (master node only) | + * 0x2800 (10K) +-----------------------------------------+ + * | LAUNCH [NUM_CPU] | + * 0x2400 (9K) +-----------------------------------------+ + * | Low memory directory (KLDIR) | + * 0x2000 (8K) +-----------------------------------------+ + * | ARCS SPB (1K) | + * 0x1000 (4K) +-----------------------------------------+ + * | Early cache Exception stack | + * | and/or | + * | kernel/io6prom nmi registers | + * 0x800 (2k) +-----------------------------------------+ + * | cache error eframe | + * 0x400 (1K) +-----------------------------------------+ + * | Exception Handlers | + * 0x0 (0K) 
+-----------------------------------------+ + */ + +/* + * This is defined here because IP27_SYMMON_STK_SIZE must be at least what + * we define here. Since it's set up in the prom. We can't redefine it later + * and expect more space to be allocated. The way to find out the true size + * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE + * for a particular node. + */ +#define SYMMON_STACK_SIZE 0x8000 + +#if defined(PROM) + +/* + * These defines are prom version dependent. No code other than the IP27 + * prom should attempt to use these values. + */ +#define IP27_LAUNCH_OFFSET 0x2400 +#define IP27_LAUNCH_SIZE 0x400 +#define IP27_LAUNCH_COUNT 2 +#define IP27_LAUNCH_STRIDE 0x200 + +#define IP27_KLCONFIG_OFFSET 0x4000 +#define IP27_KLCONFIG_SIZE 0xc000 +#define IP27_KLCONFIG_COUNT 1 +#define IP27_KLCONFIG_STRIDE 0 + +#define IP27_NMI_OFFSET 0x3000 +#define IP27_NMI_SIZE 0x40 +#define IP27_NMI_COUNT 2 +#define IP27_NMI_STRIDE 0x40 + +#define IP27_PI_ERROR_OFFSET 0x12000 +#define IP27_PI_ERROR_SIZE 0x4000 +#define IP27_PI_ERROR_COUNT 1 +#define IP27_PI_ERROR_STRIDE 0 + +#define IP27_SYMMON_STK_OFFSET 0x25000 +#define IP27_SYMMON_STK_SIZE 0xe000 +#define IP27_SYMMON_STK_COUNT 2 +/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ +#define IP27_SYMMON_STK_STRIDE 0x7000 + +#define IP27_FREEMEM_OFFSET 0x19000 +#define IP27_FREEMEM_SIZE -1 +#define IP27_FREEMEM_COUNT 1 +#define IP27_FREEMEM_STRIDE 0 + +#endif /* PROM */ +/* + * There will be only one of these in a partition so the IO6 must set it up. + */ +#define IO6_GDA_OFFSET 0x11000 +#define IO6_GDA_SIZE 0x400 +#define IO6_GDA_COUNT 1 +#define IO6_GDA_STRIDE 0 + +/* + * save area of kernel nmi regs in the prom format + */ +#define IP27_NMI_KREGS_OFFSET 0x11400 +#define IP27_NMI_KREGS_CPU_SIZE 0x200 +/* + * save area of kernel nmi regs in eframe format + */ +#define IP27_NMI_EFRAME_OFFSET 0x11800 +#define IP27_NMI_EFRAME_SIZE 0x200 + +#endif /* _ASM_SN_SN0_KLDIR_H */ diff --git a/arch/mips/include/asm/sn/sn_private.h b/arch/mips/include/asm/sn/sn_private.h deleted file mode 100644 index 63a2c30d81c6..000000000000 --- a/arch/mips/include/asm/sn/sn_private.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SN_SN_PRIVATE_H -#define __ASM_SN_SN_PRIVATE_H - -#include <asm/sn/types.h> - -extern nasid_t master_nasid; - -extern void cpu_node_probe(void); -extern void hub_rtc_init(nasid_t nasid); -extern void cpu_time_init(void); -extern void per_cpu_init(void); -extern void install_cpu_nmi_handler(int slice); -extern void install_ipi(void); -extern void setup_replication_mask(void); -extern void replicate_kernel_text(void); -extern unsigned long node_getfirstfree(nasid_t nasid); - -#endif /* __ASM_SN_SN_PRIVATE_H */ diff --git a/arch/mips/include/asm/sn/types.h b/arch/mips/include/asm/sn/types.h index 203c927e84d1..451ba1ee41ad 100644 --- a/arch/mips/include/asm/sn/types.h +++ b/arch/mips/include/asm/sn/types.h @@ -11,6 +11,8 @@ #include <linux/types.h> +#ifndef __ASSEMBLY__ + typedef unsigned long cpuid_t; typedef signed short nasid_t; /* node id in numa-as-id space */ typedef signed char partid_t; /* partition ID type */ @@ -18,4 +20,6 @@ typedef signed short moduleid_t; /* user-visible module number type */ typedef dev_t vertex_hdl_t; /* hardware graph vertex handle */ +#endif + #endif /* _ASM_SN_TYPES_H */ diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 4dd0c446ecec..412d2faa3cdf 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -16,6 +16,10 
@@ config JZ4780_CI20 bool "MIPS Creator CI20" select MACH_JZ4780 +config X1000_CU1000_NEO + bool "YSH & ATIL CU1000 Module with Neo backplane" + select MACH_X1000 + endchoice config MACH_JZ4740 @@ -33,3 +37,9 @@ config MACH_JZ4780 select MIPS_CPU_SCACHE select SYS_HAS_CPU_MIPS32_R2 select SYS_SUPPORTS_HIGHMEM + +config MACH_X1000 + bool + select MIPS_CPU_SCACHE + select SYS_HAS_CPU_MIPS32_R2 + select SYS_SUPPORTS_HIGHMEM diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index dc8ee21e0948..880c26857aff 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -44,6 +44,8 @@ static void __init jz4740_detect_mem(void) static unsigned long __init get_board_mach_type(const void *fdt) { + if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1830")) + return MACH_INGENIC_X1830; if (!fdt_node_check_compatible(fdt, 0, "ingenic,x1000")) return MACH_INGENIC_X1000; if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780")) @@ -86,6 +88,8 @@ void __init device_tree_init(void) const char *get_system_type(void) { switch (mips_machtype) { + case MACH_INGENIC_X1830: + return "X1830"; case MACH_INGENIC_X1000: return "X1000"; case MACH_INGENIC_JZ4780: diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index c54332697673..6ab6b03d35ba 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -102,7 +102,12 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) if (fir & MIPS_FPIR_HAS2008) { fcsr = read_32bit_cp1_register(CP1_STATUS); - fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008); + /* + * MAC2008 toolchain never landed in real world, so we're only + * testing whether it can be disabled and don't try to enable + * it. + */ + fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008 | FPU_CSR_MAC2008); write_32bit_cp1_register(CP1_STATUS, fcsr0); fcsr0 = read_32bit_cp1_register(CP1_STATUS); @@ -112,6 +117,15 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) write_32bit_cp1_register(CP1_STATUS, fcsr); + if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2)) { + /* + * The bit for MAC2008 might be reused by R6 in future, + * so we only test for R2-R5. + */ + if (fcsr0 & FPU_CSR_MAC2008) + c->options |= MIPS_CPU_MAC_2008_ONLY; + } + if (!(fcsr0 & FPU_CSR_NAN2008)) c->options |= MIPS_CPU_NAN_LEGACY; if (fcsr1 & FPU_CSR_NAN2008) @@ -1960,10 +1974,8 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) BUG_ON(!__builtin_constant_p(cpu_has_counter) || cpu_has_counter); switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_XBURST: - c->cputype = CPU_XBURST; - c->writecombine = _CACHE_UNCACHED_ACCELERATED; - __cpu_name[cpu] = "Ingenic JZRISC"; + case PRID_IMP_XBURST_REV1: + /* * The XBurst core by default attempts to avoid branch target * buffer lookups by detecting & special casing loops. This * feature will cause BogoMIPS and lpj calculate in error. * Set cp0 config7 bit 4 to disable this feature. */ set_c0_config7(MIPS_CONF7_BTB_LOOP_EN); - break; - default: - panic("Unknown Ingenic Processor ID!"); - break; - } - switch (c->processor_id & PRID_COMP_MASK) { - /* - * The config0 register in the XBurst CPUs with a processor ID of - * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this - * mode is not compatible with the MIPS standard, it will cause - * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) - * when starting the init process.
After chip reset, the default - * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to - * switch back to VTLB mode to prevent getting stuck. - */ - case PRID_COMP_INGENIC_D1: - write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); - break; - /* - * The config0 register in the XBurst CPUs with a processor ID of - * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, - * but they don't actually support this ISA. - */ - case PRID_COMP_INGENIC_D0: - c->isa_level &= ~MIPS_CPU_ISA_M32R2; + switch (c->processor_id & PRID_COMP_MASK) { + + /* + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible, + * but they don't actually support this ISA. + */ + case PRID_COMP_INGENIC_D0: + c->isa_level &= ~MIPS_CPU_ISA_M32R2; + break; + + /* + * The config0 register in the XBurst CPUs with a processor ID of + * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this + * mode is not compatible with the MIPS standard, it will cause + * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) + * when starting the init process. After chip reset, the default + * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to + * switch back to VTLB mode to prevent getting stuck. + */ + case PRID_COMP_INGENIC_D1: + write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); + break; + + default: + break; + } + /* fall-through */ + case PRID_IMP_XBURST_REV2: + c->cputype = CPU_XBURST; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + __cpu_name[cpu] = "Ingenic XBurst"; break; + default: + panic("Unknown Ingenic Processor ID!"); break; } } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index a28057946ed1..1ac2752fb791 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -515,8 +515,7 @@ static void __init request_crashkernel(struct resource *res) ret = request_resource(res, &crashk_res); if (!ret) pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", - (unsigned long)((crashk_res.end - - crashk_res.start + 1) >> 20), + (unsigned long)(resource_size(&crashk_res) >> 20), (unsigned long)(crashk_res.start >> 20)); } #else /* !defined(CONFIG_KEXEC) */ @@ -698,8 +697,7 @@ static void __init arch_mem_init(char **cmdline_p) mips_parse_crashkernel(); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) - memblock_reserve(crashk_res.start, - crashk_res.end - crashk_res.start + 1); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); #endif device_tree_init(); sparse_init(); diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index f2973ce87f53..abdd7aaa3311 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -90,6 +90,9 @@ void synchronise_count_master(int cpu) void synchronise_count_slave(int cpu) { int i; + unsigned long flags; + + local_irq_save(flags); /* * Not every cpu is online at the time this gets called, @@ -113,5 +116,7 @@ void synchronise_count_slave(int cpu) } /* Arrange for an interrupt in a short while */ write_c0_compare(read_c0_count() + COUNTON); + + local_irq_restore(flags); } #undef NR_LOOPS diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6..6efb2f6889a7 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ 
'$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 92bd2b0f0548..ca6fc4762d97 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -131,7 +131,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -152,7 +152,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ @@ -187,7 +187,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ do { \ @@ -213,7 +213,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -256,7 +256,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ @@ -340,7 +340,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreHW(addr, value, res, type) \ @@ -366,7 +366,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -407,7 +407,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -483,7 +483,7 @@ do { \ : "memory"); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #else /* __BIG_ENDIAN */ @@ -509,7 +509,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -530,7 +530,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ @@ -565,7 +565,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ @@ -592,7 +592,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -635,7 +635,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ @@ -718,7 +718,7 @@ do { \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define _StoreHW(addr, value, 
res, type) \ do { \ @@ -743,7 +743,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -784,7 +784,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* For CPUs without swl and sdl instructions */ #define _StoreW(addr, value, res, type) \ do { \ @@ -861,7 +861,7 @@ do { \ : "memory"); \ } while(0) -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ #endif #define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 1109924560d8..2606f3f02b54 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -156,7 +156,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); } mutex_lock(&kvm->lock); @@ -280,25 +280,27 @@ static inline void dump_handler(const char *symbol, void *start, void *end) pr_debug("\tEND(%s)\n", symbol); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err, size; void *gebase, *p, *handler, *refill_start, *refill_end; int i; - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - - if (!vcpu) { - err = -ENOMEM; - goto out; - } - - err = kvm_vcpu_init(vcpu, kvm, id); + kvm_debug("kvm @ %p: create cpu %d at %p\n", + vcpu->kvm, vcpu->vcpu_id, vcpu); + err = kvm_mips_callbacks->vcpu_init(vcpu); if (err) - goto out_free_cpu; + return err; - kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu); + hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; /* * Allocate space for host mode exception handlers that handle @@ -313,7 +315,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_uninit_cpu; + goto out_uninit_vcpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -392,38 +394,33 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.last_sched_cpu = -1; vcpu->arch.last_exec_cpu = -1; - return vcpu; + /* Initial guest state */ + err = kvm_mips_callbacks->vcpu_setup(vcpu); + if (err) + goto out_free_commpage; + + return 0; +out_free_commpage: + kfree(vcpu->arch.kseg0_commpage); out_free_gebase: kfree(gebase); - -out_uninit_cpu: - kvm_vcpu_uninit(vcpu); - -out_free_cpu: - kfree(vcpu); - -out: - return ERR_PTR(err); +out_uninit_vcpu: + kvm_mips_callbacks->vcpu_uninit(vcpu); + return err; } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { hrtimer_cancel(&vcpu->arch.comparecount_timer); - kvm_vcpu_uninit(vcpu); - kvm_mips_dump_stats(vcpu); kvm_mmu_free_memory_caches(vcpu); kfree(vcpu->arch.guest_ebase); kfree(vcpu->arch.kseg0_commpage); - kfree(vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvm_mips_callbacks->vcpu_uninit(vcpu); } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -1233,37 +1230,12 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct 
hrtimer *timer) return kvm_mips_count_timeout(vcpu); } -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - int err; - - err = kvm_mips_callbacks->vcpu_init(vcpu); - if (err) - return err; - - hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; - return 0; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - kvm_mips_callbacks->vcpu_uninit(vcpu); -} - int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { return 0; } -/* Initial guest state */ -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return kvm_mips_callbacks->vcpu_setup(vcpu); -} - static void kvm_mips_set_c0_status(void) { u32 status = read_c0_status(); diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index cdd19d8561e8..f7994d936505 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -301,14 +301,14 @@ and t0, src, ADDRMASK PREFS( 0, 2*32(src) ) PREFD( 1, 2*32(dst) ) -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR bnez t1, .Ldst_unaligned\@ nop bnez t0, .Lsrc_unaligned_dst_aligned\@ -#else +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ or t0, t0, t1 bnez t0, .Lcopy_unaligned_bytes\@ -#endif +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ /* * use delay slot for fall-through * src and dst are aligned; need to compute rem @@ -389,7 +389,7 @@ bne rem, len, 1b .set noreorder -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) * A loop would do only a byte at a time with possible branch @@ -491,7 +491,7 @@ bne len, rem, 1b .set noreorder -#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */ .Lcopy_bytes_checklen\@: beqz len, .Ldone\@ nop @@ -520,7 +520,7 @@ jr ra nop -#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifdef CONFIG_CPU_NO_LOAD_STORE_LR .Lcopy_unaligned_bytes\@: 1: COPY_BYTE(0) @@ -534,7 +534,7 @@ ADD src, src, 8 b 1b ADD dst, dst, 8 -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ .if __memcpy == 1 END(memcpy) .set __memcpy, 0 diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 418611ef13cf..d5449e8a3dfc 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -115,7 +115,7 @@ #endif .set reorder -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR R10KCBARRIER(0(ra)) #ifdef __MIPSEB__ EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */ @@ -125,7 +125,7 @@ PTR_SUBU a0, t0 /* long align ptr */ PTR_ADDU a2, t0 /* correct size */ -#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ #define STORE_BYTE(N) \ EX(sb, a1, N(a0), .Lbyte_fixup\@); \ .set noreorder; \ @@ -150,7 +150,7 @@ ori a0, STORMASK xori a0, STORMASK PTR_ADDIU a0, STORSIZE -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ 1: ori t1, a2, 0x3f /* # of full blocks */ xori t1, 0x3f andi t0, a2, 0x40-STORSIZE @@ -185,7 +185,7 @@ .set noreorder beqz a2, 1f -#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR PTR_ADDU a0, a2 /* What's left */ .set reorder R10KCBARRIER(0(ra)) @@ -194,7 +194,7 @@ #else EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@) #endif -#else +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ PTR_SUBU t0, $0, a2 .set reorder move a2, zero /* No remaining longs */ @@ -211,7 +211,7 @@ EX(sb, a1, 6(a0), .Lbyte_fixup\@) #endif 0: -#endif +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ 1: move 
a2, zero jr ra @@ -234,7 +234,7 @@ .hidden __memset .endif -#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR +#ifdef CONFIG_CPU_NO_LOAD_STORE_LR .Lbyte_fixup\@: /* * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1 @@ -243,7 +243,7 @@ PTR_SUBU a2, t0 PTR_ADDIU a2, 1 jr ra -#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ .Lfirst_fixup\@: /* unset_bytes already in a2 */ diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index 5530070e0d05..de03838b343b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -15,7 +15,7 @@ #include <linux/export.h> #include <linux/stringify.h> -#if !defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_CPU_MIPSR6) +#if !defined(CONFIG_CPU_HAS_DIEI) /* * For cli() we have to insert nops to make sure that the new value @@ -110,4 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags) } EXPORT_SYMBOL(arch_local_irq_restore); -#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_DIEI */ diff --git a/arch/mips/loongson2ef/common/pm.c b/arch/mips/loongson2ef/common/pm.c index 11f4cfd581fb..bcb7ae9777cf 100644 --- a/arch/mips/loongson2ef/common/pm.c +++ b/arch/mips/loongson2ef/common/pm.c @@ -91,7 +91,7 @@ static inline void stop_perf_counters(void) static void loongson_suspend_enter(void) { - static unsigned int cached_cpu_freq; + unsigned int cached_cpu_freq; /* setup wakeup events via enabling the IRQs */ setup_wakeup_events(); diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index ef94a2278561..e5b40c5e3296 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -75,7 +75,7 @@ static int __init compute_node_distance(int row, int col) loongson_sysconf.cores_per_package; if (col == row) - return 0; + return LOCAL_DISTANCE; else if (package_row == package_col) return 40; else diff --git a/arch/mips/loongson64/platform.c b/arch/mips/loongson64/platform.c index 13f3404f0030..9674ae1361a8 100644 --- a/arch/mips/loongson64/platform.c +++ b/arch/mips/loongson64/platform.c @@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 710e1f804a54..9701c89e7e14 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1514,16 +1514,28 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case madd_s_op: - handler = fpemu_sp_madd; + if (cpu_has_mac2008_only) + handler = ieee754sp_madd; + else + handler = fpemu_sp_madd; goto scoptop; case msub_s_op: - handler = fpemu_sp_msub; + if (cpu_has_mac2008_only) + handler = ieee754sp_msub; + else + handler = fpemu_sp_msub; goto scoptop; case nmadd_s_op: - handler = fpemu_sp_nmadd; + if (cpu_has_mac2008_only) + handler = ieee754sp_nmadd; + else + handler = fpemu_sp_nmadd; goto scoptop; case nmsub_s_op: - handler = fpemu_sp_nmsub; + if (cpu_has_mac2008_only) + handler = ieee754sp_nmsub; + else + handler = fpemu_sp_nmsub; goto scoptop; scoptop: @@ -1610,15 +1622,27 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; case madd_d_op: - handler = fpemu_dp_madd; + if (cpu_has_mac2008_only) + handler = ieee754dp_madd; + else + handler = fpemu_dp_madd; goto dcoptop; case 
msub_d_op: - handler = fpemu_dp_msub; + if (cpu_has_mac2008_only) + handler = ieee754dp_msub; + else + handler = fpemu_dp_msub; goto dcoptop; case nmadd_d_op: - handler = fpemu_dp_nmadd; + if (cpu_has_mac2008_only) + handler = ieee754dp_nmadd; + else + handler = fpemu_dp_nmadd; goto dcoptop; case nmsub_d_op: + if (cpu_has_mac2008_only) + handler = ieee754dp_nmsub; + else handler = fpemu_dp_nmsub; goto dcoptop; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 3da0ce44cdef..e24ef374d828 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -68,6 +68,12 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ieee754_clearcx(); + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + /* * Handle the cases when at least one of x, y or z is a NaN. * Order of precedence is sNaN, qNaN and z, x, y. @@ -104,9 +110,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if ((zc == IEEE754_CLASS_INF) && - ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { /* * Cases of addition of infinities with opposite signs * or subtraction of infinities with same signs. @@ -116,15 +120,10 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, } /* * z is here either not an infinity, or an infinity having the - * same sign as product (x*y) (in case of MADDF.D instruction) - * or product -(x*y) (in MSUBF.D case). The result must be an - * infinity, and its sign is determined only by the value of - * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). */ - if (flags & MADDF_NEGATE_PRODUCT) - return ieee754dp_inf(1 ^ (xs ^ ys)); - else - return ieee754dp_inf(xs ^ ys); + return ieee754dp_inf(rs); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -135,10 +134,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, return ieee754dp_inf(zs); if (zc == IEEE754_CLASS_ZERO) { /* Handle cases +0 + (-0) and similar ones. */ - if ((!(flags & MADDF_NEGATE_PRODUCT) - && (zs == (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) - && (zs != (xs ^ ys)))) + if (zs == rs) /* * Cases of addition of zeros of equal signs * or subtraction of zeroes of opposite signs. 
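The double-precision rework above hinges on computing the effective signs once, up front: rs becomes the sign the (possibly negated) product x*y will carry, and zs is flipped when the addend is negated, so the infinity and zero corner cases collapse to a comparison of zs against rs. A minimal standalone sketch of that sign bookkeeping (illustrative only, not the kernel helper itself; flag names are the ones from ieee754int.h):

	static inline void maddf_signs(unsigned int xs, unsigned int ys,
				       unsigned int *zs, unsigned int flags,
				       unsigned int *rs)
	{
		*rs = xs ^ ys;			/* sign of the product x*y */
		if (flags & MADDF_NEGATE_PRODUCT)
			*rs ^= 1;		/* -(x*y): nmadd, nmsub */
		if (flags & MADDF_NEGATE_ADDITION)
			*zs ^= 1;		/* -z: msub, nmadd */
	}

With the signs normalised this way, "infinities with opposite signs" is simply zs != rs and "zeros of equal signs" is zs == rs, whichever of the fused operations is being emulated.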
@@ -187,9 +183,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, assert(ym & DP_HIDDEN_BIT); re = xe + ye; - rs = xs ^ ys; - if (flags & MADDF_NEGATE_PRODUCT) - rs ^= 1; /* shunt to top of word */ xm <<= 64 - (DP_FBITS + 1); @@ -340,3 +333,27 @@ union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, { return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } + +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h index b9167bd4eb60..090caa740b1e 100644 --- a/arch/mips/math-emu/ieee754.h +++ b/arch/mips/math-emu/ieee754.h @@ -68,6 +68,14 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, union ieee754sp y); +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y); +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y); int ieee754sp_2008class(union ieee754sp x); union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y); @@ -103,6 +111,14 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp y); union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, union ieee754dp y); +union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, + union ieee754dp y); +union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, + union ieee754dp y); int ieee754dp_2008class(union ieee754dp x); union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y); union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y); diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 52b20119e315..2c3b13546ac8 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -16,6 +16,7 @@ enum maddf_flags { MADDF_NEGATE_PRODUCT = 1 << 0, + MADDF_NEGATE_ADDITION = 1 << 1, }; static inline void ieee754_clearcx(void) diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index d638354add6d..1b85b1a527ac 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -36,6 +36,12 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, ieee754_clearcx(); + rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; + if (flags & MADDF_NEGATE_ADDITION) + zs ^= 1; + /* * Handle the cases when at least one of x, y or z is a NaN. 
* Order of precedence is sNaN, qNaN and z, x, y. @@ -73,9 +79,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if ((zc == IEEE754_CLASS_INF) && - ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { /* * Cases of addition of infinities with opposite signs * or subtraction of infinities with same signs. @@ -85,15 +89,10 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, } /* * z is here either not an infinity, or an infinity having the - * same sign as product (x*y) (in case of MADDF.D instruction) - * or product -(x*y) (in MSUBF.D case). The result must be an - * infinity, and its sign is determined only by the value of - * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + * same sign as product (x*y). The result must be an infinity, + * and its sign is determined only by the sign of product (x*y). */ - if (flags & MADDF_NEGATE_PRODUCT) - return ieee754sp_inf(1 ^ (xs ^ ys)); - else - return ieee754sp_inf(xs ^ ys); + return ieee754sp_inf(rs); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -104,10 +103,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, return ieee754sp_inf(zs); if (zc == IEEE754_CLASS_ZERO) { /* Handle cases +0 + (-0) and similar ones. */ - if ((!(flags & MADDF_NEGATE_PRODUCT) - && (zs == (xs ^ ys))) || - ((flags & MADDF_NEGATE_PRODUCT) - && (zs != (xs ^ ys)))) + if (zs == rs) /* * Cases of addition of zeros of equal signs * or subtraction of zeroes of opposite signs. 
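The single-precision path below mirrors the same simplification. The new madd/msub/nmadd/nmsub entry points (double precision above, single precision below) differ only in the flag combination they hand to the shared _dp_maddf/_sp_maddf helper, which is what lets cp1emu.c select them for the cpu_has_mac2008_only case. As a rough summary of the mapping, assuming the usual pre-R6 MIPS semantics for these mnemonics (flag names from ieee754int.h):

	/*
	 * MADD.fmt   x*y + z      flags = 0
	 * MSUB.fmt   x*y - z      flags = MADDF_NEGATE_ADDITION
	 * NMADD.fmt  -(x*y + z)   flags = MADDF_NEGATE_PRODUCT | MADDF_NEGATE_ADDITION
	 * NMSUB.fmt  -(x*y - z)   flags = MADDF_NEGATE_PRODUCT
	 */

For example, -(x*y - z) is -(x*y) + z, so NMSUB only needs the product sign flipped while z keeps its own sign, which matches the nmsub wrappers.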
@@ -158,9 +154,6 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, assert(ym & SP_HIDDEN_BIT); re = xe + ye; - rs = xs ^ ys; - if (flags & MADDF_NEGATE_PRODUCT) - rs ^= 1; /* Multiple 24 bit xm and ym to give 48 bit results */ rm64 = (uint64_t)xm * ym; @@ -260,3 +253,27 @@ union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, { return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } + +union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); +} + +union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 50f9ed8c6c1b..79684000de0e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -508,6 +508,51 @@ void __ref free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(cpu_to_node(from), cpu_to_node(to)); +} + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, + cpu_to_node(cpu)); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + #ifndef CONFIG_MIPS_PGD_C0_CONTEXT unsigned long pgd_current[NR_CPUS]; #endif diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 2d03af7d6b19..d55912349039 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code +obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c new file mode 100644 index 000000000000..0af88622c619 --- /dev/null +++ b/arch/mips/net/bpf_jit.c @@ -0,0 +1,1270 @@ +/* + * Just-In-Time compiler for BPF filters on MIPS + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ */ + +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/if_vlan.h> +#include <linux/moduleloader.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <asm/asm.h> +#include <asm/bitops.h> +#include <asm/cacheflush.h> +#include <asm/cpu-features.h> +#include <asm/uasm.h> + +#include "bpf_jit.h" + +/* ABI + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length + * + * On entry (*bpf_func)(*skb, *filter) + * a0 = MIPS_R_A0 = skb; + * a1 = MIPS_R_A1 = filter; + * + * Stack + * ... + * M[15] + * M[14] + * M[13] + * ... + * M[0] <-- r_M + * saved reg k-1 + * saved reg k-2 + * ... + * saved reg 0 <-- r_sp + * <no argument area> + * + * Packet layout + * + * <--------------------- len ------------------------> + * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> + * ---------------------------------------------------- + * | skb->data | + * ---------------------------------------------------- + */ + +#define ptr typeof(unsigned long) + +#define SCRATCH_OFF(k) (4 * (k)) + +/* JIT flags */ +#define SEEN_CALL (1 << BPF_MEMWORDS) +#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) +#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) +#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) +#define SEEN_OFF SEEN_SREG(2) +#define SEEN_A SEEN_SREG(3) +#define SEEN_X SEEN_SREG(4) +#define SEEN_SKB SEEN_SREG(5) +#define SEEN_MEM SEEN_SREG(6) +/* SEEN_SK_DATA also implies skb_hl an skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) + +/* Arguments used by JIT */ +#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ + +#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @prologue_bytes: Number of bytes for prologue + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + */ +struct jit_ctx { + const struct bpf_prog *skf; + unsigned int prologue_bytes; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; +}; + + +static inline int optimize_div(u32 *k) +{ + /* power of 2 divides can be implemented with right shift */ + if (!(*k & (*k-1))) { + *k = ilog2(*k); + return 1; + } + + return 0; +} + +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* + * Similar to emit_instr but it must be used when we need to emit + * 32-bit or 64-bit instructions + */ +#define emit_long_instr(ctx, func, ...) 
\ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + UASM_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* Determine if immediate is within the 16-bit signed range */ +static inline bool is_range16(s32 imm) +{ + return !(imm >= SBIT(15) || imm < -SBIT(15)); +} + +static inline void emit_addu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, addu, dst, src1, src2); +} + +static inline void emit_nop(struct jit_ctx *ctx) +{ + emit_instr(ctx, nop); +} + +/* Load a u32 immediate to a register */ +static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + /* addiu can only handle s16 */ + if (!is_range16(imm)) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); + p = &ctx->target[ctx->idx + 1]; + uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); + } else { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_addiu(&p, dst, r_zero, imm); + } + } + ctx->idx++; + + if (!is_range16(imm)) + ctx->idx++; +} + +static inline void emit_or(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, or, dst, src1, src2); +} + +static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, + struct jit_ctx *ctx) +{ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_or(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, ori, dst, src, imm); + } +} + +static inline void emit_daddiu(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* + * Only used for stack, so the imm is relatively small + * and it fits in 15-bits + */ + emit_instr(ctx, daddiu, dst, src, imm); +} + +static inline void emit_addiu(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + if (!is_range16(imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_addu(dst, r_tmp, src, ctx); + } else { + emit_instr(ctx, addiu, dst, src, imm); + } +} + +static inline void emit_and(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, and, dst, src1, src2); +} + +static inline void emit_andi(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_and(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, andi, dst, src, imm); + } +} + +static inline void emit_xor(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, xor, dst, src1, src2); +} + +static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_xor(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, xori, dst, src, imm); + } +} + +static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); +} + +static inline void emit_subu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, subu, dst, src1, src2); +} + +static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) +{ + emit_subu(reg, r_zero, reg, ctx); +} + +static inline void emit_sllv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, sllv, dst, src, sa); +} + +static inline void emit_sll(unsigned int dst, unsigned int src, + unsigned int sa, 
struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, sll, dst, src, sa); +} + +static inline void emit_srlv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, srlv, dst, src, sa); +} + +static inline void emit_srl(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, srl, dst, src, sa); +} + +static inline void emit_slt(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, slt, dst, src1, src2); +} + +static inline void emit_sltu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, sltu, dst, src1, src2); +} + +static inline void emit_sltiu(unsigned dst, unsigned int src, + unsigned int imm, struct jit_ctx *ctx) +{ + /* 16 bit immediate */ + if (!is_range16((s32)imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_sltu(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, sltiu, dst, src, imm); + } + +} + +/* Store register on the stack */ +static inline void emit_store_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, SW, reg, offset, base); +} + +static inline void emit_store(ptr reg, ptr base, unsigned int offset, + struct jit_ctx *ctx) +{ + emit_instr(ctx, sw, reg, offset, base); +} + +static inline void emit_load_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, LW, reg, offset, base); +} + +static inline void emit_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lw, reg, offset, base); +} + +static inline void emit_load_byte(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lb, reg, offset, base); +} + +static inline void emit_half_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lh, reg, offset, base); +} + +static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lhu, reg, offset, base); +} + +static inline void emit_mul(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, mul, dst, src1, src2); +} + +static inline void emit_div(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mflo(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_mod(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mfhi(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_dsll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsll, dst, src, sa); +} + +static inline void emit_dsrl32(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsrl32, dst, src, sa); +} + +static inline void emit_wsbh(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + emit_instr(ctx, 
wsbh, dst, src); +} + +/* load pointer to register */ +static inline void emit_load_ptr(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* src contains the base addr of the 32/64-pointer */ + emit_long_instr(ctx, LW, dst, imm, src); +} + +/* load a function pointer to register */ +static inline void emit_load_func(unsigned int reg, ptr imm, + struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + /* At this point imm is always 64-bit */ + emit_load_imm(r_tmp, (u64)imm >> 32, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); + } else { + emit_load_imm(reg, imm, ctx); + } +} + +/* Move to real MIPS register */ +static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDU, dst, src, r_zero); +} + +/* Move to JIT (32-bit) register */ +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_addu(dst, src, r_zero, ctx); +} + +/* Compute the immediate value for PC-relative branches. */ +static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (ctx->target == NULL) + return 0; + + /* + * We want a pc-relative branch. We only do forward branches + * so tgt is always after pc. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return ctx->offsets[tgt] - + (ctx->idx * 4 - ctx->prologue_bytes) - 4; +} + +static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, + unsigned int imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + + switch (cond) { + case MIPS_COND_EQ: + uasm_i_beq(&p, reg1, reg2, imm); + break; + case MIPS_COND_NE: + uasm_i_bne(&p, reg1, reg2, imm); + break; + case MIPS_COND_ALL: + uasm_i_b(&p, imm); + break; + default: + pr_warn("%s: Unhandled branch conditional: %d\n", + __func__, cond); + } + } + ctx->idx++; +} + +static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) +{ + emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); +} + +static inline void emit_jalr(unsigned int link, unsigned int reg, + struct jit_ctx *ctx) +{ + emit_instr(ctx, jalr, link, reg); +} + +static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) +{ + emit_instr(ctx, jr, reg); +} + +static inline u16 align_sp(unsigned int num) +{ + /* Double word alignment for 32-bit, quadword for 64-bit */ + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 
16 : 8; + num = (num + (align - 1)) & -align; + return num; +} + +static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) +{ + int i = 0, real_off = 0; + u32 sflags, tmp_flags; + + /* Adjust the stack pointer */ + if (offset) + emit_stack_offset(-align_sp(offset), ctx); + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is essentially a bitmap */ + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* save return address */ + if (ctx->flags & SEEN_CALL) { + emit_store_stack_reg(r_ra, r_sp, real_off, ctx); + real_off += SZREG; + } + + /* Setup r_M leaving the alignment gap if necessary */ + if (ctx->flags & SEEN_MEM) { + if (real_off % (SZREG * 2)) + real_off += SZREG; + emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); + } +} + +static void restore_bpf_jit_regs(struct jit_ctx *ctx, + unsigned int offset) +{ + int i, real_off = 0; + u32 sflags, tmp_flags; + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is a bitmap */ + i = 0; + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* restore return address */ + if (ctx->flags & SEEN_CALL) + emit_load_stack_reg(r_ra, r_sp, real_off, ctx); + + /* Restore the sp and discard the scrach memory */ + if (offset) + emit_stack_offset(align_sp(offset), ctx); +} + +static unsigned int get_stack_depth(struct jit_ctx *ctx) +{ + int sp_off = 0; + + + /* How may s* regs do we need to preserved? */ + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; + + if (ctx->flags & SEEN_MEM) + sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ + + if (ctx->flags & SEEN_CALL) + sp_off += SZREG; /* Space for our ra register */ + + return sp_off; +} + +static void build_prologue(struct jit_ctx *ctx) +{ + int sp_off; + + /* Calculate the total offset for the stack pointer */ + sp_off = get_stack_depth(ctx); + save_bpf_jit_regs(ctx, sp_off); + + if (ctx->flags & SEEN_SKB) + emit_reg_move(r_skb, MIPS_R_A0, ctx); + + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + + if (ctx->flags & SEEN_X) + emit_jit_reg_move(r_X, r_zero, ctx); + + /* + * Do not leak kernel data to userspace, we only need to clear + * r_A if it is ever used. In fact if it is never used, we + * will not save/restore it, so clearing it in this case would + * corrupt the state of the caller. + */ + if (bpf_needs_clear_a(&ctx->skf->insns[0]) && + (ctx->flags & SEEN_A)) + emit_jit_reg_move(r_A, r_zero, ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + unsigned int sp_off; + + /* Calculate the total offset for the stack pointer */ + + sp_off = get_stack_depth(ctx); + restore_bpf_jit_regs(ctx, sp_off); + + /* Return */ + emit_jr(r_ra, ctx); + emit_nop(ctx); +} + +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ + func##_positive) + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct sock_filter *inst; + unsigned int i, off, condt; + u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); + + for (i = 0; i < prog->len; i++) { + u16 code; + + inst = &(prog->insns[i]); + pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", + __func__, inst->code, inst->jt, inst->jf, inst->k); + k = inst->k; + code = bpf_anc_helper(inst); + + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + switch (code) { + case BPF_LD | BPF_IMM: + /* A <- k ==> li r_A, k */ + ctx->flags |= SEEN_A; + emit_load_imm(r_A, k, ctx); + break; + case BPF_LD | BPF_W | BPF_LEN: + BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); + /* A <- len ==> lw r_A, offset(skb) */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, len); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_LD | BPF_MEM: + /* A <- M[k] ==> lw r_A, offset(M) */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LD | BPF_W | BPF_ABS: + /* A <- P[k:4] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); + goto load; + case BPF_LD | BPF_H | BPF_ABS: + /* A <- P[k:2] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); + goto load; + case BPF_LD | BPF_B | BPF_ABS: + /* A <- P[k:1] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); +load: + emit_load_imm(r_off, k, ctx); +load_common: + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; + + emit_load_func(r_s0, (ptr)sk_load_func, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + /* Load second argument to delay slot */ + emit_reg_move(MIPS_R_A1, r_off, ctx); + /* Check the error value */ + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); + /* Return with error */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_nop(ctx); + break; + case BPF_LD | BPF_W | BPF_IND: + /* A <- P[X + k:4] */ + sk_load_func = sk_load_word; + goto load_ind; + case BPF_LD | BPF_H | BPF_IND: + /* A <- P[X + k:2] */ + sk_load_func = sk_load_half; + goto load_ind; + case BPF_LD | BPF_B | BPF_IND: + /* A <- P[X + k:1] */ + sk_load_func = sk_load_byte; +load_ind: + ctx->flags |= SEEN_OFF | SEEN_X; + emit_addiu(r_off, r_X, k, ctx); + goto load_common; + case BPF_LDX | BPF_IMM: + /* X <- k */ + ctx->flags |= SEEN_X; + emit_load_imm(r_X, k, ctx); + break; + case BPF_LDX | BPF_MEM: + /* X <- M[k] */ + ctx->flags |= SEEN_X | SEEN_MEM; + emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LDX | BPF_W | BPF_LEN: + /* X <- len */ + ctx->flags |= SEEN_X | SEEN_SKB; + off = offsetof(struct sk_buff, len); + emit_load(r_X, r_skb, off, ctx); + break; + case BPF_LDX | BPF_B | BPF_MSH: + /* X <- 4 * (P[k:1] & 0xf) */ + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; + /* Load offset to a1 */ + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); + /* + * This may emit two instructions so it may not fit + * in the delay slot. So use a0 in the delay slot. 
+ */ + emit_load_imm(MIPS_R_A1, k, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ + /* Check the error value */ + emit_bcond(MIPS_COND_NE, r_ret, 0, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); + /* We are good */ + /* X <- P[1:K] & 0xf */ + emit_andi(r_X, r_A, 0xf, ctx); + /* X << 2 */ + emit_b(b_imm(i + 1, ctx), ctx); + emit_sll(r_X, r_X, 2, ctx); /* delay slot */ + break; + case BPF_ST: + /* M[k] <- A */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_STX: + /* M[k] <- X */ + ctx->flags |= SEEN_MEM | SEEN_X; + emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + /* A += K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + /* A += X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_addu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + /* A -= K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, -k, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + /* A -= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_subu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + /* A *= K */ + /* Load K to scratch register before MUL */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mul(r_A, r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + /* A *= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_mul(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + /* A /= k */ + if (k == 1) + break; + if (optimize_div(&k)) { + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + } + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_div(r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + /* A %= k */ + if (k == 1) { + ctx->flags |= SEEN_A; + emit_jit_reg_move(r_A, r_zero, ctx); + } else { + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mod(r_A, r_s0, ctx); + } + break; + case BPF_ALU | BPF_DIV | BPF_X: + /* A /= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + emit_bcond(MIPS_COND_EQ, r_X, r_zero, + b_imm(prog->len, ctx), ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_div(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_X: + /* A %= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + emit_bcond(MIPS_COND_EQ, r_X, r_zero, + b_imm(prog->len, ctx), ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_mod(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + /* A |= K */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + /* A |= X */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + /* A ^= k */ + ctx->flags |= SEEN_A; + emit_xori(r_A, r_A, k, ctx); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: + /* A ^= X */ + ctx->flags |= SEEN_A; + emit_xor(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + /* A &= K */ + ctx->flags |= SEEN_A; + emit_andi(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + /* A &= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_and(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + /* A <<= K */ + ctx->flags |= SEEN_A; + emit_sll(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_X: + /* A <<= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_sllv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + /* A >>= K */ + ctx->flags |= SEEN_A; + 
emit_srl(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + ctx->flags |= SEEN_A | SEEN_X; + emit_srlv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_NEG: + /* A = -A */ + ctx->flags |= SEEN_A; + emit_neg(r_A, ctx); + break; + case BPF_JMP | BPF_JA: + /* pc += K */ + emit_b(b_imm(i + k + 1, ctx), ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JEQ | BPF_K: + /* pc += ( A == K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JEQ | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A == X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_K: + /* pc += ( A >= K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A >= X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_K: + /* pc += ( A > K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A > X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_X; +jmp_cmp: + /* Greater or Equal */ + if ((condt & MIPS_COND_GE) || + (condt & MIPS_COND_GT)) { + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_sltiu(r_s0, r_A, k, ctx); + } else { /* X */ + ctx->flags |= SEEN_A | + SEEN_X; + emit_sltu(r_s0, r_A, r_X, ctx); + } + /* A < (K|X) ? r_scrach = 1 */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, + ctx); + emit_nop(ctx); + /* A > (K|X) ? scratch = 0 */ + if (condt & MIPS_COND_GT) { + /* Checking for equality */ + ctx->flags |= SEEN_A | SEEN_X; + if (condt & MIPS_COND_K) + emit_load_imm(r_s0, k, ctx); + else + emit_jit_reg_move(r_s0, r_X, + ctx); + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* Finally, A > K|X */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } else { + /* A >= (K|X) so jump */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } + } else { + /* A == K|X */ + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, + ctx); + emit_bcond(MIPS_COND_NE, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + } else { /* X */ + /* jump true */ + ctx->flags |= SEEN_A | SEEN_X; + b_off = b_imm(i + inst->jt + 1, + ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + } + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + ctx->flags |= SEEN_A; + /* pc += (A & K) ? pc -> jt : pc -> jf */ + emit_load_imm(r_s1, k, ctx); + emit_and(r_s0, r_A, r_s1, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JSET | BPF_X: + ctx->flags |= SEEN_X | SEEN_A; + /* pc += (A & X) ? 
pc -> jt : pc -> jf */ + emit_and(r_s0, r_A, r_X, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_RET | BPF_A: + ctx->flags |= SEEN_A; + if (i != prog->len - 1) + /* + * If this is not the last instruction + * then jump to the epilogue + */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_A, ctx); /* delay slot */ + break; + case BPF_RET | BPF_K: + /* + * It can emit two instructions so it does not fit on + * the delay slot. + */ + emit_load_imm(r_ret, k, ctx); + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + emit_b(b_imm(prog->len, ctx), ctx); + emit_nop(ctx); + } + break; + case BPF_MISC | BPF_TAX: + /* X = A */ + ctx->flags |= SEEN_X | SEEN_A; + emit_jit_reg_move(r_X, r_A, ctx); + break; + case BPF_MISC | BPF_TXA: + /* A = X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_jit_reg_move(r_A, r_X, ctx); + break; + /* AUX */ + case BPF_ANC | SKF_AD_PROTOCOL: + /* A = ntohs(skb->protocol */ + ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + protocol) != 2); + off = offsetof(struct sk_buff, protocol); + emit_half_load(r_A, r_skb, off, ctx); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* This needs little endian fixup */ + if (cpu_has_wsbh) { + /* R2 and later have the wsbh instruction */ + emit_wsbh(r_A, r_A, ctx); + } else { + /* Get first byte */ + emit_andi(r_tmp_imm, r_A, 0xff, ctx); + /* Shift it */ + emit_sll(r_tmp, r_tmp_imm, 8, ctx); + /* Get second byte */ + emit_srl(r_tmp_imm, r_A, 8, ctx); + emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); + /* Put everyting together in r_A */ + emit_or(r_A, r_tmp, r_tmp_imm, ctx); + } +#endif + break; + case BPF_ANC | SKF_AD_CPU: + ctx->flags |= SEEN_A | SEEN_OFF; + /* A = current_thread_info()->cpu */ + BUILD_BUG_ON(sizeof_field(struct thread_info, + cpu) != 4); + off = offsetof(struct thread_info, cpu); + /* $28/gp points to the thread_info struct */ + emit_load(r_A, 28, off, ctx); + break; + case BPF_ANC | SKF_AD_IFINDEX: + /* A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_HATYPE: + /* A = skb->dev->type */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, dev); + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); + /* error (0) in the delay slot */ + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); + off = offsetof(struct net_device, ifindex); + emit_load(r_A, r_s0, off, ctx); + } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ + BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2); + off = offsetof(struct net_device, type); + emit_half_load_unsigned(r_A, r_s0, off, ctx); + } + break; + case BPF_ANC | SKF_AD_MARK: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); + off = offsetof(struct sk_buff, mark); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_RXHASH: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + vlan_tci) != 2); + off = offsetof(struct 
sk_buff, vlan_tci); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + ctx->flags |= SEEN_SKB | SEEN_A; + emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); + if (PKT_VLAN_PRESENT_BIT) + emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); + if (PKT_VLAN_PRESENT_BIT < 7) + emit_andi(r_A, r_A, 1, ctx); + break; + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->flags |= SEEN_SKB; + + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); + /* Keep only the last 3 bits */ + emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); +#ifdef __BIG_ENDIAN_BITFIELD + /* Get the actual packet type to the lower 3 bits */ + emit_srl(r_A, r_A, 5, ctx); +#endif + break; + case BPF_ANC | SKF_AD_QUEUE: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(sizeof_field(struct sk_buff, + queue_mapping) != 2); + BUILD_BUG_ON(offsetof(struct sk_buff, + queue_mapping) > 0xff); + off = offsetof(struct sk_buff, queue_mapping); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + default: + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, + inst->code); + return -1; + } + } + + /* compute offsets only during the first pass */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + return 0; +} + +void bpf_jit_compile(struct bpf_prog *fp) +{ + struct jit_ctx ctx; + unsigned int alloc_size, tmp_idx; + + if (!bpf_jit_enable) + return; + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + return; + + ctx.skf = fp; + + if (build_body(&ctx)) + goto out; + + tmp_idx = ctx.idx; + build_prologue(&ctx); + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; + /* just to complete the ctx.idx count */ + build_epilogue(&ctx); + + alloc_size = 4 * ctx.idx; + ctx.target = module_alloc(alloc_size); + if (ctx.target == NULL) + goto out; + + /* Clean it */ + memset(ctx.target, 0, alloc_size); + + ctx.idx = 0; + + /* Generate the actual JIT code */ + build_prologue(&ctx); + build_body(&ctx); + build_epilogue(&ctx); + + /* Update the icache */ + flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); + + fp->bpf_func = (void *)ctx.target; + fp->jited = 1; + +out: + kfree(ctx.offsets); +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) + module_memfree(fp->bpf_func); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S new file mode 100644 index 000000000000..57154c5883b6 --- /dev/null +++ b/arch/mips/net/bpf_jit_asm.S @@ -0,0 +1,285 @@ +/* + * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF + * compiler. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + * Author: Markos Chandras <markos.chandras@imgtec.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include <asm/asm.h> +#include <asm/isa-rev.h> +#include <asm/regdef.h> +#include "bpf_jit.h" + +/* ABI + * + * r_skb_hl skb header length + * r_skb_data skb data + * r_off(a1) offset register + * r_A BPF register A + * r_X PF register X + * r_skb(a0) *skb + * r_M *scratch memory + * r_skb_le skb length + * r_s0 Scratch register 0 + * r_s1 Scratch register 1 + * + * On entry: + * a0: *skb + * a1: offset (imm or imm + X) + * + * All non-BPF-ABI registers are free for use. 
On return, we only + * care about r_ret. The BPF-ABI registers are assumed to remain + * unmodified during the entire filter operation. + */ + +#define skb a0 +#define offset a1 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ + + /* We know better :) so prevent assembler reordering etc */ + .set noreorder + +#define is_offset_negative(TYPE) \ + /* If offset is negative we have more work to do */ \ + slti t0, offset, 0; \ + bgtz t0, bpf_slow_path_##TYPE##_neg; \ + /* Be careful what follows in DS. */ + +#define is_offset_in_header(SIZE, TYPE) \ + /* Reading from header? */ \ + addiu $r_s0, $r_skb_hl, -SIZE; \ + slt t0, $r_s0, offset; \ + bgtz t0, bpf_slow_path_##TYPE; \ + +LEAF(sk_load_word) + is_offset_negative(word) +FEXPORT(sk_load_word_positive) + is_offset_in_header(4, word) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + .set reorder + lw $r_A, 0(t1) + .set noreorder +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_A + rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_word) + +LEAF(sk_load_half) + is_offset_negative(half) +FEXPORT(sk_load_half_positive) + is_offset_in_header(2, half) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lhu $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh $r_A, $r_A +# else + sll t0, $r_A, 8 + srl t1, $r_A, 8 + andi t0, t0, 0xff00 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_half) + +LEAF(sk_load_byte) + is_offset_negative(byte) +FEXPORT(sk_load_byte_positive) + is_offset_in_header(1, byte) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lbu $r_A, 0(t1) + jr $r_ra + move $r_ret, zero + END(sk_load_byte) + +/* + * call skb_copy_bits: + * (prototype in linux/skbuff.h) + * + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) + * + * o32 mandates we leave 4 spaces for argument registers in case + * the callee needs to use them. Even though we don't care about + * the argument registers ourselves, we need to allocate that space + * to remain ABI compliant since the callee may want to use that space. + * We also allocate 2 more spaces for $r_ra and our return register (*to). + * + * n64 is a bit different. The *caller* will allocate the space to preserve + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no + * good reason but it does not matter that much really. + * + * (void *to) is returned in r_s0 + * + */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define DS_OFFSET(SIZE) (4 * SZREG) +#else +#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) +#endif +#define bpf_slow_path_common(SIZE) \ + /* Quick check. Are we within reasonable boundaries? */ \ + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ + sltu $r_s0, offset, $r_s1; \ + beqz $r_s0, fault; \ + /* Load 4th argument in DS */ \ + LONG_ADDIU a3, zero, SIZE; \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, skb_copy_bits; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + /* Assign low slot to a2 */ \ + PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ + jalr t0; \ + /* Reset our destination slot (DS but it's ok) */ \ + INT_S zero, (4 * SZREG)($r_sp); \ + /* \ + * skb_copy_bits returns 0 on success and -EFAULT \ + * on error. Our data live in a2. 
Do not bother with \ + * our data if an error has been returned. \ + */ \ + /* Restore our frame */ \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + INT_L $r_s0, (4 * SZREG)($r_sp); \ + bltz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + move $r_ret, zero; \ + +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) + bpf_slow_path_common(4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_s0 + jr $r_ra + rotr $r_A, t0, 16 +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_word) + +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) + bpf_slow_path_common(2) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + jr $r_ra + wsbh $r_A, $r_s0 +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_half) + +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) + bpf_slow_path_common(1) + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_byte) + +/* + * Negative entry points + */ + .macro bpf_is_end_of_data + li t0, SKF_LL_OFF + /* Reading link layer data? */ + slt t1, offset, t0 + bgtz t1, fault + /* Be careful what follows in DS. */ + .endm +/* + * call skb_copy_bits: + * (prototype in linux/filter.h) + * + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + * int k, unsigned int size) + * + * see above (bpf_slow_path_common) for ABI restrictions + */ +#define bpf_negative_common(SIZE) \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + jalr t0; \ + li a2, SIZE; \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + /* Check return pointer */ \ + beqz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + /* Preserve our pointer */ \ + move $r_s0, v0; \ + /* Set return value */ \ + move $r_ret, zero; \ + +bpf_slow_path_word_neg: + bpf_is_end_of_data +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) + bpf_negative_common(4) + jr $r_ra + lw $r_A, 0($r_s0) + END(sk_load_word_negative) + +bpf_slow_path_half_neg: + bpf_is_end_of_data +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) + bpf_negative_common(2) + jr $r_ra + lhu $r_A, 0($r_s0) + END(sk_load_half_negative) + +bpf_slow_path_byte_neg: + bpf_is_end_of_data +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) + bpf_negative_common(1) + jr $r_ra + lbu $r_A, 0($r_s0) + END(sk_load_byte_negative) + +fault: + jr $r_ra + addiu $r_ret, zero, 1 diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c index 8e26b120f994..d85cbf84e41c 100644 --- a/arch/mips/pci/pci-ip27.c +++ b/arch/mips/pci/pci-ip27.c @@ -10,7 +10,7 @@ #include <asm/sn/addrs.h> #include <asm/sn/types.h> #include <asm/sn/klconfig.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/ioc3.h> #include <asm/pci/bridge.h> diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 5c1a196be0c5..3b2552fb7735 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -437,17 +437,28 @@ static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) struct irq_alloc_info info; int irq; - irq = bc->pci_int[slot]; + switch (pin) { + case PCI_INTERRUPT_UNKNOWN: + case PCI_INTERRUPT_INTA: + case PCI_INTERRUPT_INTC: + pin = 0; + break; + case PCI_INTERRUPT_INTB: + 
case PCI_INTERRUPT_INTD: + pin = 1; + } + + irq = bc->pci_int[slot][pin]; if (irq == -1) { info.ctrl = bc; info.nasid = bc->nasid; - info.pin = slot; + info.pin = bc->int_mapping[slot][pin]; irq = irq_domain_alloc_irqs(bc->domain, 1, bc->nasid, &info); if (irq < 0) return irq; - bc->pci_int[slot] = irq; + bc->pci_int[slot][pin] = irq; } return irq; } @@ -458,21 +469,26 @@ static void bridge_setup_ip27_baseio6g(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO6G); bc->ioc3_sid[6] = IOC3_SID(IOC3_SUBSYS_IP27_MIO); + bc->int_mapping[2][1] = 4; + bc->int_mapping[6][1] = 6; } static void bridge_setup_ip27_baseio(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP27_BASEIO); + bc->int_mapping[2][1] = 4; } static void bridge_setup_ip29_baseio(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP29_SYSBOARD); + bc->int_mapping[2][1] = 3; } static void bridge_setup_ip30_sysboard(struct bridge_controller *bc) { bc->ioc3_sid[2] = IOC3_SID(IOC3_SUBSYS_IP30_SYSBOARD); + bc->int_mapping[2][1] = 4; } static void bridge_setup_menet(struct bridge_controller *bc) @@ -483,6 +499,26 @@ static void bridge_setup_menet(struct bridge_controller *bc) bc->ioc3_sid[3] = IOC3_SID(IOC3_SUBSYS_MENET4); } +static void bridge_setup_io7(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO7); +} + +static void bridge_setup_io8(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IO8); +} + +static void bridge_setup_io9(struct bridge_controller *bc) +{ + bc->ioc3_sid[1] = IOC3_SID(IOC3_SUBSYS_IO9); +} + +static void bridge_setup_ip34_fuel_sysboard(struct bridge_controller *bc) +{ + bc->ioc3_sid[4] = IOC3_SID(IOC3_SUBSYS_IP34_SYSBOARD); +} + #define BRIDGE_BOARD_SETUP(_partno, _setup) \ { .match = _partno, .setup = _setup } @@ -500,6 +536,10 @@ static const struct { BRIDGE_BOARD_SETUP("030-0887-", bridge_setup_ip30_sysboard), BRIDGE_BOARD_SETUP("030-1467-", bridge_setup_ip30_sysboard), BRIDGE_BOARD_SETUP("030-0873-", bridge_setup_menet), + BRIDGE_BOARD_SETUP("030-1557-", bridge_setup_io7), + BRIDGE_BOARD_SETUP("030-1673-", bridge_setup_io8), + BRIDGE_BOARD_SETUP("030-1771-", bridge_setup_io9), + BRIDGE_BOARD_SETUP("030-1707-", bridge_setup_ip34_fuel_sysboard), }; static void bridge_setup_board(struct bridge_controller *bc, char *partnum) @@ -655,7 +695,11 @@ static int bridge_probe(struct platform_device *pdev) for (slot = 0; slot < 8; slot++) { bridge_set(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR); - bc->pci_int[slot] = -1; + bc->pci_int[slot][0] = -1; + bc->pci_int[slot][1] = -1; + /* default interrupt pin mapping */ + bc->int_mapping[slot][0] = slot; + bc->int_mapping[slot][1] = slot ^ 4; } bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */ diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 0ddeb31afa93..bdf53807d7c2 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -67,11 +67,13 @@ static int __init ill_acc_of_setup(void) irq = irq_of_parse_and_map(np, 0); if (!irq) { dev_err(&pdev->dev, "failed to get irq\n"); + put_device(&pdev->dev); return -EINVAL; } if (request_irq(irq, ill_acc_irq_handler, 0, "ill_acc", &pdev->dev)) { dev_err(&pdev->dev, "failed to request irq\n"); + put_device(&pdev->dev); return -EINVAL; } diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 282b47c2dc27..de0768a49ee8 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -47,8 +47,9 @@ static struct device 
gio_bus = { * Used by a driver to check whether an of_device present in the * system is in its list of supported devices. */ -const struct gio_device_id *gio_match_device(const struct gio_device_id *match, - const struct gio_device *dev) +static const struct gio_device_id * +gio_match_device(const struct gio_device_id *match, + const struct gio_device *dev) { const struct gio_device_id *ids; @@ -58,7 +59,6 @@ const struct gio_device_id *gio_match_device(const struct gio_device_id *match, return NULL; } -EXPORT_SYMBOL_GPL(gio_match_device); struct gio_device *gio_dev_get(struct gio_device *dev) { diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 73ad29b180fb..5a38ae6bdfa9 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -16,8 +16,8 @@ #include <asm/ptrace.h> #include <asm/sn/addrs.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> -#include <asm/sn/sn0/hub.h> #include <asm/tlbdebug.h> #include <asm/traps.h> #include <linux/uaccess.h> @@ -30,29 +30,31 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1) { "WERR", "Uncached Partial Write", "PWERR", "Write Timeout", NULL, NULL, NULL, NULL } }; - int wrb = errst1 & PI_ERR_ST1_WRBRRB_MASK; + union pi_err_stat0 st0; + union pi_err_stat1 st1; - if (!(errst0 & PI_ERR_ST0_VALID_MASK)) { - printk("Hub does not contain valid error information\n"); + st0.pi_stat0_word = errst0; + st1.pi_stat1_word = errst1; + + if (!st0.pi_stat0_fmt.s0_valid) { + pr_info("Hub does not contain valid error information\n"); return; } - - printk("Hub has valid error information:\n"); - if (errst0 & PI_ERR_ST0_OVERRUN_MASK) - printk("Overrun is set. Error stack may contain additional " + pr_info("Hub has valid error information:\n"); + if (st0.pi_stat0_fmt.s0_ovr_run) + pr_info("Overrun is set. Error stack may contain additional " "information.\n"); - printk("Hub error address is %08lx\n", - (errst0 & PI_ERR_ST0_ADDR_MASK) >> (PI_ERR_ST0_ADDR_SHFT - 3)); - printk("Incoming message command 0x%lx\n", - (errst0 & PI_ERR_ST0_CMD_MASK) >> PI_ERR_ST0_CMD_SHFT); - printk("Supplemental field of incoming message is 0x%lx\n", - (errst0 & PI_ERR_ST0_SUPPL_MASK) >> PI_ERR_ST0_SUPPL_SHFT); - printk("T5 Rn (for RRB only) is 0x%lx\n", - (errst0 & PI_ERR_ST0_REQNUM_MASK) >> PI_ERR_ST0_REQNUM_SHFT); - printk("Error type is %s\n", err_type[wrb] - [(errst0 & PI_ERR_ST0_TYPE_MASK) >> PI_ERR_ST0_TYPE_SHFT] - ? : "invalid"); + pr_info("Hub error address is %08lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_addr); + pr_info("Incoming message command 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_cmd); + pr_info("Supplemental field of incoming message is 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_supl); + pr_info("T5 Rn (for RRB only) is 0x%lx\n", + (unsigned long)st0.pi_stat0_fmt.s0_t5_req); + pr_info("Error type is %s\n", err_type[st1.pi_stat1_fmt.s1_rw_rb] + [st0.pi_stat0_fmt.s0_err_type] ? 
: "invalid"); } int ip27_be_handler(struct pt_regs *regs, int is_fixup) diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h index 3ffbcf9bfd41..ed008a08464c 100644 --- a/arch/mips/sgi-ip27/ip27-common.h +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -3,8 +3,18 @@ #ifndef __IP27_COMMON_H #define __IP27_COMMON_H -extern void ip27_reboot_setup(void); +extern nasid_t master_nasid; + +extern void cpu_node_probe(void); extern void hub_rt_clock_event_init(void); +extern void hub_rtc_init(nasid_t nasid); +extern void install_cpu_nmi_handler(int slice); +extern void install_ipi(void); +extern void ip27_reboot_setup(void); extern const struct plat_smp_ops ip27_smp_ops; +extern unsigned long node_getfirstfree(nasid_t nasid); +extern void per_cpu_init(void); +extern void replicate_kernel_text(void); +extern void setup_replication_mask(void); #endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-console.c b/arch/mips/sgi-ip27/ip27-console.c index 5886bee89d06..7737a88c6569 100644 --- a/arch/mips/sgi-ip27/ip27-console.c +++ b/arch/mips/sgi-ip27/ip27-console.c @@ -9,14 +9,15 @@ #include <asm/page.h> #include <asm/setup.h> #include <asm/sn/addrs.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/klconfig.h> #include <asm/sn/ioc3.h> -#include <asm/sn/sn_private.h> #include <linux/serial.h> #include <linux/serial_core.h> +#include "ip27-common.h" + #define IOC3_CLK (22000000 / 3) #define IOC3_FLAGS (0) diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c index a538d0ceb61d..8352eb6403b4 100644 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ b/arch/mips/sgi-ip27/ip27-hubio.c @@ -11,7 +11,9 @@ #include <linux/mmzone.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> +#include <asm/sn/io.h> +#include <asm/xtalk/xtalk.h> static int force_fire_and_forget = 1; @@ -82,7 +84,7 @@ unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, */ static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) { - iprb_t prb; + union iprb_u prb; int prb_offset; /* @@ -135,7 +137,7 @@ static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) static void hub_set_piomode(nasid_t nasid) { u64 ii_iowa; - hubii_wcr_t ii_wcr; + union hubii_wcr_u ii_wcr; unsigned i; ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS); diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index f597e1ee2df7..32bcb8d1dd88 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -19,23 +19,18 @@ #include <asm/pgtable.h> #include <asm/sgialib.h> #include <asm/time.h> +#include <asm/sn/agent.h> #include <asm/sn/types.h> -#include <asm/sn/sn0/addrs.h> -#include <asm/sn/sn0/hubni.h> -#include <asm/sn/sn0/hubio.h> #include <asm/sn/klconfig.h> #include <asm/sn/ioc3.h> #include <asm/mipsregs.h> #include <asm/sn/gda.h> -#include <asm/sn/hub.h> #include <asm/sn/intr.h> #include <asm/current.h> #include <asm/processor.h> #include <asm/mmu_context.h> #include <asm/thread_info.h> #include <asm/sn/launch.h> -#include <asm/sn/sn_private.h> -#include <asm/sn/sn0/ip27.h> #include <asm/sn/mapped_kernel.h> #include "ip27-common.h" @@ -77,18 +72,14 @@ static void per_hub_init(nasid_t nasid) void per_cpu_init(void) { int cpu = smp_processor_id(); - int slice = LOCAL_HUB_L(PI_CPU_NUM); nasid_t nasid = get_nasid(); - struct hub_data *hub = hub_data(nasid); - - if (test_and_set_bit(slice, &hub->slice_map)) - return; clear_c0_status(ST0_IM); per_hub_init(nasid); - 
cpu_time_init(); + pr_info("CPU %d clock is %dMHz.\n", cpu, sn_cpu_info[cpu].p_speed); + install_ipi(); /* Install our NMI handler if symmon hasn't installed one. */ @@ -98,16 +89,6 @@ void per_cpu_init(void) enable_percpu_irq(IP27_HUB_PEND1_IRQ, IRQ_TYPE_NONE); } -/* - * get_nasid() returns the physical node id number of the caller. - */ -nasid_t -get_nasid(void) -{ - return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK) - >> NSRI_NODEID_SHFT); -} - void __init plat_mem_setup(void) { u64 p, e, n_mode; diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index c72ae330ea93..42df9fafa943 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -19,7 +19,6 @@ #include <asm/sn/addrs.h> #include <asm/sn/agent.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> #include <asm/sn/intr.h> #include <asm/sn/irq_alloc.h> @@ -288,11 +287,9 @@ void __init arch_init_irq(void) * Mark these as reserved right away so they won't be used accidentally * later. */ - for (i = 0; i <= BASE_PCI_IRQ; i++) + for (i = 0; i <= CPU_CALL_B_IRQ; i++) set_bit(i, hub_irq_map); - set_bit(IP_PEND0_6_63, hub_irq_map); - for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++) set_bit(i, hub_irq_map); diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c index 6cb2160e7689..81a1646e609a 100644 --- a/arch/mips/sgi-ip27/ip27-klconfig.c +++ b/arch/mips/sgi-ip27/ip27-klconfig.c @@ -72,54 +72,3 @@ lboard_t *find_lboard_class(lboard_t *start, unsigned char brd_type) /* Didn't find it. */ return (lboard_t *)NULL; } - -klcpu_t *nasid_slice_to_cpuinfo(nasid_t nasid, int slice) -{ - lboard_t *brd; - klcpu_t *acpu; - - if (!(brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27))) - return (klcpu_t *)NULL; - - if (!(acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU))) - return (klcpu_t *)NULL; - - do { - if ((acpu->cpu_info.physid) == slice) - return acpu; - } while ((acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu, - KLSTRUCT_CPU))); - return (klcpu_t *)NULL; -} - -klcpu_t *sn_get_cpuinfo(cpuid_t cpu) -{ - nasid_t nasid; - int slice; - klcpu_t *acpu; - - if (!(cpu < MAXCPUS)) { - printk("sn_get_cpuinfo: illegal cpuid 0x%lx\n", cpu); - return NULL; - } - - nasid = cputonasid(cpu); - if (nasid == INVALID_NASID) - return NULL; - - for (slice = 0; slice < CPUS_PER_NODE; slice++) { - acpu = nasid_slice_to_cpuinfo(nasid, slice); - if (acpu && acpu->cpu_info.virtid == cpu) - return acpu; - } - return NULL; -} - -int get_cpu_slice(cpuid_t cpu) -{ - klcpu_t *acpu; - - if ((acpu = sn_get_cpuinfo(cpu)) == NULL) - return -1; - return acpu->cpu_info.physid; -} diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c index ee1c6ff4aa00..abd7a84df7dd 100644 --- a/arch/mips/sgi-ip27/ip27-klnuma.c +++ b/arch/mips/sgi-ip27/ip27-klnuma.c @@ -16,11 +16,11 @@ #include <asm/sn/types.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> -#include <asm/sn/hub.h> #include <asm/sn/mapped_kernel.h> -#include <asm/sn/sn_private.h> -static cpumask_t ktext_repmask; +#include "ip27-common.h" + +static nodemask_t ktext_repmask; /* * XXX - This needs to be much smarter about where it puts copies of the @@ -30,8 +30,8 @@ static cpumask_t ktext_repmask; void __init setup_replication_mask(void) { /* Set only the master cnode's bit. The master cnode is always 0. 
*/ - cpumask_clear(&ktext_repmask); - cpumask_set_cpu(0, &ktext_repmask); + nodes_clear(ktext_repmask); + node_set(0, ktext_repmask); #ifdef CONFIG_REPLICATE_KTEXT #ifndef CONFIG_MAPPED_KERNEL @@ -44,7 +44,7 @@ void __init setup_replication_mask(void) if (nasid == 0) continue; /* Advertise that we have a copy of the kernel */ - cpumask_set_cpu(nasid, &ktext_repmask); + node_set(nasid, ktext_repmask); } } #endif @@ -98,7 +98,7 @@ void __init replicate_kernel_text(void) continue; /* Check if this node should get a copy of the kernel */ - if (cpumask_test_cpu(client_nasid, &ktext_repmask)) { + if (node_isset(client_nasid, ktext_repmask)) { server_nasid = client_nasid; copy_kernel(server_nasid); } @@ -122,7 +122,7 @@ unsigned long node_getfirstfree(nasid_t nasid) loadbase += 16777216; #endif offset = PAGE_ALIGN((unsigned long)(&_end)) - loadbase; - if ((nasid == 0) || (cpumask_test_cpu(nasid, &ktext_repmask))) + if ((nasid == 0) || (node_isset(nasid, ktext_repmask))) return TO_NODE(nasid, offset) >> PAGE_SHIFT; else return KDM_TO_PHYS(PAGE_ALIGN(SYMMON_STK_ADDR(nasid, 0))) >> PAGE_SHIFT; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 563aad5e6398..a45691e6ab90 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -25,10 +25,10 @@ #include <asm/sections.h> #include <asm/sn/arch.h> -#include <asm/sn/hub.h> +#include <asm/sn/agent.h> #include <asm/sn/klconfig.h> -#include <asm/sn/sn_private.h> +#include "ip27-common.h" #define SLOT_PFNSHIFT (SLOT_SHIFT - PAGE_SHIFT) #define PFN_NASIDSHFT (NASID_SHFT - PAGE_SHIFT) @@ -37,31 +37,18 @@ struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); -static int fine_mode; - -static int is_fine_dirmode(void) -{ - return ((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_REGIONSIZE_MASK) >> NSRI_REGIONSIZE_SHFT) & REGIONSIZE_FINE; -} - -static u64 get_region(nasid_t nasid) -{ - if (fine_mode) - return nasid >> NASID_TO_FINEREG_SHFT; - else - return nasid >> NASID_TO_COARSEREG_SHFT; -} - -static u64 region_mask; - -static void gen_region_mask(u64 *region_mask) +static u64 gen_region_mask(void) { + int region_shift; + u64 region_mask; nasid_t nasid; - (*region_mask) = 0; - for_each_online_node(nasid) { - (*region_mask) |= 1ULL << get_region(nasid); - } + region_shift = get_region_shift(); + region_mask = 0; + for_each_online_node(nasid) + region_mask |= BIT_ULL(nasid >> region_shift); + + return region_mask; } #define rou_rflag rou_flags @@ -148,25 +135,25 @@ static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) } while ((brd = find_lboard_class(KLCF_NEXT(brd), KLTYPE_ROUTER))); } + if (nasid_a == nasid_b) + return LOCAL_DISTANCE; + + if (router_a == router_b) + return LOCAL_DISTANCE + 1; + if (router_a == NULL) { pr_info("node_distance: router_a NULL\n"); - return -1; + return 255; } if (router_b == NULL) { pr_info("node_distance: router_b NULL\n"); - return -1; + return 255; } - if (nasid_a == nasid_b) - return 0; - - if (router_a == router_b) - return 1; - router_distance = 100; router_recurse(router_a, router_b, 2); - return router_distance; + return LOCAL_DISTANCE + router_distance; } static void __init init_topology_matrix(void) @@ -281,10 +268,10 @@ static unsigned long __init slot_psize_compute(nasid_t nasid, int slot) static void __init mlreset(void) { + u64 region_mask; nasid_t nasid; master_nasid = get_nasid(); - fine_mode = is_fine_dirmode(); /* * Probe for all CPUs - this creates the cpumask and sets up the @@ -297,7 +284,7 @@ static void __init 
mlreset(void) init_topology_matrix(); dump_topology(); - gen_region_mask(&region_mask); + region_mask = gen_region_mask(); setup_replication_mask(); diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index daf3670d94e7..84889b57d5ff 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -9,7 +9,7 @@ #include <asm/sn/addrs.h> #include <asm/sn/nmi.h> #include <asm/sn/arch.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> #if 0 #define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n) @@ -17,6 +17,9 @@ #define NODE_NUM_CPUS(n) CPUS_PER_NODE #endif +#define SEND_NMI(_nasid, _slice) \ + REMOTE_HUB_S((_nasid), (PI_NMI_A + ((_slice) * PI_NMI_OFFSET)), 1) + typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c index 74d078247e49..5ac5ad638734 100644 --- a/arch/mips/sgi-ip27/ip27-reset.c +++ b/arch/mips/sgi-ip27/ip27-reset.c @@ -22,9 +22,9 @@ #include <asm/reboot.h> #include <asm/sgialib.h> #include <asm/sn/addrs.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> -#include <asm/sn/sn0/hub.h> #include "ip27-common.h" diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index faa0244c8b0c..5d2652a1d35a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -15,36 +15,22 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <asm/sn/agent.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> #include <asm/sn/intr.h> #include <asm/sn/klconfig.h> #include <asm/sn/launch.h> #include <asm/sn/mapped_kernel.h> -#include <asm/sn/sn_private.h> #include <asm/sn/types.h> -#include <asm/sn/sn0/hubpi.h> -#include <asm/sn/sn0/hubio.h> -#include <asm/sn/sn0/ip27.h> #include "ip27-common.h" -/* - * Takes as first input the PROM assigned cpu id, and the kernel - * assigned cpu id as the second.
- */ -static void alloc_cpupda(nasid_t nasid, cpuid_t cpu, int cpunum) -{ - cputonasid(cpunum) = nasid; - cputoslice(cpunum) = get_cpu_slice(cpu); -} - -static int do_cpumask(nasid_t nasid, int highest) +static int node_scan_cpus(nasid_t nasid, int highest) { - static int tot_cpus_found = 0; + static int cpus_found; lboard_t *brd; klcpu_t *acpu; - int cpus_found = 0; cpuid_t cpuid; brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); @@ -55,13 +41,15 @@ static int do_cpumask(nasid_t nasid, int highest) cpuid = acpu->cpu_info.virtid; /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && - (tot_cpus_found != NR_CPUS)) { + (cpus_found != NR_CPUS)) { if (cpuid > highest) highest = cpuid; set_cpu_possible(cpuid, true); - alloc_cpupda(nasid, cpuid, tot_cpus_found); + cputonasid(cpus_found) = nasid; + cputoslice(cpus_found) = acpu->cpu_info.physid; + sn_cpu_info[cpus_found].p_speed = + acpu->cpu_speed; cpus_found++; - tot_cpus_found++; } acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu, KLSTRUCT_CPU); @@ -87,7 +75,7 @@ void cpu_node_probe(void) if (nasid == INVALID_NASID) break; node_set_online(nasid); - highest = do_cpumask(nasid, highest); + highest = node_scan_cpus(nasid, highest); } printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes()); @@ -180,7 +168,8 @@ static void __init ip27_smp_setup(void) /* * PROM sets up system, that boot cpu is always first CPU on nasid 0 */ - alloc_cpupda(0, 0, 0); + cputonasid(0) = 0; + cputoslice(0) = LOCAL_HUB_L(PI_CPU_NUM); } static void __init ip27_prepare_cpus(unsigned int max_cpus) diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 17302bbfa7a6..61f3565f3645 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -25,17 +25,14 @@ #include <asm/sn/klconfig.h> #include <asm/sn/arch.h> #include <asm/sn/addrs.h> -#include <asm/sn/sn_private.h> -#include <asm/sn/sn0/ip27.h> -#include <asm/sn/sn0/hub.h> +#include <asm/sn/agent.h> + +#include "ip27-common.h" #define TICK_SIZE (tick_nsec / 1000) /* Includes for ioc3_init(). */ #include <asm/sn/types.h> -#include <asm/sn/sn0/addrs.h> -#include <asm/sn/sn0/hubni.h> -#include <asm/sn/sn0/hubio.h> #include <asm/pci/bridge.h> #include "ip27-common.h" @@ -153,25 +150,6 @@ void __init plat_time_init(void) hub_rt_clock_event_init(); } -void cpu_time_init(void) -{ - lboard_t *board; - klcpu_t *cpu; - int cpuid; - - /* Don't use ARCS. ARCS is fragile. Klconfig is simple and sane. */ - board = find_lboard(KL_CONFIG_INFO(get_nasid()), KLTYPE_IP27); - if (!board) - panic("Can't find board info for myself."); - - cpuid = LOCAL_HUB_L(PI_CPU_NUM) ? 
IP27_CPU0_INDEX : IP27_CPU1_INDEX; - cpu = (klcpu_t *) KLCF_COMP(board, cpuid); - if (!cpu) - panic("No information about myself?"); - - printk("CPU %d clock is %dMHz.\n", smp_processor_id(), cpu->cpu_speed); -} - void hub_rtc_init(nasid_t nasid) { @@ -190,23 +168,3 @@ void hub_rtc_init(nasid_t nasid) LOCAL_HUB_S(PI_RT_PEND_B, 0); } } - -static int __init sgi_ip27_rtc_devinit(void) -{ - struct resource res; - - memset(&res, 0, sizeof(res)); - res.start = XPHYSADDR(KL_CONFIG_CH_CONS_INFO(master_nasid)->memory_base + - IOC3_BYTEBUS_DEV0); - res.end = res.start + 32767; - res.flags = IORESOURCE_MEM; - - return IS_ERR(platform_device_register_simple("rtc-m48t35", -1, - &res, 1)); -} - -/* - * kludge make this a device_initcall after ioc3 resource conflicts - * are resolved - */ -late_initcall(sgi_ip27_rtc_devinit); diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c index 5218b900f855..000ede156bdc 100644 --- a/arch/mips/sgi-ip27/ip27-xtalk.c +++ b/arch/mips/sgi-ip27/ip27-xtalk.c @@ -15,7 +15,6 @@ #include <asm/sn/addrs.h> #include <asm/sn/types.h> #include <asm/sn/klconfig.h> -#include <asm/sn/hub.h> #include <asm/pci/bridge.h> #include <asm/xtalk/xtalk.h> diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c index d46655b914f1..c2ffcb920250 100644 --- a/arch/mips/sgi-ip30/ip30-irq.c +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -232,9 +232,10 @@ static void heart_domain_free(struct irq_domain *domain, return; irqd = irq_domain_get_irq_data(domain, virq); - clear_bit(irqd->hwirq, heart_irq_map); - if (irqd && irqd->chip_data) + if (irqd) { + clear_bit(irqd->hwirq, heart_irq_map); kfree(irqd->chip_data); + } } static const struct irq_domain_ops heart_domain_ops = { diff --git a/arch/mips/vdso/genvdso.c b/arch/mips/vdso/genvdso.c index b66b6b1c4aeb..be57b832bbe0 100644 --- a/arch/mips/vdso/genvdso.c +++ b/arch/mips/vdso/genvdso.c @@ -251,6 +251,18 @@ int main(int argc, char **argv) fprintf(out_file, "#include <linux/linkage.h>\n"); fprintf(out_file, "#include <linux/mm.h>\n"); fprintf(out_file, "#include <asm/vdso.h>\n"); + fprintf(out_file, "static int vdso_mremap(\n"); + fprintf(out_file, " const struct vm_special_mapping *sm,\n"); + fprintf(out_file, " struct vm_area_struct *new_vma)\n"); + fprintf(out_file, "{\n"); + fprintf(out_file, " unsigned long new_size =\n"); + fprintf(out_file, " new_vma->vm_end - new_vma->vm_start;\n"); + fprintf(out_file, " if (vdso_image.size != new_size)\n"); + fprintf(out_file, " return -EINVAL;\n"); + fprintf(out_file, " current->mm->context.vdso =\n"); + fprintf(out_file, " (void __user *)(new_vma->vm_start);\n"); + fprintf(out_file, " return 0;\n"); + fprintf(out_file, "}\n"); /* Write out the stripped VDSO data. 
*/ fprintf(out_file, @@ -275,6 +287,7 @@ int main(int argc, char **argv) fprintf(out_file, "\t.mapping = {\n"); fprintf(out_file, "\t\t.name = \"[vdso]\",\n"); fprintf(out_file, "\t\t.pages = vdso_pages,\n"); + fprintf(out_file, "\t\t.mremap = vdso_mremap,\n"); fprintf(out_file, "\t},\n"); /* Calculate and write symbol offsets to <output file> */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e7c607059f54..c150a9d49343 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -238,6 +238,7 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 13bd870609c3..e90c073e437e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -350,6 +350,7 @@ #define H_SVM_PAGE_OUT 0xEF04 #define H_SVM_INIT_START 0xEF08 #define H_SVM_INIT_DONE 0xEF0C +#define H_SVM_INIT_ABORT 0xEF14 /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 50204e228f16..5a9834e0e2d1 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -19,8 +19,9 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long kvmppc_h_svm_init_start(struct kvm *kvm); unsigned long kvmppc_h_svm_init_done(struct kvm *kvm); int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); +unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm); + struct kvm *kvm, bool skip_page_out); #else static inline int kvmppc_uvmem_init(void) { @@ -62,6 +63,11 @@ static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) return H_UNSUPPORTED; } +static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) +{ + return H_UNSUPPORTED; +} + static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) { return -EFAULT; @@ -69,6 +75,6 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm) { } + struct kvm *kvm, bool skip_page_out) { } #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0a398f2321c2..6e8b8ffd06ad 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -278,6 +278,7 @@ struct kvm_resize_hpt; /* Flag values for kvm_arch.secure_guest */ #define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */ #define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */ +#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */ struct kvm_arch { unsigned int lpid; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3d2f871241a8..bc2494e5710a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -119,8 +119,7 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, enum xlate_readwrite xlrw, struct kvmppc_pte *pte); -extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, - unsigned int id); +extern int 
kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu); extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_processor_compat(void); @@ -274,7 +273,7 @@ struct kvmppc_ops { void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); int (*check_requests)(struct kvm_vcpu *vcpu); int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log); diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 58efca934311..360367c579de 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -238,11 +238,6 @@ static inline void arch_unmap(struct mm_struct *mm, mm->context.vdso_base = 0; } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - #ifdef CONFIG_PPC_MEM_KEYS bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 58a59ee998e2..d07a8e12fa15 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -471,11 +471,6 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; @@ -789,9 +784,9 @@ void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu) { - return kvm->arch.kvm_ops->vcpu_create(kvm, id); + return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d381526c5c9b..6c372f5c61b6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -284,7 +284,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, - current->mm->pgd, false, pte_idx_ret); + kvm->mm->pgd, false, pte_idx_ret); rcu_read_unlock_sched(); if (ret == H_TOO_HARD) { /* this can't happen */ @@ -573,7 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; - mm = current->mm; + mm = kvm->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index da857c8ba6e4..744dba98e5d1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1102,7 +1102,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, unsigned int shift; if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START) - kvmppc_uvmem_drop_pages(memslot, kvm); + kvmppc_uvmem_drop_pages(memslot, kvm, true); if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return; diff --git 
a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 883a66e76638..ee6c103bb7d5 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -253,10 +253,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) } } + account_locked_vm(kvm->mm, + kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); + kvm_put_kvm(stt->kvm); - account_locked_vm(current->mm, - kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); return 0; @@ -272,6 +273,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, { struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; + struct mm_struct *mm = kvm->mm; unsigned long npages, size = args->size; int ret = -ENOMEM; @@ -280,7 +282,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EINVAL; npages = kvmppc_tce_pages(size); - ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true); + ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -326,7 +328,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kfree(stt); fail_acct: - account_locked_vm(current->mm, kvmppc_stt_pages(npages), false); + account_locked_vm(mm, kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6ff3f896d908..2cefd071b848 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1091,6 +1091,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_SVM_INIT_DONE: ret = kvmppc_h_svm_init_done(vcpu->kvm); break; + case H_SVM_INIT_ABORT: + ret = kvmppc_h_svm_init_abort(vcpu->kvm); + break; default: return RESUME_HOST; @@ -2271,22 +2274,16 @@ static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; int err; int core; struct kvmppc_vcore *vcore; + struct kvm *kvm; + unsigned int id; - err = -ENOMEM; - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; - - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; + kvm = vcpu->kvm; + id = vcpu->vcpu_id; vcpu->arch.shared = &vcpu->arch.shregs; #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -2368,7 +2365,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + return err; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2383,12 +2380,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, debugfs_vcpu_init(vcpu, id); - return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); + return 0; } static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode, @@ -2442,8 +2434,6 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) @@ -4285,7 +4275,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) user_vrsave = mfspr(SPRN_VRSAVE); vcpu->arch.wqp = &vcpu->arch.vcore->wq; - vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = 
KVMPPC_VCPU_BUSY_IN_HOST; do { @@ -4640,14 +4630,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&kvm->mm->mmap_sem); + vma = find_vma(kvm->mm, hva); if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) goto up_out; psize = vma_kernel_pagesize(vma); - up_read(&current->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); /* We can handle 4k, 64k or 16M pages in the VRMA */ if (psize >= 0x1000000) @@ -4680,7 +4670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) return err; up_out: - up_read(&current->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); goto out_srcu; } @@ -5477,7 +5467,7 @@ static int kvmhv_svm_off(struct kvm *kvm) continue; kvm_for_each_memslot(memslot, slots) { - kvmppc_uvmem_drop_pages(memslot, kvm); + kvmppc_uvmem_drop_pages(memslot, kvm, true); uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); } } diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 2de264fc3156..79b1202b1c62 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -258,7 +258,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) * QEMU page table with normal PTEs from newly allocated pages. */ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, - struct kvm *kvm) + struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; @@ -276,7 +276,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = true; + pvt->skip_page_out = skip_page_out; mutex_unlock(&kvm->arch.uvmem_lock); pfn = gfn_to_pfn(kvm, gfn); @@ -286,6 +286,34 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, } } +unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) +{ + int srcu_idx; + struct kvm_memory_slot *memslot; + + /* + * Expect to be called only after INIT_START and before INIT_DONE. + * If INIT_DONE was completed, use normal VM termination sequence.
+ */ + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) + return H_STATE; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + kvm_for_each_memslot(memslot, kvm_memslots(kvm)) + kvmppc_uvmem_drop_pages(memslot, kvm, false); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + kvm->arch.secure_guest = 0; + uv_svm_terminate(kvm->arch.lpid); + + return H_PARAMETER; +} + /* * Get a free device PFN from the pool * @@ -543,7 +571,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, ret = migrate_vma_setup(&mig); if (ret) - return ret; + goto out; spage = migrate_pfn_to_page(*mig.src); if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ce4fcf76e53e..729a0f12a752 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1744,21 +1744,17 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s; - struct kvm_vcpu *vcpu; - int err = -ENOMEM; unsigned long p; + int err; - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; + err = -ENOMEM; vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) - goto free_vcpu; + goto out; vcpu->arch.book3s = vcpu_book3s; #ifdef CONFIG_KVM_BOOK3S_32_HANDLER @@ -1768,14 +1764,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, goto free_vcpu3s; #endif - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_shadow_vcpu; - - err = -ENOMEM; p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) - goto uninit_vcpu; + goto free_shadow_vcpu; vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ @@ -1806,22 +1797,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; - return vcpu; + return 0; -uninit_vcpu: - kvm_vcpu_uninit(vcpu); +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); free_shadow_vcpu: #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); free_vcpu3s: #endif vfree(vcpu_book3s); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) @@ -1829,12 +1818,10 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); - kvm_vcpu_uninit(vcpu); #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); #endif vfree(vcpu_book3s); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -2030,6 +2017,7 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, { /* We should not get called */ BUG(); + return 0; } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d83adb1e1490..6ef0151ff70a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -631,7 +631,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = 
gpa_to_gfn(kvm_eq.qaddr); - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); pr_warn("Incompatible host page size %lx!\n", page_size); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index be9a45874194..7b27604adadf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -775,7 +775,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) debug = current->thread.debug; current->thread.debug = vcpu->arch.dbg_reg; - vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1377,36 +1377,6 @@ static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) update_timer_ints(vcpu); } -/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ - int i; - int r; - - vcpu->arch.regs.nip = 0; - vcpu->arch.shared->pir = vcpu->vcpu_id; - kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ - kvmppc_set_msr(vcpu, 0); - -#ifndef CONFIG_KVM_BOOKE_HV - vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; - vcpu->arch.shadow_pid = 1; - vcpu->arch.shared->msr = 0; -#endif - - /* Eye-catching numbers so we know if the guest takes an interrupt - * before it's programmed its own IVPR/IVORs. */ - vcpu->arch.ivpr = 0x55550000; - for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) - vcpu->arch.ivor[i] = 0x7700 | i * 4; - - kvmppc_init_timing_stats(vcpu); - - r = kvmppc_core_vcpu_setup(vcpu); - kvmppc_sanity_check(vcpu); - return r; -} - int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) { /* setup watchdog timer once */ @@ -2114,9 +2084,40 @@ int kvmppc_core_init_vm(struct kvm *kvm) return kvm->arch.kvm_ops->init_vm(kvm); } -struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu) { - return kvm->arch.kvm_ops->vcpu_create(kvm, id); + int i; + int r; + + r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu); + if (r) + return r; + + /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ + vcpu->arch.regs.nip = 0; + vcpu->arch.shared->pir = vcpu->vcpu_id; + kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + kvmppc_set_msr(vcpu, 0); + +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; + vcpu->arch.shadow_pid = 1; + vcpu->arch.shared->msr = 0; +#endif + + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. 
*/ + vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; + + kvmppc_init_timing_stats(vcpu); + + r = kvmppc_core_vcpu_setup(vcpu); + if (r) + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); + kvmppc_sanity_check(vcpu); + return r; } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 00649ca5fa9a..f2b4feaff6d2 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -433,31 +433,16 @@ static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500; - struct kvm_vcpu *vcpu; int err; - BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0); + vcpu_e500 = to_e500(vcpu); - vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_e500) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_e500->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) { - err = -ENOMEM; - goto uninit_vcpu; - } + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + return -ENOMEM; err = kvmppc_e500_tlb_init(vcpu_e500); if (err) @@ -469,18 +454,13 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, goto uninit_tlb; } - return vcpu; + return 0; uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); uninit_id: kvmppc_e500_id_table_free(vcpu_e500); -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); -out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) @@ -490,8 +470,6 @@ static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); kvmppc_e500_id_table_free(vcpu_e500); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } static int kvmppc_core_init_vm_e500(struct kvm *kvm) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 318e65c65999..e6b06cb2b92c 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -301,30 +301,20 @@ static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, return r; } -static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, - unsigned int id) +static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500; - struct kvm_vcpu *vcpu; int err; - vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_e500) { - err = -ENOMEM; - goto out; - } - vcpu = &vcpu_e500->vcpu; + BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0); + vcpu_e500 = to_e500(vcpu); /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */ vcpu->arch.oldpir = 0xffffffff; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - err = kvmppc_e500_tlb_init(vcpu_e500); if (err) - goto uninit_vcpu; + return err; vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (!vcpu->arch.shared) { @@ -332,17 +322,11 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, goto uninit_tlb; } - return vcpu; + return 0; uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); -uninit_vcpu: - kvm_vcpu_uninit(vcpu); - 
-free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); -out: - return ERR_PTR(err); + return err; } static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) @@ -351,8 +335,6 @@ static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 2e496eb86e94..1139bc56e004 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 inst; - int ra, rs, rt; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; struct instruction_op op; @@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (emulated != EMULATE_DONE) return emulated; - ra = get_ra(inst); - rs = get_rs(inst); - rt = get_rt(inst); - vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 416fb3d2a1d0..1af96fb5dc6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -475,7 +475,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) #endif kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -720,22 +720,55 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, kvmppc_core_flush_memslot(kvm, slot); } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - vcpu = kvmppc_core_vcpu_create(kvm, id); - if (!IS_ERR(vcpu)) { - vcpu->arch.wqp = &vcpu->wq; - kvmppc_create_vcpu_debugfs(vcpu, id); - } - return vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); + kvmppc_decrementer_func(vcpu); + + return HRTIMER_NORESTART; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + int err; + + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; + vcpu->arch.dec_expires = get_tb(); + +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + err = kvmppc_subarch_vcpu_init(vcpu); + if (err) + return err; + + err = kvmppc_core_vcpu_create(vcpu); + if (err) + goto out_vcpu_uninit; + + vcpu->arch.wqp = &vcpu->wq; + kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); + return 0; + +out_vcpu_uninit: + kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); + return err; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); @@ -758,11 +791,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) } kvmppc_core_vcpu_free(vcpu); -} -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - kvm_arch_vcpu_free(vcpu); + kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -770,37 +801,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return 
kvmppc_core_pending_dec(vcpu); } -static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) -{ - struct kvm_vcpu *vcpu; - - vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - kvmppc_decrementer_func(vcpu); - - return HRTIMER_NORESTART; -} - -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - int ret; - - hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; - vcpu->arch.dec_expires = get_tb(); - -#ifdef CONFIG_KVM_EXIT_TIMING - mutex_init(&vcpu->arch.exit_timing_lock); -#endif - ret = kvmppc_subarch_vcpu_init(vcpu); - return ret; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - kvmppc_mmu_destroy(vcpu); - kvmppc_subarch_vcpu_uninit(vcpu); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa7dc03459e7..73f029eae0cc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,8 +12,6 @@ config 32BIT config RISCV def_bool y - # even on 32-bit, physical (and DMA) addresses are > 32-bits - select PHYS_ADDR_T_64BIT select OF select OF_EARLY_FLATTREE select OF_IRQ @@ -57,6 +55,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY if SMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB + select ARCH_HAS_DEBUG_VIRTUAL select HAVE_EBPF_JIT if 64BIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE @@ -66,6 +65,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_BITS if MMU select ARCH_HAS_GCOV_PROFILE_ALL select HAVE_COPY_THREAD_TLS + select HAVE_ARCH_KASAN if MMU && 64BIT config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index a2e3d54e830c..7db861053483 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -268,6 +268,19 @@ interrupts = <1 2 3>; reg = <0x0 0x2010000 0x0 0x1000>; }; - + gpio: gpio@10060000 { + compatible = "sifive,fu540-c000-gpio", "sifive,gpio0"; + interrupt-parent = <&plic0>; + interrupts = <7>, <8>, <9>, <10>, <11>, <12>, <13>, + <14>, <15>, <16>, <17>, <18>, <19>, <20>, + <21>, <22>; + reg = <0x0 0x10060000 0x0 0x1000>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; + }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 88cfcb96bf23..609198cb1163 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -94,3 +94,7 @@ &pwm1 { status = "okay"; }; + +&gpio { + status = "okay"; +}; diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h new file mode 100644 index 000000000000..eee6e6588b12 --- /dev/null +++ b/arch/riscv/include/asm/kasan.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Andes Technology Corporation */ + +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <asm/pgtable.h> + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT)) +#define KASAN_SHADOW_START 0xffffffc000000000 /* 2^64 - 2^38 */ +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) + +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ + (64 - KASAN_SHADOW_SCALE_SHIFT))) + +void kasan_init(void); +asmlinkage void 
kasan_early_init(void); + +#endif +#endif +#endif /* __ASM_KASAN_H */ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index ac699246ae7e..8ca1930caa44 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -100,8 +100,20 @@ extern unsigned long pfn_base; extern unsigned long max_low_pfn; extern unsigned long min_low_pfn; -#define __pa(x) ((unsigned long)(x) - va_pa_offset) -#define __va(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) + +#ifdef CONFIG_DEBUG_VIRTUAL +extern phys_addr_t __virt_to_phys(unsigned long x); +extern phys_addr_t __phys_addr_symbol(unsigned long x); +#else +#define __virt_to_phys(x) __va_to_pa_nodebug(x) +#define __phys_addr_symbol(x) __va_to_pa_nodebug(x) +#endif /* CONFIG_DEBUG_VIRTUAL */ + +#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) +#define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __va(x) ((void *)__pa_to_va_nodebug((phys_addr_t)(x))) #define phys_to_pfn(phys) (PFN_DOWN(phys)) #define pfn_to_phys(pfn) (PFN_PHYS(pfn)) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 74630989006d..36e638d1dfe4 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -58,6 +58,11 @@ static inline unsigned long pud_page_vaddr(pud_t pud) return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); } +static inline struct page *pud_page(pud_t pud) +{ + return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); +} + #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 1b5d44585962..924af13f8555 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -11,8 +11,17 @@ #define __HAVE_ARCH_MEMSET extern asmlinkage void *memset(void *, int, size_t); +extern asmlinkage void *__memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY extern asmlinkage void *memcpy(void *, const void *, size_t); +extern asmlinkage void *__memcpy(void *, const void *, size_t); +/* For those files which don't want to check by kasan. 
*/ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#endif #endif /* _ASM_RISCV_STRING_H */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index a4242be66966..271860fc2c3f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -121,6 +121,9 @@ clear_bss_done: sw zero, TASK_TI_CPU(tp) la sp, init_thread_union + THREAD_SIZE +#ifdef CONFIG_KASAN + call kasan_early_init +#endif /* Start the kernel */ call parse_dtb tail start_kernel diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 2a02b7eebee0..450492e1cb4e 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -11,3 +11,5 @@ */ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 9babfcf3bb70..0a6d415b0a5a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include <asm/tlbflush.h> #include <asm/thread_info.h> +#include <asm/kasan.h> #include "head.h" @@ -74,6 +75,10 @@ void __init setup_arch(char **cmdline_p) swiotlb_init(1); #endif +#ifdef CONFIG_KASAN + kasan_init(); +#endif + #ifdef CONFIG_SMP setup_smp(); #endif diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 12f42f96d46e..1e0193ded420 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ SECTIONS KPROBES_TEXT ENTRY_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) _etext = .; } diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index b4c477846e91..51ab716253fa 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,7 +7,8 @@ #include <asm/asm.h> /* void *memcpy(void *, const void *, size_t) */ -ENTRY(memcpy) +ENTRY(__memcpy) +WEAK(memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -104,4 +105,4 @@ ENTRY(memcpy) bltu a1, a3, 5b 6: ret -END(memcpy) +END(__memcpy) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 5a7386b47175..34c5360c6705 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,7 +8,8 @@ #include <asm/asm.h> /* void *memset(void *, int, size_t) */ -ENTRY(memset) +ENTRY(__memset) +WEAK(memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -109,4 +110,4 @@ ENTRY(memset) bltu t0, a3, 5b 6: ret -END(memset) +END(__memset) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index a1bd95c8047a..50b7af58c566 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -15,3 +15,11 @@ ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o + +ifdef CONFIG_KASAN +KASAN_SANITIZE_kasan_init.o := n +KASAN_SANITIZE_init.o := n +endif + +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c new file mode 100644 index 000000000000..f0cc86040587 --- /dev/null +++ b/arch/riscv/mm/kasan_init.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Andes Technology Corporation + +#include <linux/pfn.h> +#include <linux/init_task.h> +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <asm/tlbflush.h> +#include <asm/pgtable.h> +#include 
<asm/fixmap.h> + +extern pgd_t early_pg_dir[PTRS_PER_PGD]; +asmlinkage void __init kasan_early_init(void) +{ + uintptr_t i; + pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + + for (i = 0; i < PTRS_PER_PTE; ++i) + set_pte(kasan_early_shadow_pte + i, + mk_pte(virt_to_page(kasan_early_shadow_page), + PAGE_KERNEL)); + + for (i = 0; i < PTRS_PER_PMD; ++i) + set_pmd(kasan_early_shadow_pmd + i, + pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)), + __pgprot(_PAGE_TABLE))); + + for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; + i += PGDIR_SIZE, ++pgd) + set_pgd(pgd, + pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); + + /* init for swapper_pg_dir */ + pgd = pgd_offset_k(KASAN_SHADOW_START); + + for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; + i += PGDIR_SIZE, ++pgd) + set_pgd(pgd, + pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); + + flush_tlb_all(); +} + +static void __init populate(void *start, void *end) +{ + unsigned long i; + unsigned long vaddr = (unsigned long)start & PAGE_MASK; + unsigned long vend = PAGE_ALIGN((unsigned long)end); + unsigned long n_pages = (vend - vaddr) / PAGE_SIZE; + unsigned long n_pmds = + (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 : + n_pages / PTRS_PER_PTE; + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE); + pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); + + for (i = 0; i < n_pages; i++) { + phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + + set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); + } + + for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))), + __pgprot(_PAGE_TABLE))); + + for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE) + set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))), + __pgprot(_PAGE_TABLE))); + + flush_tlb_all(); + memset(start, 0, end - start); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + unsigned long i; + + kasan_populate_early_shadow((void *)KASAN_SHADOW_START, + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + + for_each_memblock(memory, reg) { + void *start = (void *)__va(reg->base); + void *end = (void *)__va(reg->base + reg->size); + + if (start >= end) + break; + + populate(kasan_mem_to_shadow(start), + kasan_mem_to_shadow(end)); + }; + + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_early_shadow_pte[i], + mk_pte(virt_to_page(kasan_early_shadow_page), + __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED))); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + init_task.kasan_depth = 0; +} diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c new file mode 100644 index 000000000000..e8e4dcd39fed --- /dev/null +++ b/arch/riscv/mm/physaddr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/types.h> +#include <linux/mmdebug.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/sections.h> + +phys_addr_t __virt_to_phys(unsigned long x) +{ + phys_addr_t y = x - PAGE_OFFSET; + + /* + * Boundary checking aginst the kernel linear mapping space. 
+ */ + WARN(y >= KERN_VIRT_SIZE, + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, (void *)x); + + return __va_to_pa_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + unsigned long kernel_start = (unsigned long)PAGE_OFFSET; + unsigned long kernel_end = (unsigned long)_end; + + /* + * Boundary checking aginst the kernel image mapping. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < kernel_start || x > kernel_end); + + return __va_to_pa_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 02f4c21c57f6..11ecc4071a29 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -914,7 +914,6 @@ extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9e6bf3d54f0..8646c99217f2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2530,9 +2530,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); free_page((unsigned long)(vcpu->arch.sie_block)); - - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -2541,7 +2538,7 @@ static void kvm_free_vcpus(struct kvm *kvm) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_destroy(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -2703,39 +2700,6 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; } -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(vcpu); - vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | - KVM_SYNC_GPRS | - KVM_SYNC_ACRS | - KVM_SYNC_CRS | - KVM_SYNC_ARCH0 | - KVM_SYNC_PFAULT; - kvm_s390_set_prefix(vcpu, 0); - if (test_kvm_facility(vcpu->kvm, 64)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; - if (test_kvm_facility(vcpu->kvm, 82)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; - if (test_kvm_facility(vcpu->kvm, 133)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; - if (test_kvm_facility(vcpu->kvm, 156)) - vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; - /* fprs can be synchronized via vrs, even if the guest has no vx. With - * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
- */ - if (MACHINE_HAS_VX) - vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; - else - vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; - - if (kvm_is_ucontrol(vcpu->kvm)) - return __kvm_ucontrol_vcpu_init(vcpu); - - return 0; -} - /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) { @@ -2962,7 +2926,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) { int rc = 0; @@ -3035,26 +2999,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return rc; } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu; - struct sie_page *sie_page; - int rc = -EINVAL; - if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) - goto out; - - rc = -ENOMEM; + return -EINVAL; + return 0; +} - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) - goto out; +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + struct sie_page *sie_page; + int rc; BUILD_BUG_ON(sizeof(struct sie_page) != 4096); sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); if (!sie_page) - goto out_free_cpu; + return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; @@ -3063,27 +3023,59 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->mso = 0; vcpu->arch.sie_block->msl = sclp.hamax; - vcpu->arch.sie_block->icpua = id; + vcpu->arch.sie_block->icpua = vcpu->vcpu_id; spin_lock_init(&vcpu->arch.local_int.lock); - vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin; + vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin; if (vcpu->arch.sie_block->gd && sclp.has_gisaf) vcpu->arch.sie_block->gd |= GISA_FORMAT1; seqcount_init(&vcpu->arch.cputm_seqcount); - rc = kvm_vcpu_init(vcpu, kvm, id); + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS | + KVM_SYNC_ARCH0 | + KVM_SYNC_PFAULT; + kvm_s390_set_prefix(vcpu, 0); + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; + if (test_kvm_facility(vcpu->kvm, 133)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; + if (test_kvm_facility(vcpu->kvm, 156)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. 
+ */ + if (MACHINE_HAS_VX) + vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; + + if (kvm_is_ucontrol(vcpu->kvm)) { + rc = __kvm_ucontrol_vcpu_init(vcpu); + if (rc) + goto out_free_sie_block; + } + + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); + trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); + + rc = kvm_s390_vcpu_setup(vcpu); if (rc) - goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, - vcpu->arch.sie_block); - trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); + goto out_ucontrol_uninit; + return 0; - return vcpu; +out_ucontrol_uninit: + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_remove(vcpu->arch.gmap); out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); -out_free_cpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(rc); + return rc; } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 9d3e5cc95bbb..264e76ceccf6 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -16,12 +16,12 @@ extern struct kmem_cache *pgtable_cache; -static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +static inline void __p4d_populate(p4d_t *p4d, pud_t *pud) { - pgd_set(pgd, pud); + p4d_set(p4d, pud); } -#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) +#define p4d_populate(MM, P4D, PUD) __p4d_populate(P4D, PUD) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 6ae8016ef4ec..34ff3b43afbb 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -13,7 +13,7 @@ * the SpitFire page tables. */ -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #include <linux/compiler.h> #include <linux/const.h> #include <asm/types.h> @@ -810,9 +810,9 @@ static inline int pmd_present(pmd_t pmd) #define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) -#define pgd_none(pgd) (!pgd_val(pgd)) +#define p4d_none(p4d) (!p4d_val(p4d)) -#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) +#define p4d_bad(p4d) (p4d_val(p4d) & ~PAGE_MASK) #ifdef CONFIG_TRANSPARENT_HUGEPAGE void set_pmd_at(struct mm_struct *mm, unsigned long addr, @@ -859,13 +859,13 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pgd_page_vaddr(pgd) \ - ((unsigned long) __va(pgd_val(pgd))) -#define pgd_present(pgd) (pgd_val(pgd) != 0U) -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +#define p4d_page_vaddr(p4d) \ + ((unsigned long) __va(p4d_val(p4d))) +#define p4d_present(p4d) (p4d_val(p4d) != 0U) +#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) /* only used by the stubbed out hugetlb gup code, should never be called */ -#define pgd_page(pgd) NULL +#define p4d_page(p4d) NULL static inline unsigned long pud_large(pud_t pud) { @@ -884,8 +884,8 @@ static inline unsigned long pud_pfn(pud_t pud) /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) -#define pgd_set(pgdp, pudp) \ - (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) +#define p4d_set(p4dp, pudp) \ + (p4d_val(*(p4dp)) = (__pa((unsigned long) (pudp)))) /* to find an entry in a page-table-directory. 
*/ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -896,8 +896,8 @@ static inline unsigned long pud_pfn(pud_t pud) /* Find an entry in the third-level page table.. */ #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -#define pud_offset(pgdp, address) \ - ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) +#define pud_offset(p4dp, address) \ + ((pud_t *) p4d_page_vaddr(*(p4dp)) + pud_index(address)) /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 5b933a598a33..0ea1240d2ea1 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -17,19 +17,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/include/uapi/asm/statfs.h b/arch/sparc/include/uapi/asm/statfs.h deleted file mode 100644 index 20c8f5bd340e..000000000000 --- a/arch/sparc/include/uapi/asm/statfs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef ___ASM_SPARC_STATFS_H -#define ___ASM_SPARC_STATFS_H - -#include <asm-generic/statfs.h> - -#endif diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index ec244d1022ce..da8902295c8c 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -132,12 +132,13 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs->which_io, regs->phys_addr); } -/* "name:vendor:device@irq,addrlo" */ +/* "name@irq,addrlo" */ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) { const char *name = of_get_property(dp, "name", NULL); struct amba_prom_registers *regs; - unsigned int *intr, *device, *vendor, reg0; + unsigned int *intr; + unsigned int reg0; struct property *prop; int interrupt = 0; @@ -159,18 +160,7 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) else intr = prop->value; - prop = of_find_property(dp, "vendor", NULL); - if (!prop) - return; - vendor = prop->value; - prop = of_find_property(dp, "device", NULL); - if (!prop) - return; - device = prop->value; - - sprintf(tmp_buf, "%s:%d:%d@%x,%x", - name, *vendor, *device, - *intr, reg0); + sprintf(tmp_buf, "%s@%x,%x", name, *intr, reg0); } static void __init __build_path_component(struct device_node *dp, char *tmp_buf) diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index a237810aa9f4..2a734ecd0a40 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -299,6 +299,7 @@ static void flush_signal_insns(unsigned long address) unsigned long pstate, paddr; pte_t *ptep, pte; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; @@ -318,7 +319,10 @@ static void flush_signal_insns(unsigned long address) pgdp = pgd_offset(current->mm, address); if (pgd_none(*pgdp)) goto out_irqs_on; - pudp = 
pud_offset(pgdp, address); + p4dp = p4d_offset(pgdp, address); + if (p4d_none(*p4dp)) + goto out_irqs_on; + pudp = pud_offset(p4dp, address); if (pud_none(*pudp)) goto out_irqs_on; pmdp = pmd_offset(pudp, address); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9b4506373353..80f20b3808ee 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1621,6 +1621,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) static void __init pcpu_populate_pte(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -1633,7 +1634,17 @@ static void __init pcpu_populate_pte(unsigned long addr) pgd_populate(&init_mm, pgd, new); } - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 7ec79918b566..f99e99e58075 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,12 +171,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 2371fb6b97e4..8b7ddbd14b65 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -80,6 +80,7 @@ static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) static unsigned int get_user_insn(unsigned long tpc) { pgd_t *pgdp = pgd_offset(current->mm, tpc); + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep, pte; @@ -88,7 +89,10 @@ static unsigned int get_user_insn(unsigned long tpc) if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) goto out; - pudp = pud_offset(pgdp, tpc); + p4dp = p4d_offset(pgdp, tpc); + if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp))) + goto out; + pudp = pud_offset(p4dp, tpc); if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) goto out; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index f78793a06bbd..7b9fa861b67c 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -277,11 +277,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; if (sz >= PUD_SIZE) @@ -298,13 +300,17 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return NULL; if (is_hugetlb_pud(*pud)) @@ -449,7 +455,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -458,7 +464,7 @@ static void hugetlb_free_pud_range(struct mmu_gather 
*tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -481,8 +487,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); + pud = pud_offset(p4d, start); + p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } @@ -492,6 +498,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; addr &= PMD_MASK; @@ -511,10 +518,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, return; pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); - } while (pgd++, addr = next, addr != end); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); + } while (p4d++, addr = next, addr != end); } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index e6d91819da92..1cf0d666dea3 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -530,7 +530,8 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end) paddr = kaddr & mask; else { pgd_t *pgdp = pgd_offset_k(kaddr); - pud_t *pudp = pud_offset(pgdp, kaddr); + p4d_t *p4dp = p4d_offset(pgdp, kaddr); + pud_t *pudp = pud_offset(p4dp, kaddr); pmd_t *pmdp = pmd_offset(pudp, kaddr); pte_t *ptep = pte_offset_kernel(pmdp, kaddr); @@ -1653,6 +1654,7 @@ static unsigned long max_phys_bits = 40; bool kern_addr_valid(unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -1674,7 +1676,11 @@ bool kern_addr_valid(unsigned long addr) if (pgd_none(*pgd)) return 0; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return 0; + + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return 0; @@ -1800,6 +1806,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, while (vstart < vend) { unsigned long this_end, paddr = __pa(vstart); pgd_t *pgd = pgd_offset_k(vstart); + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -1814,7 +1821,20 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, alloc_bytes += PAGE_SIZE; pgd_populate(&init_mm, pgd, new); } - pud = pud_offset(pgd, vstart); + + p4d = p4d_offset(pgd, vstart); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, + PAGE_SIZE); + if (!new) + goto err_alloc; + alloc_bytes += PAGE_SIZE; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, vstart); if (pud_none(*pud)) { pmd_t *new; @@ -2612,13 +2632,18 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, for (; vstart < vend; vstart += PMD_SIZE) { pgd_t *pgd = vmemmap_pgd_populate(vstart, node); unsigned long pte; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, vstart, node); + p4d = vmemmap_p4d_populate(pgd, vstart, node); + if (!p4d) + return -ENOMEM; + + pud = vmemmap_pud_populate(p4d, vstart, node); if (!pud) return -ENOMEM; diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 5aee0626e390..b4deb1bfbb68 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -25,11 +25,6 @@ 
static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 247a07ae2cdc..388c0c811c68 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -89,11 +89,6 @@ static inline void arch_unmap(struct mm_struct *mm, { } -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a283336bb5d5..90288ab1b99e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1888,34 +1888,6 @@ config X86_UMIP specific cases in protected and virtual-8086 modes. Emulated results are dummy. -config X86_INTEL_MPX - prompt "Intel MPX (Memory Protection Extensions)" - def_bool n - # Note: only available in 64-bit mode due to VMA flags shortage - depends on CPU_SUP_INTEL && X86_64 - select ARCH_USES_HIGH_VMA_FLAGS - ---help--- - MPX provides hardware features that can be used in - conjunction with compiler-instrumented code to check - memory references. It is designed to detect buffer - overflow or underflow bugs. - - This option enables running applications which are - instrumented or otherwise use MPX. It does not use MPX - itself inside the kernel or to protect the kernel - against bad memory references. - - Enabling this option will make the kernel larger: - ~8k of kernel text and 36 bytes of data on a 64-bit - defconfig. It adds a long to the 'mm_struct' which - will increase the kernel memory overhead of each - process and adds some branches to paths used during - exec() and munmap(). - - For details, see Documentation/x86/intel_mpx.rst - - If unsure, say N. - config X86_INTEL_MEMORY_PROTECTION_KEYS prompt "Intel Memory Protection Keys" def_bool y diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 794eb2129bc6..92ae28389940 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -6,12 +6,6 @@ extern void check_bugs(void); -#if defined(CONFIG_CPU_SUP_INTEL) -void check_mpx_erratum(struct cpuinfo_x86 *c); -#else -static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {} -#endif - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8e1d0bb46361..4ea8584682f9 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -10,12 +10,6 @@ * cpu_feature_enabled(). 
*/ -#ifdef CONFIG_X86_INTEL_MPX -# define DISABLE_MPX 0 -#else -# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) -#endif - #ifdef CONFIG_X86_SMAP # define DISABLE_SMAP 0 #else @@ -74,7 +68,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 5f10f7f2098d..92abc1e42bfc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -809,7 +809,8 @@ union hv_synic_sint { u64 reserved1:8; u64 masked:1; u64 auto_eoi:1; - u64 reserved2:46; + u64 polling:1; + u64 reserved2:45; } __packed; }; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 77cf6c11f66b..03946eb3e2b9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -222,6 +222,10 @@ struct x86_emulate_ops { bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit); + bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b79cd6aa4075..329d01c689b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -175,6 +175,11 @@ enum { VCPU_SREG_LDTR, }; +enum exit_fastpath_completion { + EXIT_FASTPATH_NONE, + EXIT_FASTPATH_SKIP_EMUL_INS, +}; + #include <asm/kvm_emulate.h> #define KVM_NR_MEM_OBJS 40 @@ -378,12 +383,12 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, @@ -606,7 +611,7 @@ struct kvm_vcpu_arch { * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables - * of the an L2 guest. This context is only initialized for page table + * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ @@ -685,10 +690,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -1022,6 +1027,11 @@ struct kvm_lapic_irq { bool msi_redir_hint; }; +static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) +{ + return dest_mode_logical ? 
APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; +} + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -1040,7 +1050,7 @@ struct kvm_x86_ops { void (*vm_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ - struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); + int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); @@ -1090,7 +1100,8 @@ struct kvm_x86_ops { void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); void (*run)(struct kvm_vcpu *vcpu); - int (*handle_exit)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); @@ -1140,11 +1151,13 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); @@ -1468,7 +1481,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); @@ -1614,7 +1627,6 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); -int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index e78c7db87801..bdeae9291e5c 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -50,10 +50,6 @@ typedef struct { u16 pkey_allocation_map; s16 execute_only_pkey; #endif -#ifdef CONFIG_X86_INTEL_MPX - /* address of the bounds directory */ - void __user *bd_addr; -#endif } mm_context_t; #define INIT_MM_CONTEXT(mm) \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index b243234e90cb..b538d9ddee9c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,7 +12,6 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> -#include <asm/mpx.h> #include <asm/debugreg.h> extern atomic64_t last_mm_ctx_id; @@ -200,34 +199,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm) } #endif -static inline void arch_bprm_mm_init(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - mpx_mm_init(mm); -} - static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * mpx_notify_unmap() goes and reads a rarely-hot - * cacheline in the mm_struct. 
That can be expensive - * enough to be seen in profiles. - * - * The mpx_notify_unmap() call and its contents have been - * observed to affect munmap() performance on hardware - * where MPX is not present. - * - * The unlikely() optimizes for the fast case: no MPX - * in the CPU, or no MPX use in the process. Even if - * we get this wrong (in the unlikely event that MPX - * is widely enabled on some system) the overhead of - * MPX itself (reading bounds tables) is expected to - * overwhelm the overhead of getting this unlikely() - * consistently wrong. - */ - if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) - mpx_notify_unmap(mm, start, end); } /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h deleted file mode 100644 index 143a5c193ed3..000000000000 --- a/arch/x86/include/asm/mpx.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MPX_H -#define _ASM_X86_MPX_H - -#include <linux/types.h> -#include <linux/mm_types.h> - -#include <asm/ptrace.h> -#include <asm/insn.h> - -/* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. - */ -#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) -#define MPX_BNDCFG_ENABLE_FLAG 0x1 -#define MPX_BD_ENTRY_VALID_FLAG 0x1 - -/* - * The upper 28 bits [47:20] of the virtual address in 64-bit - * are used to index into bounds directory (BD). - * - * The directory is 2G (2^31) in size, and with 8-byte entries - * it has 2^28 entries. - */ -#define MPX_BD_SIZE_BYTES_64 (1UL<<31) -#define MPX_BD_ENTRY_BYTES_64 8 -#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) - -/* - * The 32-bit directory is 4MB (2^22) in size, and with 4-byte - * entries it has 2^20 entries. - */ -#define MPX_BD_SIZE_BYTES_32 (1UL<<22) -#define MPX_BD_ENTRY_BYTES_32 4 -#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) - -/* - * A 64-bit table is 4MB total in size, and an entry is - * 4 64-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_64 (1UL<<22) -#define MPX_BT_ENTRY_BYTES_64 32 -#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) - -/* - * A 32-bit table is 16kB total in size, and an entry is - * 4 32-bit pointers in size. - */ -#define MPX_BT_SIZE_BYTES_32 (1UL<<14) -#define MPX_BT_ENTRY_BYTES_32 16 -#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) - -#define MPX_BNDSTA_TAIL 2 -#define MPX_BNDCFG_TAIL 12 -#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BNDSTA_ERROR_CODE 0x3 - -struct mpx_fault_info { - void __user *addr; - void __user *lower; - void __user *upper; -}; - -#ifdef CONFIG_X86_INTEL_MPX - -extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); -extern int mpx_handle_bd_fault(void); - -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR); -} - -static inline void mpx_mm_init(struct mm_struct *mm) -{ - /* - * NULL is theoretically a valid place to put the bounds - * directory, so point this at an invalid address. 
- */ - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; -} - -extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end); -extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); - -#else -static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - return -EINVAL; -} -static inline int mpx_handle_bd_fault(void) -{ - return -EINVAL; -} -static inline int kernel_managing_mpx_tables(struct mm_struct *mm) -{ - return 0; -} -static inline void mpx_mm_init(struct mm_struct *mm) -{ -} -static inline void mpx_notify_unmap(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ -} - -static inline unsigned long mpx_unmapped_area_check(unsigned long addr, - unsigned long len, unsigned long flags) -{ - return addr; -} -#endif /* CONFIG_X86_INTEL_MPX */ - -#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index ea7400726d7a..0239998d8cdc 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -566,6 +566,10 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); + +struct mm_struct; +extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, + unsigned int *level); extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6fb4870ed759..09705ccc393c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -947,24 +947,6 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); -/* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() -#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() - -#ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(void); -extern int mpx_disable_management(void); -#else -static inline int mpx_enable_management(void) -{ - return -EINVAL; -} -static inline int mpx_disable_management(void) -{ - return -EINVAL; -} -#endif /* CONFIG_X86_INTEL_MPX */ - #ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h deleted file mode 100644 index 54133017267c..000000000000 --- a/arch/x86/include/asm/trace/mpx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mpx - -#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MPX_H - -#include <linux/tracepoint.h> - -#ifdef CONFIG_X86_INTEL_MPX - -TRACE_EVENT(mpx_bounds_register_exception, - - TP_PROTO(void __user *addr_referenced, - const struct mpx_bndreg *bndreg), - TP_ARGS(addr_referenced, bndreg), - - TP_STRUCT__entry( - __field(void __user *, addr_referenced) - __field(u64, lower_bound) - __field(u64, upper_bound) - ), - - TP_fast_assign( - __entry->addr_referenced = addr_referenced; - __entry->lower_bound = bndreg->lower_bound; - __entry->upper_bound = bndreg->upper_bound; - ), - /* - * Note that we are printing out the '~' of the upper - * bounds register here. 
It is actually stored in its - * one's complement form so that its 'init' state - * corresponds to all 0's. But, that looks like - * gibberish when printed out, so print out the 1's - * complement instead of the actual value here. Note - * though that you still need to specify filters for the - * actual value, not the displayed one. - */ - TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", - __entry->addr_referenced, - __entry->lower_bound, - ~__entry->upper_bound - ) -); - -TRACE_EVENT(bounds_exception_mpx, - - TP_PROTO(const struct mpx_bndcsr *bndcsr), - TP_ARGS(bndcsr), - - TP_STRUCT__entry( - __field(u64, bndcfgu) - __field(u64, bndstatus) - ), - - TP_fast_assign( - /* need to get rid of the 'const' on bndcsr */ - __entry->bndcfgu = (u64)bndcsr->bndcfgu; - __entry->bndstatus = (u64)bndcsr->bndstatus; - ), - - TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", - __entry->bndcfgu, - __entry->bndstatus) -); - -DECLARE_EVENT_CLASS(mpx_range_trace, - - TP_PROTO(unsigned long start, - unsigned long end), - TP_ARGS(start, end), - - TP_STRUCT__entry( - __field(unsigned long, start) - __field(unsigned long, end) - ), - - TP_fast_assign( - __entry->start = start; - __entry->end = end; - ), - - TP_printk("[0x%p:0x%p]", - (void *)__entry->start, - (void *)__entry->end - ) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, - TP_PROTO(unsigned long start, unsigned long end), - TP_ARGS(start, end) -); - -TRACE_EVENT(mpx_new_bounds_table, - - TP_PROTO(unsigned long table_vaddr), - TP_ARGS(table_vaddr), - - TP_STRUCT__entry( - __field(unsigned long, table_vaddr) - ), - - TP_fast_assign( - __entry->table_vaddr = table_vaddr; - ), - - TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) -); - -#else - -/* - * This gets used outside of MPX-specific code, so we need a stub. - */ -static inline -void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) -{ -} - -#endif /* CONFIG_X86_INTEL_MPX */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH asm/trace/ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mpx -#endif /* _TRACE_MPX_H */ - -/* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9fbba31be825..d380b3b7ddd9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -22,8 +22,8 @@ /* * Definitions of Primary Processor-Based VM-Execution Controls. 
*/ -#define CPU_BASED_VIRTUAL_INTR_PENDING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING) -#define CPU_BASED_USE_TSC_OFFSETING VMCS_CONTROL_BIT(TSC_OFFSETTING) +#define CPU_BASED_INTR_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_INTR_PENDING) +#define CPU_BASED_USE_TSC_OFFSETTING VMCS_CONTROL_BIT(TSC_OFFSETTING) #define CPU_BASED_HLT_EXITING VMCS_CONTROL_BIT(HLT_EXITING) #define CPU_BASED_INVLPG_EXITING VMCS_CONTROL_BIT(INVLPG_EXITING) #define CPU_BASED_MWAIT_EXITING VMCS_CONTROL_BIT(MWAIT_EXITING) @@ -34,7 +34,7 @@ #define CPU_BASED_CR8_LOAD_EXITING VMCS_CONTROL_BIT(CR8_LOAD_EXITING) #define CPU_BASED_CR8_STORE_EXITING VMCS_CONTROL_BIT(CR8_STORE_EXITING) #define CPU_BASED_TPR_SHADOW VMCS_CONTROL_BIT(VIRTUAL_TPR) -#define CPU_BASED_VIRTUAL_NMI_PENDING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING) +#define CPU_BASED_NMI_WINDOW_EXITING VMCS_CONTROL_BIT(VIRTUAL_NMI_PENDING) #define CPU_BASED_MOV_DR_EXITING VMCS_CONTROL_BIT(MOV_DR_EXITING) #define CPU_BASED_UNCOND_IO_EXITING VMCS_CONTROL_BIT(UNCOND_IO_EXITING) #define CPU_BASED_USE_IO_BITMAPS VMCS_CONTROL_BIT(USE_IO_BITMAPS) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 3eb8411ab60e..e95b72ec19bc 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -33,7 +33,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -94,7 +94,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6175e370ee4a..9b294c13809a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace_$(BITS).o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o +obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 34360ca301a2..15ac0d5f4b40 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,6 +23,7 @@ #include <asm/nmi.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <asm/insn.h> #include <asm/io.h> #include <asm/fixmap.h> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 86b8241c8209..52c9bfbbdb2a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -164,22 +164,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -static int __init x86_mpx_setup(char *s) -{ - /* require an exact match without trailing characters */ - if (strlen(s)) - return 0; - - /* do not emit a message if the feature is not present */ - if (!boot_cpu_has(X86_FEATURE_MPX)) - return 1; - - setup_clear_cpu_cap(X86_FEATURE_MPX); - pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); - return 1; -} 
-__setup("nompx", x86_mpx_setup); - #ifdef CONFIG_X86_64 static int __init x86_nopcid_setup(char *s) { @@ -306,8 +290,6 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) static __init int setup_disable_smep(char *arg) { setup_clear_cpu_cap(X86_FEATURE_SMEP); - /* Check for things that depend on SMEP being enabled: */ - check_mpx_erratum(&boot_cpu_data); return 1; } __setup("nosmep", setup_disable_smep); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 57473e2c0869..be82cd5841c3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -32,41 +32,6 @@ #endif /* - * Just in case our CPU detection goes bad, or you have a weird system, - * allow a way to override the automatic disabling of MPX. - */ -static int forcempx; - -static int __init forcempx_setup(char *__unused) -{ - forcempx = 1; - - return 1; -} -__setup("intel-skd-046-workaround=disable", forcempx_setup); - -void check_mpx_erratum(struct cpuinfo_x86 *c) -{ - if (forcempx) - return; - /* - * Turn off the MPX feature on CPUs where SMEP is not - * available or disabled. - * - * Works around Intel Erratum SKD046: "Branch Instructions - * May Initialize MPX Bound Registers Incorrectly". - * - * This might falsely disable MPX on systems without - * SMEP, like Atom processors without SMEP. But there - * is no such hardware known at the moment. - */ - if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) { - setup_clear_cpu_cap(X86_FEATURE_MPX); - pr_warn("x86/mpx: Disabling MPX since SMEP not present\n"); - } -} - -/* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists * CPU models in which having conflicting memory types still leads to @@ -330,7 +295,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); } - check_mpx_erratum(c); check_memory_type_self_snoop_errata(c); /* diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1504bcabc63c..8ca5e510f3ce 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2060,7 +2060,7 @@ static int rdt_get_tree(struct fs_context *fc) if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret < 0) goto out_info; @@ -2295,7 +2295,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + kfree(sentry); } } @@ -2333,7 +2337,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2536,7 +2544,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. 
*/ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -2726,7 +2734,6 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, const char *name, umode_t mode, enum rdt_group_type rtype, struct rdtgroup **r) { @@ -2735,7 +2742,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; goto out_unlock; @@ -2808,7 +2815,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; @@ -2819,7 +2826,7 @@ out_destroy: out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2836,15 +2843,12 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) * to monitor a subset of tasks and cpus in its parent ctrl_mon group. */ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, - const char *name, - umode_t mode) + const char *name, umode_t mode) { struct rdtgroup *rdtgrp, *prgrp; int ret; - ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP, - &rdtgrp); + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp); if (ret) return ret; @@ -2857,7 +2861,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2866,7 +2870,6 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, * to allocate and monitor resources. */ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, - struct kernfs_node *prgrp_kn, const char *name, umode_t mode) { struct rdtgroup *rdtgrp; @@ -2874,8 +2877,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, u32 closid; int ret; - ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP, - &rdtgrp); + ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp); if (ret) return ret; @@ -2900,7 +2902,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); goto out_del_list; @@ -2916,7 +2918,7 @@ out_id_free: out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2949,14 +2951,14 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, * subdirectory */ if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) - return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode); + return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); /* * If RDT monitoring is supported and the parent directory is a valid * "mon_groups" directory, add a monitoring subdirectory. 
*/ if (rdt_mon_capable && is_mon_groups(parent_kn, name)) - return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode); + return rdtgroup_mkdir_mon(parent_kn, name, mode); return -EPERM; } @@ -3042,13 +3044,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); + rdtgroup_ctrl_remove(kn, rdtgrp); + /* * Free all the child monitor group rmids. */ free_all_child_rdtgrp(rdtgrp); - rdtgroup_ctrl_remove(kn, rdtgrp); - return 0; } diff --git a/arch/x86/kernel/crash_core_32.c b/arch/x86/kernel/crash_core_32.c new file mode 100644 index 000000000000..c0159a7bca6d --- /dev/null +++ b/arch/x86/kernel/crash_core_32.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/crash_core.h> + +#include <asm/pgtable.h> +#include <asm/setup.h> + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifdef CONFIG_X86_PAE + VMCOREINFO_CONFIG(X86_PAE); +#endif +} diff --git a/arch/x86/kernel/crash_core_64.c b/arch/x86/kernel/crash_core_64.c new file mode 100644 index 000000000000..845a57eb4eb7 --- /dev/null +++ b/arch/x86/kernel/crash_core_64.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/crash_core.h> + +#include <asm/pgtable.h> +#include <asm/setup.h> + +void arch_crash_save_vmcoreinfo(void) +{ + u64 sme_mask = sme_me_mask; + + VMCOREINFO_NUMBER(phys_base); + VMCOREINFO_SYMBOL(init_top_pgt); + vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", + pgtable_l5_enabled()); + +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); + VMCOREINFO_NUMBER(sme_mask); +} diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7b45e8daad22..02bddfc122a4 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -250,15 +250,3 @@ void machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } - -void arch_crash_save_vmcoreinfo(void) -{ -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifdef CONFIG_X86_PAE - VMCOREINFO_CONFIG(X86_PAE); -#endif -} - diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 16e125a50b33..ad5cdd6a5f23 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -398,25 +398,6 @@ void machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } -void arch_crash_save_vmcoreinfo(void) -{ - u64 sme_mask = sme_me_mask; - - VMCOREINFO_NUMBER(phys_base); - VMCOREINFO_SYMBOL(init_top_pgt); - vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", - pgtable_l5_enabled()); - -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", - kaslr_offset()); - VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); - VMCOREINFO_NUMBER(sme_mask); -} - /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1e4c20a1efec..a74262c71484 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -893,8 +893,6 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) 
_edata; init_mm.brk = _brk_end; - mpx_mm_init(&init_mm); - code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; rodata_resource.start = __pa_symbol(__start_rodata); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index f7476ce23b6e..ca3c11a17b5a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -22,7 +22,6 @@ #include <asm/elf.h> #include <asm/ia32.h> #include <asm/syscalls.h> -#include <asm/mpx.h> /* * Align a virtual address to avoid aliasing in the I$ on AMD F15h. @@ -137,10 +136,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_unmapped_area_info info; unsigned long begin, end; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - if (flags & MAP_FIXED) return addr; @@ -180,10 +175,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long addr = addr0; struct vm_unmapped_area_info info; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9e6f822922a3..6ef00eb6fbb9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -52,8 +52,6 @@ #include <asm/mach_traps.h> #include <asm/alternative.h> #include <asm/fpu/xstate.h> -#include <asm/trace/mpx.h> -#include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> #include <asm/insn.h> @@ -436,8 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - const struct mpx_bndcsr *bndcsr; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) @@ -447,76 +443,6 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (!user_mode(regs)) die("bounds", regs, error_code); - if (!cpu_feature_enabled(X86_FEATURE_MPX)) { - /* The exception is not from Intel MPX */ - goto exit_trap; - } - - /* - * We need to look at BNDSTATUS to resolve this exception. - * A NULL here might mean that it is in its 'init state', - * which is all zeros which indicates MPX was not - * responsible for the exception. - */ - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - goto exit_trap; - - trace_bounds_exception_mpx(bndcsr); - /* - * The error code field of the BNDSTATUS register communicates status - * information of a bound range exception #BR or operation involving - * bound directory. - */ - switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { - case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault()) - goto exit_trap; - break; /* Success, it was handled */ - case 1: /* Bound violation. */ - { - struct task_struct *tsk = current; - struct mpx_fault_info mpx; - - if (mpx_fault_info(&mpx, regs)) { - /* - * We failed to decode the MPX instruction. Act as if - * the exception was not caused by MPX. - */ - goto exit_trap; - } - /* - * Success, we decoded the instruction and retrieved - * an 'mpx' containing the address being accessed - * which caused the exception. This information - * allows and application to possibly handle the - * #BR exception itself. 
- */ - if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs, - error_code)) - break; - - show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code); - - force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper); - break; - } - case 0: /* No exception caused by Intel MPX operations. */ - goto exit_trap; - default: - die("bounds", regs, error_code); - } - - return; - -exit_trap: - /* - * This path out is for all the cases where we could not - * handle the exception in some way (like allocating a - * table or telling userspace about it. We will also end - * up here if the kernel has MPX turned off at compile - * time.. - */ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cf55629ff0ff..b1c469446b07 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -62,7 +62,7 @@ u64 kvm_supported_xcr0(void) return xcr0; } -#define F(x) bit(X86_FEATURE_##x) +#define F feature_bit int kvm_update_cpuid(struct kvm_vcpu *vcpu) { @@ -281,8 +281,9 @@ out: return r; } -static void cpuid_mask(u32 *word, int wordnum) +static __always_inline void cpuid_mask(u32 *word, int wordnum) { + reverse_cpuid_check(wordnum); *word &= boot_cpu_data.x86_capability[wordnum]; } @@ -352,6 +353,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; unsigned f_la57; + unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0; /* cpuid 7.0.ebx */ const u32 kvm_cpuid_7_0_ebx_x86_features = @@ -363,7 +365,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | + F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; @@ -392,6 +394,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) /* Set LA57 based on hardware capability. */ entry->ecx |= f_la57; entry->ecx |= f_umip; + entry->ecx |= f_pku; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d78a61408243..7366c618aa04 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -53,15 +53,46 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, + [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, }; -static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) +/* + * Reverse CPUID and its derivatives can only be used for hardware-defined + * feature words, i.e. words whose bits directly correspond to a CPUID leaf. + * Retrieving a feature bit or masking guest CPUID from a Linux-defined word + * is nonsensical as the bit number/mask is an arbitrary software-defined value + * and can't be used by KVM to query/control guest capabilities. And obviously + * the leaf being queried must have an entry in the lookup table. 
+ */ +static __always_inline void reverse_cpuid_check(unsigned x86_leaf) { - unsigned x86_leaf = x86_feature / 32; - + BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); +} + +/* + * Retrieve the bit mask from an X86_FEATURE_* definition. Features contain + * the hardware defined bit number (stored in bits 4:0) and a software defined + * "word" (stored in bits 31:5). The word is used to index into arrays of + * bit masks that hold the per-cpu feature capabilities, e.g. this_cpu_has(). + */ +static __always_inline u32 __feature_bit(int x86_feature) +{ + reverse_cpuid_check(x86_feature / 32); + return 1 << (x86_feature & 31); +} +#define feature_bit(name) __feature_bit(X86_FEATURE_##name) + +static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) +{ + unsigned x86_leaf = x86_feature / 32; + + reverse_cpuid_check(x86_leaf); return reverse_cpuid[x86_leaf]; } @@ -93,15 +124,11 @@ static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_ { int *reg; - if (x86_feature == X86_FEATURE_XSAVE && - !static_cpu_has(X86_FEATURE_XSAVE)) - return false; - reg = guest_cpuid_get_register(vcpu, x86_feature); if (!reg) return false; - return *reg & bit(x86_feature); + return *reg & __feature_bit(x86_feature); } static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) @@ -110,7 +137,7 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x8 reg = guest_cpuid_get_register(vcpu, x86_feature); if (reg) - *reg &= ~bit(x86_feature); + *reg &= ~__feature_bit(x86_feature); } static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 952d1a4f4d7e..ddbc61984227 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -22,6 +22,7 @@ #include "kvm_cache_regs.h" #include <asm/kvm_emulate.h> #include <linux/stringify.h> +#include <asm/fpu/api.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> @@ -310,7 +311,9 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) #define ON64(x) #endif -static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); +typedef void (*fastop_t)(struct fastop *); + +static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ @@ -1075,8 +1078,23 @@ static void fetch_register_operand(struct operand *op) } } -static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) +static void emulator_get_fpu(void) +{ + fpregs_lock(); + + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); +} + +static void emulator_put_fpu(void) { + fpregs_unlock(); +} + +static void read_sse_reg(sse128_t *data, int reg) +{ + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1098,11 +1116,12 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } + emulator_put_fpu(); } -static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, - int reg) +static void write_sse_reg(sse128_t *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; 
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1124,10 +1143,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } + emulator_put_fpu(); } -static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +static void read_mmx_reg(u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1139,10 +1160,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } -static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +static void write_mmx_reg(u64 *data, int reg) { + emulator_get_fpu(); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1154,6 +1177,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } + emulator_put_fpu(); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1161,7 +1185,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fninit"); + emulator_put_fpu(); return X86EMUL_CONTINUE; } @@ -1172,7 +1198,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstcw %0": "+m"(fcw)); + emulator_put_fpu(); ctxt->dst.val = fcw; @@ -1186,7 +1214,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); + emulator_get_fpu(); asm volatile("fnstsw %0": "+m"(fsw)); + emulator_put_fpu(); ctxt->dst.val = fsw; @@ -1205,7 +1235,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_XMM; op->bytes = 16; op->addr.xmm = reg; - read_sse_reg(ctxt, &op->vec_val, reg); + read_sse_reg(&op->vec_val, reg); return; } if (ctxt->d & Mmx) { @@ -1256,7 +1286,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->type = OP_XMM; op->bytes = 16; op->addr.xmm = ctxt->modrm_rm; - read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); + read_sse_reg(&op->vec_val, ctxt->modrm_rm); return rc; } if (ctxt->d & Mmx) { @@ -1833,10 +1863,10 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) op->bytes * op->count); break; case OP_XMM: - write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); + write_sse_reg(&op->vec_val, op->addr.xmm); break; case OP_MM: - write_mmx_reg(ctxt, &op->mm_val, op->addr.mm); + write_mmx_reg(&op->mm_val, op->addr.mm); break; case OP_NONE: /* no writeback */ @@ -2348,12 +2378,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) { #ifdef CONFIG_X86_64 - u32 eax, ebx, ecx, edx; - - eax = 0x80000001; - ecx = 0; - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - return edx & bit(X86_FEATURE_LM); + return ctxt->ops->guest_has_long_mode(ctxt); #else return false; #endif @@ -3618,18 +3643,11 @@ static int em_mov(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -#define FFL(x) bit(X86_FEATURE_##x) - static int em_movbe(struct x86_emulate_ctxt *ctxt) { - u32 ebx, ecx, edx, eax = 1; u16 tmp; - /* - * Check MOVBE is set in the guest-visible CPUID leaf. 
- */ - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - if (!(ecx & FFL(MOVBE))) + if (!ctxt->ops->guest_has_movbe(ctxt)) return emulate_ud(ctxt); switch (ctxt->op_bytes) { @@ -4027,10 +4045,7 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt) static int check_fxsr(struct x86_emulate_ctxt *ctxt) { - u32 eax = 1, ebx, ecx = 0, edx; - - ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); - if (!(edx & FFL(FXSR))) + if (!ctxt->ops->guest_has_fxsr(ctxt)) return emulate_ud(ctxt); if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) @@ -4092,8 +4107,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + emulator_put_fpu(); + if (rc != X86EMUL_CONTINUE) return rc; @@ -4136,6 +4155,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + emulator_get_fpu(); + if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4151,6 +4172,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: + emulator_put_fpu(); + return rc; } @@ -5210,16 +5233,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 
0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -5303,10 +5338,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) @@ -5448,7 +5488,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; + emulator_get_fpu(); rc = asm_safe("fwait"); + emulator_put_fpu(); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); @@ -5456,14 +5498,13 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, - struct operand *op) +static void fetch_possible_mmx_operand(struct operand *op) { if (op->type == OP_MM) - read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); + read_mmx_reg(&op->mm_val, op->addr.mm); } -static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) +static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) { ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; @@ -5539,10 +5580,10 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) * Now that we know the fpu is exception safe, we can fetch * operands from it. */ - fetch_possible_mmx_operand(ctxt, &ctxt->src); - fetch_possible_mmx_operand(ctxt, &ctxt->src2); + fetch_possible_mmx_operand(&ctxt->src); + fetch_possible_mmx_operand(&ctxt->src2); if (!(ctxt->d & Mov)) - fetch_possible_mmx_operand(ctxt, &ctxt->dst); + fetch_possible_mmx_operand(&ctxt->dst); } if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { @@ -5641,14 +5682,10 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) { - void (*fop)(struct fastop *) = (void *)ctxt->execute; - rc = fastop(ctxt, fop); - if (rc != X86EMUL_CONTINUE) - goto done; - goto writeback; - } - rc = ctxt->execute(ctxt); + if (ctxt->d & Fastop) + rc = fastop(ctxt, (fastop_t)ctxt->execute); + else + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 23ff65504d7e..4df1c965bf1a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -33,6 +33,7 @@ #include <trace/events/kvm.h> #include "trace.h" +#include "irq.h" #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) @@ -809,11 +810,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -852,11 +854,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } @@ -1058,7 +1061,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data, return 1; break; default: - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } @@ -1121,7 +1124,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; /* - * Clear apic_assist portion of f(struct hv_vp_assist_page + * Clear apic_assist portion of struct hv_vp_assist_page * only, there can be valuable data in the rest which needs * to be preserved e.g. on migration. */ @@ -1178,7 +1181,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; break; default: - vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8b38bb4868a6..629a09ca9860 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 9fd2dd89a1c5..26aa22cb9b29 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/nospec.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> @@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : @@ -108,8 +110,9 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; - if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, - e->fields.dest_mode)) + if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + e->fields.dest_id, + kvm_lapic_irq_dest_mode(!!e->fields.dest_mode))) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); @@ -188,7 +191,7 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, /* * Return 0 for coalesced interrupts; for edge-triggered interrupts, * this only happens if a previous edge has not been delivered due - * do masking. For level interrupts, the remote_irr field tells + * to masking. For level interrupts, the remote_irr field tells * us if the interrupt is waiting for an EOI. 
* * RTC is special: it is edge-triggered, but userspace likes to know @@ -250,8 +253,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || index == RTC_GSI) { - if (kvm_apic_match_dest(vcpu, NULL, 0, - e->fields.dest_id, e->fields.dest_mode) || + u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); + + if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + e->fields.dest_id, dm) || kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); @@ -292,6 +297,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ @@ -327,11 +333,12 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; - irq.shorthand = 0; + irq.shorthand = APIC_DEST_NOSHORT; irq.vector = e->fields.vector; irq.delivery_mode = e->fields.delivery_mode << 8; irq.dest_id = e->fields.dest_id; - irq.dest_mode = e->fields.dest_mode; + irq.dest_mode = + kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); bitmap_zero(&vcpu_bitmap, 16); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); @@ -343,7 +350,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) * keep ioapic_handled_vectors synchronized. */ irq.dest_id = old_dest_id; - irq.dest_mode = old_dest_mode; + irq.dest_mode = + kvm_lapic_irq_dest_mode( + !!e->fields.dest_mode); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); } @@ -369,11 +378,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.dest_id = entry->fields.dest_id; irqe.vector = entry->fields.vector; - irqe.dest_mode = entry->fields.dest_mode; + irqe.dest_mode = kvm_lapic_irq_dest_mode(!!entry->fields.dest_mode); irqe.trig_mode = entry->fields.trig_mode; irqe.delivery_mode = entry->fields.delivery_mode << 8; irqe.level = 1; - irqe.shorthand = 0; + irqe.shorthand = APIC_DEST_NOSHORT; irqe.msi_redir_hint = false; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ea1a4e0297da..2fb2e3c80724 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -116,9 +116,6 @@ static inline int ioapic_in_kernel(struct kvm *kvm) } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); -bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); @@ -126,9 +123,6 @@ void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, - struct dest_map *dest_map); void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 
7c6233d37c64..f173ab6b407e 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -113,5 +113,8 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu); int kvm_setup_default_irq_routing(struct kvm *kvm); int kvm_setup_empty_irq_routing(struct kvm *kvm); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, + struct dest_map *dest_map); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8ecd48d31800..79afa0bb5f41 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -52,15 +52,15 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; unsigned int dest_vcpus = 0; - if (irq->dest_mode == 0 && irq->dest_id == 0xff && - kvm_lowest_prio_delivery(irq)) { + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + return r; + + if (irq->dest_mode == APIC_DEST_PHYSICAL && + irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); irq->delivery_mode = APIC_DM_FIXED; } - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) - return r; - memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -114,13 +114,14 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi); irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq->dest_mode = kvm_lapic_irq_dest_mode( + !!((1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo)); irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; irq->delivery_mode = e->msi.data & 0x700; irq->msi_redir_hint = ((e->msi.address_lo & MSI_ADDR_REDIRECTION_LOWPRI) > 0); irq->level = 1; - irq->shorthand = 0; + irq->shorthand = APIC_DEST_NOSHORT; } EXPORT_SYMBOL_GPL(kvm_set_msi_irq); @@ -416,7 +417,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.level && + kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cf9177b4a07f..cce1e6b204c8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -56,9 +56,6 @@ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) #define LAPIC_MMIO_LENGTH (1 << 12) /* followed define is not in apicdef.h */ -#define APIC_SHORT_MASK 0xc0000 -#define APIC_DEST_NOSHORT 0x0 -#define APIC_DEST_MASK 0x800 #define MAX_APIC_VECTOR 256 #define APIC_VECTORS_PER_REG 32 @@ -792,13 +789,13 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, } bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode) + int shorthand, unsigned int dest, int dest_mode) { struct kvm_lapic *target = vcpu->arch.apic; u32 mda = kvm_apic_mda(vcpu, dest, source, target); ASSERT(target); - switch (short_hand) { + switch (shorthand) { case APIC_DEST_NOSHORT: if (dest_mode == APIC_DEST_PHYSICAL) return kvm_apic_match_physical_addr(target, mda); @@ -967,12 +964,12 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, } /* - * This routine tries to handler interrupts in posted mode, here is how + * This routine tries to handle interrupts in posted mode, here is 
how * it deals with different cases: * - For single-destination interrupts, handle it in posted mode * - Else if vector hashing is enabled and it is a lowest-priority * interrupt, handle it in posted mode and use the following mechanism - * to find the destinaiton vCPU. + * to find the destination vCPU. * 1. For lowest-priority interrupts, store all the possible * destination vCPUs in an array. * 2. Use "guest vector % max number of destination vCPUs" to find @@ -1151,7 +1148,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, if (!kvm_apic_present(vcpu)) continue; if (!kvm_apic_match_dest(vcpu, NULL, - irq->delivery_mode, + irq->shorthand, irq->dest_id, irq->dest_mode)) continue; @@ -1574,9 +1571,9 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) struct kvm_timer *ktimer = &apic->lapic_timer; kvm_apic_local_deliver(apic, APIC_LVTT); - if (apic_lvtt_tscdeadline(apic)) + if (apic_lvtt_tscdeadline(apic)) { ktimer->tscdeadline = 0; - if (apic_lvtt_oneshot(apic)) { + } else if (apic_lvtt_oneshot(apic)) { ktimer->tscdeadline = 0; ktimer->target_expiration = 0; } @@ -1963,15 +1960,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) @@ -2373,14 +2375,13 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); - int r = 0; if (!kvm_apic_hw_enabled(vcpu->arch.apic)) - r = 1; + return 1; if ((lvt0 & APIC_LVT_MASKED) == 0 && GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) - r = 1; - return r; + return 1; + return 0; } void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 39925afdfcdc..ec730ce7a344 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -10,8 +10,9 @@ #define KVM_APIC_SIPI 1 #define KVM_APIC_LVT_NUM 6 -#define KVM_APIC_SHORT_MASK 0xc0000 -#define KVM_APIC_DEST_MASK 0x800 +#define APIC_SHORT_MASK 0xc0000 +#define APIC_DEST_NOSHORT 0x0 +#define APIC_DEST_MASK 0x800 #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) @@ -82,8 +83,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val); int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, void *data); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, unsigned int dest, int dest_mode); - + int shorthand, unsigned int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a32b847a8089..adc84f0f16ba 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte) * requires a full MMU zap). 
The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ MMIO_SPTE_GEN_LOW_START) -#define MMIO_SPTE_GEN_HIGH_START 52 -#define MMIO_SPTE_GEN_HIGH_END 61 +#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT +#define MMIO_SPTE_GEN_HIGH_END 62 #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) + static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); + BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; @@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte) { u64 gen; - spte &= ~shadow_mmio_mask; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; return gen; @@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static u8 kvm_get_shadow_phys_bits(void) { /* - * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected - * in CPU detection code, but MKTME treats those reduced bits as - * 'keyID' thus they are not reserved bits. Therefore for MKTME - * we should still return physical address bits reported by CPUID. + * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected + * in CPU detection code, but the processor treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at + * the physical address bits reported by CPUID. */ - if (!boot_cpu_has(X86_FEATURE_TME) || - WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) - return boot_cpu_data.x86_phys_bits; + if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) + return cpuid_eax(0x80000008) & 0xff; - return cpuid_eax(0x80000008) & 0xff; + /* + * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with + * custom CPUID. Proceed with whatever the kernel found since these features + * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). 
+ */ + return boot_cpu_data.x86_phys_bits; } static void kvm_mmu_reset_all_pte_masks(void) @@ -1260,56 +1264,6 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) list_del(&sp->lpage_disallowed_link); } -static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, - struct kvm_memory_slot *slot) -{ - struct kvm_lpage_info *linfo; - - if (slot) { - linfo = lpage_info_slot(gfn, slot, level); - return !!linfo->disallow_lpage; - } - - return true; -} - -static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, - int level) -{ - struct kvm_memory_slot *slot; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); -} - -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) -{ - unsigned long page_size; - int i, ret = 0; - - page_size = kvm_host_page_size(kvm, gfn); - - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - if (page_size >= KVM_HPAGE_SIZE(i)) - ret = i; - else - break; - } - - return ret; -} - -static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, - bool no_dirty_log) -{ - if (!slot || slot->flags & KVM_MEMSLOT_INVALID) - return false; - if (no_dirty_log && slot->dirty_bitmap) - return false; - - return true; -} - static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) @@ -1317,40 +1271,14 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!memslot_valid_for_gpte(slot, no_dirty_log)) - slot = NULL; + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return NULL; + if (no_dirty_log && slot->dirty_bitmap) + return NULL; return slot; } -static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, - bool *force_pt_level) -{ - int host_level, level, max_level; - struct kvm_memory_slot *slot; - - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); - *force_pt_level = !memslot_valid_for_gpte(slot, true); - if (unlikely(*force_pt_level)) - return PT_PAGE_TABLE_LEVEL; - - host_level = host_mapping_level(vcpu->kvm, large_gfn); - - if (host_level == PT_PAGE_TABLE_LEVEL) - return host_level; - - max_level = min(kvm_x86_ops->get_lpage_level(), host_level); - - for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (__mmu_gfn_lpage_is_disallowed(large_gfn, level, slot)) - break; - - return level - 1; -} - /* * About rmap_head encoding: * @@ -1410,7 +1338,7 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, if (j != 0) return; if (!prev_desc && !desc->more) - rmap_head->val = (unsigned long)desc->sptes[0]; + rmap_head->val = 0; else if (prev_desc) prev_desc->more = desc->more; @@ -1525,7 +1453,7 @@ struct rmap_iterator { /* * Iteration must be started by this function. This should also be used after * removing/dropping sptes from the rmap link because in such cases the - * information in the itererator may not be valid. + * information in the iterator may not be valid. * * Returns sptep if found, NULL otherwise. 
*/ @@ -2899,6 +2827,26 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); } +static int make_mmu_pages_available(struct kvm_vcpu *vcpu) +{ + LIST_HEAD(invalid_list); + + if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) + return 0; + + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { + if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) + break; + + ++vcpu->kvm->stat.mmu_recycled; + } + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + + if (!kvm_mmu_available_pages(vcpu->kvm)) + return -ENOSPC; + return 0; +} + /* * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock @@ -3099,17 +3047,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= (u64)pfn << PAGE_SHIFT; if (pte_access & ACC_WRITE_MASK) { - - /* - * Other vcpu creates new sp in the window between - * mapping_level() and acquiring mmu-lock. We can - * allow guest to retry the access, the mapping can - * be fixed if guest refault. - */ - if (level > PT_PAGE_TABLE_LEVEL && - mmu_gfn_lpage_is_disallowed(vcpu, gfn, level)) - goto done; - spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; /* @@ -3141,7 +3078,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, set_pte: if (mmu_spte_update(sptep, spte)) ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; -done: return ret; } @@ -3294,6 +3230,83 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } +static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, + kvm_pfn_t pfn, struct kvm_memory_slot *slot) +{ + unsigned long hva; + pte_t *pte; + int level; + + BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K || + PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M || + PT_PDPE_LEVEL != (int)PG_LEVEL_1G); + + if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn)) + return PT_PAGE_TABLE_LEVEL; + + /* + * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() + * is not solely for performance, it's also necessary to avoid the + * "writable" check in __gfn_to_hva_many(), which will always fail on + * read-only memslots due to gfn_to_hva() assuming writes. Earlier + * page fault steps have already verified the guest isn't writing a + * read-only memslot. 
+ */ + hva = __gfn_to_hva_memslot(slot, gfn); + + pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level); + if (unlikely(!pte)) + return PT_PAGE_TABLE_LEVEL; + + return level; +} + +static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, + int max_level, kvm_pfn_t *pfnp) +{ + struct kvm_memory_slot *slot; + struct kvm_lpage_info *linfo; + kvm_pfn_t pfn = *pfnp; + kvm_pfn_t mask; + int level; + + if (unlikely(max_level == PT_PAGE_TABLE_LEVEL)) + return PT_PAGE_TABLE_LEVEL; + + if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn)) + return PT_PAGE_TABLE_LEVEL; + + slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true); + if (!slot) + return PT_PAGE_TABLE_LEVEL; + + max_level = min(max_level, kvm_x86_ops->get_lpage_level()); + for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) { + linfo = lpage_info_slot(gfn, slot, max_level); + if (!linfo->disallow_lpage) + break; + } + + if (max_level == PT_PAGE_TABLE_LEVEL) + return PT_PAGE_TABLE_LEVEL; + + level = host_pfn_mapping_level(vcpu, gfn, pfn, slot); + if (level == PT_PAGE_TABLE_LEVEL) + return level; + + level = min(level, max_level); + + /* + * mmu_notifier_retry() was successful and mmu_lock is held, so + * the pmd can't be split from under us. + */ + mask = KVM_PAGES_PER_HPAGE(level) - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + *pfnp = pfn & ~mask; + + return level; +} + static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { @@ -3318,18 +3331,20 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, } static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, - int map_writable, int level, kvm_pfn_t pfn, - bool prefault, bool lpage_disallowed) + int map_writable, int max_level, kvm_pfn_t pfn, + bool prefault, bool account_disallowed_nx_lpage) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; - int ret; + int level, ret; gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t base_gfn = gfn; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) return RET_PF_RETRY; + level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn); + trace_kvm_mmu_spte_requested(gpa, level, pfn); for_each_shadow_entry(vcpu, gpa, it) { /* @@ -3348,7 +3363,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, it.level - 1, true, ACC_ALL); link_shadow_page(vcpu, it.sptep, sp); - if (lpage_disallowed) + if (account_disallowed_nx_lpage) account_huge_nx_page(vcpu->kvm, sp); } } @@ -3384,45 +3399,6 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return -EFAULT; } -static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t gfn, kvm_pfn_t *pfnp, - int *levelp) -{ - kvm_pfn_t pfn = *pfnp; - int level = *levelp; - - /* - * Check if it's a transparent hugepage. If this would be an - * hugetlbfs page, level wouldn't be set to - * PT_PAGE_TABLE_LEVEL and there would be no adjustment done - * here. 
- */ - if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && - PageTransCompoundMap(pfn_to_page(pfn)) && - !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { - unsigned long mask; - /* - * mmu_notifier_retry was successful and we hold the - * mmu_lock here, so the pmd can't become splitting - * from under us, and in turn - * __split_huge_page_refcount() can't run from under - * us and we can safely transfer the refcount from - * PG_tail to PG_head as we switch the pfn to tail to - * head. - */ - *levelp = level = PT_DIRECTORY_LEVEL; - mask = KVM_PAGES_PER_HPAGE(level) - 1; - VM_BUG_ON((gfn & mask) != (pfn & mask)); - if (pfn & mask) { - kvm_release_pfn_clean(pfn); - pfn &= ~mask; - kvm_get_pfn(pfn); - *pfnp = pfn; - } - } -} - static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, kvm_pfn_t pfn, unsigned access, int *ret_val) { @@ -3528,7 +3504,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) * - true: let the vcpu to access on the same address again. * - false: let the real page fault path to fix it. */ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3537,9 +3513,6 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, u64 spte = 0ull; uint retry_count = 0; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return false; - if (!page_fault_can_be_fast(error_code)) return false; @@ -3548,9 +3521,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) - if (!is_shadow_present_pte(spte) || - iterator.level < level) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + if (!is_shadow_present_pte(spte)) break; sp = page_header(__pa(iterator.sptep)); @@ -3626,71 +3598,13 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); return fault_handled; } -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); -static int make_mmu_pages_available(struct kvm_vcpu *vcpu); - -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, - gfn_t gfn, bool prefault) -{ - int r; - int level; - bool force_pt_level; - kvm_pfn_t pfn; - unsigned long mmu_seq; - bool map_writable, write = error_code & PFERR_WRITE_MASK; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - force_pt_level = lpage_disallowed; - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - /* - * This path builds a PAE pagetable - so we can map - * 2mb pages at maximum. Therefore check if the level - * is larger than that. 
- */ - if (level > PT_DIRECTORY_LEVEL) - level = PT_DIRECTORY_LEVEL; - - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - - if (fast_page_fault(vcpu, v, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, - prefault, false); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; -} - static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, struct list_head *invalid_list) { @@ -3981,7 +3895,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3989,7 +3903,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4001,20 +3915,14 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, static bool __is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { - int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; + int bit7 = (pte >> 7) & 1; - return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | - ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); + return pte & rsvd_check->rsvd_bits_mask[bit7][level-1]; } -static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) +static bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, u64 pte) { - return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); -} - -static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) -{ - return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); + return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f); } static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) @@ -4038,11 +3946,11 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; + struct rsvd_bits_validate *rsvd_check; int root, leaf; bool reserved = false; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - goto exit; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; walk_shadow_page_lockless_begin(vcpu); @@ -4058,8 +3966,13 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) if (!is_shadow_present_pte(spte)) break; - reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte, - iterator.level); + /* + * Use a bitwise-OR instead of a logical-OR to aggregate the + * reserved bit and EPT's invalid memtype/XWR checks to avoid + * adding a Jcc in the loop. 
+ */ + reserved |= __is_bad_mt_xwr(rsvd_check, spte) | + __is_rsvd_bits_set(rsvd_check, spte, iterator.level); } walk_shadow_page_lockless_end(vcpu); @@ -4073,7 +3986,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) root--; } } -exit: + *sptep = spte; return reserved; } @@ -4137,9 +4050,6 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) struct kvm_shadow_walk_iterator iterator; u64 spte; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { clear_sp_write_flooding_count(iterator.sptep); @@ -4149,29 +4059,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, - u32 error_code, bool prefault) -{ - gfn_t gfn = gva >> PAGE_SHIFT; - int r; - - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; - - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - - - return nonpaging_map(vcpu, gva & PAGE_MASK, - error_code, gfn, prefault); -} - -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4180,11 +4069,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4204,12 +4095,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4217,11 +4108,77 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; } +static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault, int max_level, bool is_tdp) +{ + bool write = error_code & PFERR_WRITE_MASK; + bool exec = error_code & PFERR_FETCH_MASK; + bool lpage_disallowed = exec && is_nx_huge_page_enabled(); + bool map_writable; + + gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; + kvm_pfn_t pfn; + int r; + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; + + r = mmu_topup_memory_caches(vcpu); + if (r) + return r; + + if (lpage_disallowed) + max_level = PT_PAGE_TABLE_LEVEL; + + if (fast_page_fault(vcpu, gpa, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + + if 
(try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) + return RET_PF_RETRY; + + if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + r = __direct_map(vcpu, gpa, write, map_writable, max_level, pfn, + prefault, is_tdp && lpage_disallowed); + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return r; +} + +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, + u32 error_code, bool prefault) +{ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); + + /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */ + return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault, + PT_DIRECTORY_LEVEL, false); +} + int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len) { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4249,76 +4206,23 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); -static bool -check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) -{ - int page_num = KVM_PAGES_PER_HPAGE(level); - - gfn &= ~(page_num - 1); - - return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); -} - -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - kvm_pfn_t pfn; - int r; - int level; - bool force_pt_level; - gfn_t gfn = gpa >> PAGE_SHIFT; - unsigned long mmu_seq; - int write = error_code & PFERR_WRITE_MASK; - bool map_writable; - bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && - is_nx_huge_page_enabled(); - - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + int max_level; - if (page_fault_handle_page_track(vcpu, error_code, gfn)) - return RET_PF_EMULATE; + for (max_level = PT_MAX_HUGEPAGE_LEVEL; + max_level > PT_PAGE_TABLE_LEVEL; + max_level--) { + int page_num = KVM_PAGES_PER_HPAGE(max_level); + gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1); - r = mmu_topup_memory_caches(vcpu); - if (r) - return r; - - force_pt_level = - lpage_disallowed || - !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); - level = mapping_level(vcpu, gfn, &force_pt_level); - if (likely(!force_pt_level)) { - if (level > PT_DIRECTORY_LEVEL && - !check_hugepage_cache_consistency(vcpu, gfn, level)) - level = PT_DIRECTORY_LEVEL; - gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); + if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) + break; } - if (fast_page_fault(vcpu, gpa, level, error_code)) - return RET_PF_RETRY; - - mmu_seq = vcpu->kvm->mmu_notifier_seq; - smp_rmb(); - - if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) - return RET_PF_RETRY; - - if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) - return r; - - r = RET_PF_RETRY; - spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) - goto out_unlock; - if (make_mmu_pages_available(vcpu) < 0) - goto out_unlock; - if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, 
gpa, write, map_writable, level, pfn, - prefault, lpage_disallowed); -out_unlock: - spin_unlock(&vcpu->kvm->mmu_lock); - kvm_release_pfn_clean(pfn); - return r; + return direct_page_fault(vcpu, gpa, error_code, prefault, + max_level, true); } static void nonpaging_init_context(struct kvm_vcpu *vcpu, @@ -5496,47 +5400,30 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -static int make_mmu_pages_available(struct kvm_vcpu *vcpu) -{ - LIST_HEAD(invalid_list); - - if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) - return 0; - - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { - if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) - break; - - ++vcpu->kvm->stat.mmu_recycled; - } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - - if (!kvm_mmu_available_pages(vcpu->kvm)) - return -ENOSPC; - return 0; -} - -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; bool direct = vcpu->arch.mmu->direct_map; + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) + return RET_PF_RETRY; + /* With shadow page tables, fault_address contains a GVA or nGPA. */ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5556,7 +5443,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5571,7 +5458,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5586,7 +5473,7 @@ emulate: return 1; } - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -6015,8 +5902,8 @@ restart: * mapping if the indirect sp has level = 1. */ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && - !kvm_is_zone_device_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + (kvm_is_zone_device_pfn(pfn) || + PageCompound(pfn_to_page(pfn)))) { pte_list_remove(rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) @@ -6249,7 +6136,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. 
*/ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) + if (shadow_phys_bits == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 97b21e7fd013..4e1ef0473663 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -128,6 +128,21 @@ static inline int FNAME(is_present_gpte)(unsigned long pte) #endif } +static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte) +{ +#if PTTYPE != PTTYPE_EPT + return false; +#else + return __is_bad_mt_xwr(rsvd_check, gpte); +#endif +} + +static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) +{ + return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) || + FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); +} + static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) @@ -175,9 +190,6 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { - if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) - goto no_present; - if (!FNAME(is_present_gpte)(gpte)) goto no_present; @@ -186,6 +198,9 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; + if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + goto no_present; + return false; no_present: @@ -291,11 +306,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) } /* - * Fetch a guest pte for a guest virtual address + * Fetch a guest pte for a guest virtual address, or for an L2's GPA. */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) + gpa_t addr, u32 access) { int ret; pt_element_t pte; @@ -400,7 +415,7 @@ retry_walk: if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; - if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { + if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) { errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } @@ -496,7 +511,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -611,17 +626,17 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. 
*/ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, struct guest_walker *gw, - int write_fault, int hlevel, + int write_fault, int max_level, kvm_pfn_t pfn, bool map_writable, bool prefault, bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, ret; - gfn_t gfn, base_gfn; + int top_level, hlevel, ret; + gfn_t base_gfn = gw->gfn; direct_access = gw->pte_access; @@ -637,7 +652,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) + if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) goto out_gpte_changed; for (shadow_walk_init(&it, vcpu, addr); @@ -666,12 +681,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - /* - * FNAME(page_fault) might have clobbered the bottom bits of - * gw->gfn, restore them from the virtual address. - */ - gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); - base_gfn = gfn; + hlevel = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn); trace_kvm_mmu_spte_requested(addr, gw->level, pfn); @@ -682,9 +692,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. */ - disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + disallowed_hugepage_adjust(it, gw->gfn, &pfn, &hlevel); - base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); if (it.level == hlevel) break; @@ -765,7 +775,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. 
*/ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, bool prefault) { int write_fault = error_code & PFERR_WRITE_MASK; @@ -773,12 +783,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, struct guest_walker walker; int r; kvm_pfn_t pfn; - int level = PT_PAGE_TABLE_LEVEL; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && is_nx_huge_page_enabled(); - bool force_pt_level = lpage_disallowed; + int max_level; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -818,14 +827,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); - if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { - level = mapping_level(vcpu, walker.gfn, &force_pt_level); - if (likely(!force_pt_level)) { - level = min(walker.level, level); - walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); - } - } else - force_pt_level = true; + if (lpage_disallowed || is_self_change_mapping) + max_level = PT_PAGE_TABLE_LEVEL; + else + max_level = walker.level; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); @@ -865,10 +870,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; - if (!force_pt_level) - transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); - r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault, lpage_disallowed); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn, + map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); out_unlock: @@ -945,18 +948,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) spin_unlock(&vcpu->kvm->mmu_lock); } -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, +/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); + r = FNAME(walk_addr)(&walker, vcpu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; + gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; @@ -964,7 +968,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, } #if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, +/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -972,6 +977,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; +#ifndef CONFIG_X86_64 + /* A 64-bit GVA should be impossible on 32-bit KVM. 
 */
+	WARN_ON_ONCE(vaddr >> 32);
+#endif
+
 	r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
 	if (r) {
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 7ca8831c7d1a..3c6522b84ff1 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -249,13 +249,13 @@ TRACE_EVENT(
 
 TRACE_EVENT(
 	fast_page_fault,
-	TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+	TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
 		 u64 *sptep, u64 old_spte, bool retry),
-	TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
+	TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
 
 	TP_STRUCT__entry(
 		__field(int, vcpu_id)
-		__field(gva_t, gva)
+		__field(gpa_t, cr2_or_gpa)
 		__field(u32, error_code)
 		__field(u64 *, sptep)
 		__field(u64, old_spte)
@@ -265,7 +265,7 @@ TRACE_EVENT(
 
 	TP_fast_assign(
 		__entry->vcpu_id = vcpu->vcpu_id;
-		__entry->gva = gva;
+		__entry->cr2_or_gpa = cr2_or_gpa;
 		__entry->error_code = error_code;
 		__entry->sptep = sptep;
 		__entry->old_spte = old_spte;
@@ -273,9 +273,9 @@ TRACE_EVENT(
 		__entry->retry = retry;
 	),
 
-	TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
+	TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
 		  " new %llx spurious %d fixed %d", __entry->vcpu_id,
-		  __entry->gva, __print_flags(__entry->error_code, "|",
+		  __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
 		  kvm_mmu_trace_pferr_flags), __entry->sptep,
 		  __entry->old_spte, __entry->new_spte,
 		  __spte_satisfied(old_spte), __spte_satisfied(new_spte)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 25ce3edd1872..7f0059aa30e1 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
 		break;
 	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
 		*seg = 1;
-		*unit = msr - MSR_MTRRfix16K_80000;
+		*unit = array_index_nospec(
+			msr - MSR_MTRRfix16K_80000,
+			MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
 		break;
 	case MSR_MTRRfix4K_C0000 ...
MSR_MTRRfix4K_F8000:
 		*seg = 2;
-		*unit = msr - MSR_MTRRfix4K_C0000;
+		*unit = array_index_nospec(
+			msr - MSR_MTRRfix4K_C0000,
+			MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
 		break;
 	default:
 		return false;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7ebb62326c14..13332984b6d5 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -2,6 +2,8 @@
 #ifndef __KVM_X86_PMU_H
 #define __KVM_X86_PMU_H
 
+#include <linux/nospec.h>
+
 #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
 #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu))
 #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu)
@@ -102,8 +104,12 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
 static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
 					 u32 base)
 {
-	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
-		return &pmu->gp_counters[msr - base];
+	if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
+		u32 index = array_index_nospec(msr - base,
+					       pmu->nr_arch_gp_counters);
+
+		return &pmu->gp_counters[index];
+	}
 
 	return NULL;
 }
@@ -113,8 +119,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
 {
 	int base = MSR_CORE_PERF_FIXED_CTR0;
 
-	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
-		return &pmu->fixed_counters[msr - base];
+	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
+		u32 index = array_index_nospec(msr - base,
+					       pmu->nr_arch_fixed_counters);
+
+		return &pmu->fixed_counters[index];
+	}
 
 	return NULL;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 122d4ce3b1ab..9dbb990c319a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1307,6 +1307,47 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * The default MMIO mask is a single bit (excluding the present bit),
+ * which could conflict with the memory encryption bit. Check for
+ * memory encryption support and override the default MMIO mask if
+ * memory encryption is enabled.
+ */
+static __init void svm_adjust_mmio_mask(void)
+{
+	unsigned int enc_bit, mask_bit;
+	u64 msr, mask;
+
+	/* If there is no memory encryption support, use existing mask */
+	if (cpuid_eax(0x80000000) < 0x8000001f)
+		return;
+
+	/* If memory encryption is not enabled, use existing mask */
+	rdmsrl(MSR_K8_SYSCFG, msr);
+	if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		return;
+
+	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+	mask_bit = boot_cpu_data.x86_phys_bits;
+
+	/* Increment the mask bit if it is the same as the encryption bit */
+	if (enc_bit == mask_bit)
+		mask_bit++;
+
+	/*
+	 * If the mask bit location is below 52, then some bits above the
+	 * physical addressing limit will always be reserved, so use the
+	 * rsvd_bits() function to generate the mask. This mask, along with
+	 * the present bit, will be used to generate a page fault with
+	 * PFER.RSV = 1.
+	 *
+	 * If the mask bit location is 52 (or above), then clear the mask.
+	 */
+	mask = (mask_bit < 52) ?
rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1361,6 +1402,8 @@ static __init int svm_hardware_setup(void) } } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -2144,7 +2187,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm) return ret; } -static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) +static int svm_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *page; @@ -2153,39 +2196,13 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *nested_msrpm_pages; int err; - BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!svm) { - err = -ENOMEM; - goto out; - } - - svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!svm->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_svm; - } - - svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!svm->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } - - err = kvm_vcpu_init(&svm->vcpu, kvm, id); - if (err) - goto free_svm; + BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); + svm = to_svm(vcpu); err = -ENOMEM; page = alloc_page(GFP_KERNEL_ACCOUNT); if (!page) - goto uninit; + goto out; msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); if (!msrpm_pages) @@ -2222,9 +2239,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->asid_generation = 0; init_vmcb(svm); - svm_init_osvw(&svm->vcpu); + svm_init_osvw(vcpu); - return &svm->vcpu; + return 0; free_page4: __free_page(hsave_page); @@ -2234,16 +2251,8 @@ free_page2: __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); free_page1: __free_page(page); -uninit: - kvm_vcpu_uninit(&svm->vcpu); -free_svm: - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); -free_partial_svm: - kmem_cache_free(kvm_vcpu_cache, svm); out: - return ERR_PTR(err); + return err; } static void svm_clear_current_vmcb(struct vmcb *vmcb) @@ -2269,10 +2278,6 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, svm); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -4281,12 +4286,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + if (data & ~kvm_spec_ctrl_valid_bits(vcpu)) return 1; svm->spec_ctrl = data; - if (!data) break; @@ -4310,13 +4313,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & ~PRED_CMD_IBPB) return 1; - + if (!boot_cpu_has(X86_FEATURE_AMD_IBPB)) + return 1; if (!data) break; 
wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); - if (is_guest_mode(vcpu)) - break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; case MSR_AMD64_VIRT_SPEC_CTRL: @@ -4519,9 +4521,9 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) */ kvm_for_each_vcpu(i, vcpu, kvm) { bool m = kvm_apic_match_dest(vcpu, apic, - icrl & KVM_APIC_SHORT_MASK, + icrl & APIC_SHORT_MASK, GET_APIC_DEST_FIELD(icrh), - icrl & KVM_APIC_DEST_MASK); + icrl & APIC_DEST_MASK); if (m && !avic_vcpu_is_running(vcpu)) kvm_vcpu_wake_up(vcpu); @@ -4935,7 +4937,8 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = control->exit_info_2; } -static int handle_exit(struct kvm_vcpu *vcpu) +static int handle_exit(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -4993,7 +4996,10 @@ static int handle_exit(struct kvm_vcpu *vcpu) __func__, svm->vmcb->control.exit_int_info, exit_code); - if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { + kvm_skip_emulated_instruction(vcpu); + return 1; + } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); dump_vmcb(vcpu); @@ -5913,6 +5919,7 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVES); /* Update nrips enabled cache */ @@ -5924,14 +5931,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } -#define F(x) bit(X86_FEATURE_##x) +#define F feature_bit static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { case 0x1: if (avic) - entry->ecx &= ~bit(X86_FEATURE_X2APIC); + entry->ecx &= ~F(X2APIC); break; case 0x80000001: if (nested) @@ -6001,6 +6008,11 @@ static bool svm_has_wbinvd_exit(void) return true; } +static bool svm_pku_supported(void) +{ + return false; +} + #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -6186,9 +6198,12 @@ out: return ret; } -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath) { - + if (!is_guest_mode(vcpu) && + to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE) + *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -7341,6 +7356,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .xsaves_supported = svm_xsaves_supported, .umip_emulated = svm_umip_emulated, .pt_supported = svm_pt_supported, + .pku_supported = svm_pku_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 7aa69716d516..283bdb7071af 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -145,6 +145,11 @@ static inline bool vmx_umip_emulated(void) SECONDARY_EXEC_DESC; } +static inline bool vmx_pku_supported(void) +{ + return boot_cpu_has(X86_FEATURE_PKU); +} + static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 
72359709cdc1..89c3e0caf39f 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -350,17 +350,12 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { struct vcpu_vmx *vmx = to_vmx(vcpu); - bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled; vmx->nested.enlightened_vmcs_enabled = true; if (vmcs_version) *vmcs_version = nested_get_evmcs_version(vcpu); - /* We don't support disabling the feature for simplicity. */ - if (evmcs_already_enabled) - return 0; - vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6879966b7648..2db21d59eaf5 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -28,16 +28,6 @@ module_param(nested_early_check, bool, S_IRUGO); failed; \ }) -#define SET_MSR_OR_WARN(vcpu, idx, data) \ -({ \ - bool failed = kvm_set_msr(vcpu, idx, data); \ - if (failed) \ - pr_warn_ratelimited( \ - "%s cannot write MSR (0x%x, 0x%llx)\n", \ - __func__, idx, data); \ - failed; \ -}) - /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -2172,8 +2162,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * EXEC CONTROLS */ exec_control = vmx_exec_control(vmx); /* L0's desires */ - exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; + exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; @@ -2550,8 +2540,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && - SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl)) + WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->guest_ia32_perf_global_ctrl))) return -EINVAL; kvm_rsp_write(vcpu, vmcs12->guest_rsp); @@ -2566,7 +2556,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) return -EINVAL; if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && - nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))) + nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING))) return -EINVAL; return 0; @@ -2823,7 +2813,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; -#ifdef CONFIG_X86_64 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || @@ -2831,7 +2820,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; -#endif /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the @@ -2899,6 +2887,10 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) return -EINVAL; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && + CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + return -EINVAL; + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && 
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) return -EINVAL; @@ -3048,9 +3040,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12); - static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -3183,7 +3172,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, u32 exit_qual; evaluate_pending_interrupts = exec_controls_get(vmx) & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); @@ -3230,7 +3219,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, } enter_guest_mode(vcpu); - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset += vmcs12->tsc_offset; if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) @@ -3294,7 +3283,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * 26.7 "VM-entry failures during or after loading guest state". */ vmentry_fail_vmexit_guest_mode: - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; leave_guest_mode(vcpu); @@ -3407,8 +3396,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) && - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) && + !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && + !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && (vmcs12->guest_rflags & X86_EFLAGS_IF))) { vmx->nested.nested_run_pending = 0; return kvm_vcpu_halt(vcpu); @@ -3427,7 +3416,7 @@ vmentry_failed: /* * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date - * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK). + * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK). * This function returns the new value we should put in vmcs12.guest_cr0. * It's not enough to just return the vmcs02 GUEST_CR0. Rather, * 1. 
Bits that neither L0 nor L1 trapped, were set directly by L2 and are now @@ -3999,8 +3988,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.pat = vmcs12->host_ia32_pat; } if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->host_ia32_perf_global_ctrl); + WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, + vmcs12->host_ia32_perf_global_ctrl)); /* Set L1 segment info according to Intel SDM 27.5.2 Loading Host Segment and Descriptor-Table Registers */ @@ -4209,7 +4198,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (nested_cpu_has_preemption_timer(vmcs12)) hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; if (likely(!vmx->fail)) { @@ -4751,36 +4740,32 @@ static int handle_vmresume(struct kvm_vcpu *vcpu) static int handle_vmread(struct kvm_vcpu *vcpu) { - unsigned long field; - u64 field_value; + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - int len; - gva_t gva = 0; - struct vmcs12 *vmcs12; + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; + unsigned long field; + u64 value; + gva_t gva = 0; short offset; + int len; if (!nested_vmx_check_permission(vcpu)) return 1; - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu)) - vmcs12 = get_vmcs12(vcpu); - else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMREAD - * to shadowed-field sets the ALU flags for VMfailInvalid. - */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - /* Decode instruction info and find the field to read */ - field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); offset = vmcs_field_to_offset(field); if (offset < 0) @@ -4790,25 +4775,26 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - /* Read the field, zero-extended to a u64 field_value */ - field_value = vmcs12_read_any(vmcs12, field, offset); + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); /* * Now copy part of this value to register or memory, as requested. * Note that the number of bits actually copied is 32 or 64 depending * on the guest's mode (32 or 64 bit), not on the given field's length. */ - if (vmx_instruction_info & (1u << 10)) { - kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), - field_value); + if (instr_info & BIT(10)) { + kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value); } else { len = is_64_bit_mode(vcpu) ? 
8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, len, &gva)) + instr_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } return nested_vmx_succeed(vcpu); @@ -4840,46 +4826,58 @@ static bool is_shadow_field_ro(unsigned long field) static int handle_vmwrite(struct kvm_vcpu *vcpu) { + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct x86_exception e; unsigned long field; - int len; + short offset; gva_t gva; - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + int len; - /* The value to write might be 32 or 64 bits, depending on L1's long + /* + * The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. The code below first zero-extends the value to 64 - * bit (field_value), and then copies only the appropriate number of + * bit (value), and then copies only the appropriate number of * bits into the vmcs12 field. */ - u64 field_value = 0; - struct x86_exception e; - struct vmcs12 *vmcs12; - short offset; + u64 value = 0; if (!nested_vmx_check_permission(vcpu)) return 1; - if (vmx->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMWRITE sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); - if (vmx_instruction_info & (1u << 10)) - field_value = kvm_register_readl(vcpu, - (((vmx_instruction_info) >> 3) & 0xf)); + if (instr_info & BIT(10)) + value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf)); else { len = is_64_bit_mode(vcpu) ? 8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, false, len, &gva)) + instr_info, false, len, &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) { + if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } } + field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf)); + + offset = vmcs_field_to_offset(field); + if (offset < 0) + return nested_vmx_failValid(vcpu, + VMXERR_UNSUPPORTED_VMCS_COMPONENT); - field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* * If the vCPU supports "VMWRITE to any supported field in the * VMCS," then the "read-only" fields are actually read/write. @@ -4889,29 +4887,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - if (!is_guest_mode(vcpu)) { - vmcs12 = get_vmcs12(vcpu); - - /* - * Ensure vmcs12 is up-to-date before any VMWRITE that dirties - * vmcs12, else we may crush a field or consume a stale value. - */ - if (!is_shadow_field_rw(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE - * to shadowed-field sets the ALU flags for VMfailInvalid. 
- */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_failValid(vcpu, - VMXERR_UNSUPPORTED_VMCS_COMPONENT); + /* + * Ensure vmcs12 is up-to-date before any VMWRITE that dirties + * vmcs12, else we may crush a field or consume a stale value. + */ + if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); /* * Some Intel CPUs intentionally drop the reserved bits of the AR byte @@ -4922,9 +4903,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) * the stripped down value, L2 sees the full value as stored by KVM). */ if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) - field_value &= 0x1f0ff; + value &= 0x1f0ff; - vmcs12_write_any(vmcs12, field, offset, field_value); + vmcs12_write_any(vmcs12, field, offset, value); /* * Do not track vmcs12 dirty-state if in guest-mode as we actually @@ -4941,7 +4922,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) preempt_disable(); vmcs_load(vmx->vmcs01.shadow_vmcs); - __vmcs_writel(field, field_value); + __vmcs_writel(field, value); vmcs_clear(vmx->vmcs01.shadow_vmcs); vmcs_load(vmx->loaded_vmcs->vmcs); @@ -5524,10 +5505,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return false; case EXIT_REASON_TRIPLE_FAULT: return true; - case EXIT_REASON_PENDING_INTERRUPT: - return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); + case EXIT_REASON_INTERRUPT_WINDOW: + return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING); case EXIT_REASON_NMI_WINDOW: - return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); + return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING); case EXIT_REASON_TASK_SWITCH: return true; case EXIT_REASON_CPUID: @@ -6015,8 +5996,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; msrs->procbased_ctls_high &= - CPU_BASED_VIRTUAL_INTR_PENDING | - CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_INTR_WINDOW_EXITING | + CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING | diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7023138b1cb0..34a3a17bb6d7 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -86,10 +86,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. 
*/ @@ -130,16 +134,20 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) - return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h index eb1ecd16fd22..cad128d1657b 100644 --- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h +++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h @@ -23,12 +23,12 @@ BUILD_BUG_ON(1) * * When adding or removing fields here, note that shadowed * fields must always be synced by prepare_vmcs02, not just - * prepare_vmcs02_full. + * prepare_vmcs02_rare. */ /* * Keeping the fields ordered by size is an attempt at improving - * branch prediction in vmcs_read_any and vmcs_write_any. + * branch prediction in vmcs12_read_any and vmcs12_write_any. */ /* 16-bits */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cdb4bf50ee14..c475fa2aaae0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1057,6 +1057,12 @@ static unsigned long segment_base(u16 selector) } #endif +static inline bool pt_can_write_msr(struct vcpu_vmx *vmx) +{ + return (pt_mode == PT_MODE_HOST_GUEST) && + !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN); +} + static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range) { u32 i; @@ -1716,7 +1722,7 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) return vcpu->arch.tsc_offset - vmcs12->tsc_offset; return vcpu->arch.tsc_offset; @@ -1734,7 +1740,7 @@ static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) g_tsc_offset = vmcs12->tsc_offset; trace_kvm_write_tsc_offset(vcpu->vcpu_id, @@ -1773,8 +1779,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) default: return 1; } - - return 0; } /* @@ -1916,7 +1920,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } /* - * Writes msr value into into the appropriate "register". + * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. 
*/ @@ -1994,12 +1998,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; - /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) + if (data & ~kvm_spec_ctrl_valid_bits(vcpu)) return 1; vmx->spec_ctrl = data; - if (!data) break; @@ -2010,7 +2012,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * * For nested: * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_merge_msr_bitmap. We should not touch the + * nested_vmx_prepare_msr_bitmap. We should not touch the * vmcs02.msr_bitmap here since it gets completely overwritten * in the merging. We update the vmcs01 here for L1 as well * since it will end up touching the MSR anyway now. @@ -2033,7 +2035,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~PRED_CMD_IBPB) return 1; - + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + return 1; if (!data) break; @@ -2046,7 +2049,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) * * For nested: * The handling of the MSR bitmap for L2 guests is done in - * nested_vmx_merge_msr_bitmap. We should not touch the + * nested_vmx_prepare_msr_bitmap. We should not touch the * vmcs02.msr_bitmap here since it gets completely overwritten * in the merging. */ @@ -2104,47 +2107,50 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) pt_update_intercept_for_msr(vmx); break; case MSR_IA32_RTIT_STATUS: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (data & MSR_IA32_RTIT_STATUS_MASK)) + if (!pt_can_write_msr(vmx)) + return 1; + if (data & MSR_IA32_RTIT_STATUS_MASK) return 1; vmx->pt_desc.guest.status = data; break; case MSR_IA32_RTIT_CR3_MATCH: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_cr3_filtering)) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_cr3_filtering)) return 1; vmx->pt_desc.guest.cr3_match = data; break; case MSR_IA32_RTIT_OUTPUT_BASE: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output)) || - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) + return 1; + if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK) return 1; vmx->pt_desc.guest.output_base = data; break; case MSR_IA32_RTIT_OUTPUT_MASK: - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (!intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_topa_output) && - !intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_single_range_output))) + if (!pt_can_write_msr(vmx)) + return 1; + if (!intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_topa_output) && + !intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_single_range_output)) return 1; vmx->pt_desc.guest.output_mask = data; break; case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: + if (!pt_can_write_msr(vmx)) + return 1; index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; - if ((pt_mode != PT_MODE_HOST_GUEST) || - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) || - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, - PT_CAP_num_address_ranges))) + if (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, + PT_CAP_num_address_ranges)) + return 1; + if (is_noncanonical_address(data, vcpu)) return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; @@ -2322,7 +2328,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, CPU_BASED_CR3_STORE_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | - CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_USE_TSC_OFFSETTING | CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | @@ -2657,8 +2663,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->rmode.vm86_active = 0; - vmx_segment_cache_clear(vmx); - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); @@ -3444,7 +3448,7 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); - int i, idx, r = 0; + int i, r = 0; kvm_pfn_t identity_map_pfn; u32 tmp; @@ -3452,7 +3456,7 @@ static int init_rmode_identity_map(struct kvm *kvm) mutex_lock(&kvm->slots_lock); if (likely(kvm_vmx->ept_identity_pagetable_done)) - goto out2; + goto out; if (!kvm_vmx->ept_identity_map_addr) kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; @@ -3461,9 +3465,8 @@ static int init_rmode_identity_map(struct kvm *kvm) r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, kvm_vmx->ept_identity_map_addr, PAGE_SIZE); if (r < 0) - goto out2; + goto out; - idx = srcu_read_lock(&kvm->srcu); r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -3479,9 +3482,6 @@ static int init_rmode_identity_map(struct kvm *kvm) kvm_vmx->ept_identity_pagetable_done = true; out: - srcu_read_unlock(&kvm->srcu, idx); - -out2: mutex_unlock(&kvm->slots_lock); return r; } @@ -4009,6 +4009,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (vmx_xsaves_supported()) { /* Exposing XSAVES only when XSAVE is exposed */ bool xsaves_enabled = + boot_cpu_has(X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); @@ -4319,7 +4320,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static void enable_irq_window(struct kvm_vcpu *vcpu) { - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); } static void enable_nmi_window(struct kvm_vcpu *vcpu) @@ -4330,7 +4331,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; } - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -4455,8 +4456,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) if (enable_unrestricted_guest) return 0; - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, - PAGE_SIZE * 3); + mutex_lock(&kvm->slots_lock); + ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, + PAGE_SIZE * 3); + mutex_unlock(&kvm->slots_lock); + if (ret) return ret; to_kvm_vmx(kvm)->tss_addr = addr; @@ -4938,7 +4942,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) static int 
handle_interrupt_window(struct kvm_vcpu *vcpu) { - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5151,7 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(!enable_vnmi); - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5172,7 +5176,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); intr_window_requested = exec_controls_get(vmx) & - CPU_BASED_VIRTUAL_INTR_PENDING; + CPU_BASED_INTR_WINDOW_EXITING; while (vmx->emulation_required && count-- != 0) { if (intr_window_requested && vmx_interrupt_allowed(vcpu)) @@ -5496,7 +5500,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_CPUID] = kvm_emulate_cpuid, [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr, [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr, - [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, + [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window, [EXIT_REASON_HLT] = kvm_emulate_halt, [EXIT_REASON_INVD] = handle_invd, [EXIT_REASON_INVLPG] = handle_invlpg, @@ -5783,7 +5787,8 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu) +static int vmx_handle_exit(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason = vmx->exit_reason; @@ -5869,34 +5874,44 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } } - if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { + if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { + kvm_skip_emulated_instruction(vcpu); + return 1; + } + + if (exit_reason >= kvm_vmx_max_exit_handlers) + goto unexpected_vmexit; #ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); #endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = + + exit_reason = array_index_nospec(exit_reason, + kvm_vmx_max_exit_handlers); 
+ if (!kvm_vmx_exit_handlers[exit_reason]) + goto unexpected_vmexit; + + return kvm_vmx_exit_handlers[exit_reason](vcpu); + +unexpected_vmexit: + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; } /* @@ -6217,7 +6232,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, + enum exit_fastpath_completion *exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6225,6 +6241,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) handle_exception_nmi_irqoff(vmx); + else if (!is_guest_mode(vcpu) && + vmx->exit_reason == EXIT_REASON_MSR_WRITE) + *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static bool vmx_has_emulated_msr(int index) @@ -6633,60 +6652,31 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_vpid(vmx->vpid); nested_vmx_free_vcpu(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); - kmem_cache_free(kvm_vcpu_cache, vmx); } -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) +static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { - int err; struct vcpu_vmx *vmx; unsigned long *msr_bitmap; - int i, cpu; - - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, - "struct kvm_vcpu must be at offset 0 for arch usercopy region"); - - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); - if (!vmx) - return ERR_PTR(-ENOMEM); + int i, cpu, err; - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.user_fpu) { - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); - err = -ENOMEM; - goto free_partial_vcpu; - } + BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); + vmx = to_vmx(vcpu); - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, - GFP_KERNEL_ACCOUNT); - if (!vmx->vcpu.arch.guest_fpu) { - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); - err = -ENOMEM; - goto free_user_fpu; - } + err = -ENOMEM; vmx->vpid = allocate_vpid(); - err = kvm_vcpu_init(&vmx->vcpu, kvm, id); - if (err) - goto free_vcpu; - - err = -ENOMEM; - /* * If PML is turned on, failure on enabling PML just results in failure * of creating the vcpu, therefore we can simplify PML logic (by * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest, etc. + * for the guest), etc. 
*/ if (enable_pml) { vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vmx->pml_pg) - goto uninit_vcpu; + goto free_vpid; } BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); @@ -6731,7 +6721,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); - if (kvm_cstate_in_guest(kvm)) { + if (kvm_cstate_in_guest(vcpu->kvm)) { vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); @@ -6741,19 +6731,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); - vmx_vcpu_load(&vmx->vcpu, cpu); - vmx->vcpu.cpu = cpu; + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; init_vmcs(vmx); - vmx_vcpu_put(&vmx->vcpu); + vmx_vcpu_put(vcpu); put_cpu(); - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - err = alloc_apic_access_page(kvm); + if (cpu_need_virtualize_apic_accesses(vcpu)) { + err = alloc_apic_access_page(vcpu->kvm); if (err) goto free_vmcs; } if (enable_ept && !enable_unrestricted_guest) { - err = init_rmode_identity_map(kvm); + err = init_rmode_identity_map(vcpu->kvm); if (err) goto free_vmcs; } @@ -6761,7 +6751,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); + kvm_vcpu_apicv_active(vcpu)); else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); @@ -6779,22 +6769,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->ept_pointer = INVALID_PAGE; - return &vmx->vcpu; + return 0; free_vmcs: free_loaded_vmcs(vmx->loaded_vmcs); free_pml: vmx_destroy_pml_buffer(vmx); -uninit_vcpu: - kvm_vcpu_uninit(&vmx->vcpu); -free_vcpu: +free_vpid: free_vpid(vmx->vpid); - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); -free_user_fpu: - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); -free_partial_vcpu: - kmem_cache_free(kvm_vcpu_cache, vmx); - return ERR_PTR(err); + return err; } #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. 
See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" @@ -6946,28 +6929,28 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) } while (0) entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); + cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME)); + cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME)); + cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC)); + cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE)); + cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE)); + cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE)); + cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE)); + cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE)); + cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR)); + cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM)); + cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX)); + cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX)); + cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID)); + cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE)); entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57)); + cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE)); + cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP)); + cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP)); + cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); + cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); #undef cr4_fixed1_update } @@ -7101,7 +7084,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { if (func == 1 && nested) - entry->ecx |= bit(X86_FEATURE_VMX); + entry->ecx |= feature_bit(VMX); } static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -7843,6 +7826,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, .pt_supported = vmx_pt_supported, + .pku_supported = vmx_pku_supported, .request_immediate_exit = vmx_request_immediate_exit, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 740d3ee42455..2d3be7f3ad67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,6 +93,8 @@ u64 __read_mostly 
efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif +static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ @@ -879,30 +881,44 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - if (cr4 & CR4_RESERVED_BITS) - return -EINVAL; +#define __cr4_reserved_bits(__cpu_has, __c) \ +({ \ + u64 __reserved_bits = CR4_RESERVED_BITS; \ + \ + if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \ + __reserved_bits |= X86_CR4_OSXSAVE; \ + if (!__cpu_has(__c, X86_FEATURE_SMEP)) \ + __reserved_bits |= X86_CR4_SMEP; \ + if (!__cpu_has(__c, X86_FEATURE_SMAP)) \ + __reserved_bits |= X86_CR4_SMAP; \ + if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \ + __reserved_bits |= X86_CR4_FSGSBASE; \ + if (!__cpu_has(__c, X86_FEATURE_PKU)) \ + __reserved_bits |= X86_CR4_PKE; \ + if (!__cpu_has(__c, X86_FEATURE_LA57)) \ + __reserved_bits |= X86_CR4_LA57; \ + __reserved_bits; \ +}) - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; - - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) - return -EINVAL; +static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +{ + u64 reserved_bits = __cr4_reserved_bits(cpu_has, c); - if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) - return -EINVAL; + if (cpuid_ecx(0x7) & feature_bit(LA57)) + reserved_bits &= ~X86_CR4_LA57; - if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) - return -EINVAL; + if (kvm_x86_ops->umip_emulated()) + reserved_bits &= ~X86_CR4_UMIP; - if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) - return -EINVAL; + return reserved_bits; +} - if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) +static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + if (cr4 & cr4_reserved_bits) return -EINVAL; - if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu)) return -EINVAL; return 0; @@ -1047,9 +1063,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -1064,7 +1082,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) case 5: /* fall through */ default: /* 7 */ - if (val & 0xffffffff00000000ULL) + if (!kvm_dr7_valid(val)) return -1; /* #GP */ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; kvm_update_dr7(vcpu); @@ -1086,9 +1104,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 
3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -1212,6 +1232,7 @@ static const u32 emulated_msrs_all[] = { MSR_MISC_FEATURES_ENABLES, MSR_AMD64_VIRT_SPEC_CTRL, MSR_IA32_POWER_CTL, + MSR_IA32_UCODE_REV, /* * The following list leaves out MSRs whose values are determined @@ -1526,6 +1547,49 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); /* + * The fast path for frequent and performance sensitive wrmsr emulation, + * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces + * the latency of virtual IPI by avoiding the expensive bits of transitioning + * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the + * other cases which must be called after interrupts are enabled on the host. + */ +static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) +{ + if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) && + ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && + ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) { + + kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32)); + return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data); + } + + return 1; +} + +enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +{ + u32 msr = kvm_rcx_read(vcpu); + u64 data = kvm_read_edx_eax(vcpu); + int ret = 0; + + switch (msr) { + case APIC_BASE_MSR + (APIC_ICR >> 4): + ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + break; + default: + return EXIT_FASTPATH_NONE; + } + + if (!ret) { + trace_kvm_msr_write(msr, data); + return EXIT_FASTPATH_SKIP_EMUL_INS; + } + + return EXIT_FASTPATH_NONE; +} +EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); + +/* * Adapt set_msr() to msr_io()'s calling convention */ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) @@ -2485,7 +2549,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2581,45 +2648,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) static void record_steal_time(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) + /* -EAGAIN is returned in atomic context so we can just return. */ + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, + &map, &vcpu->arch.st.cache, false)) return; + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. 
*/ trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + st->preempted & KVM_VCPU_FLUSH_TLB); + if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); - if (vcpu->arch.st.steal.version & 1) - vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + vcpu->arch.st.preempted = 0; - vcpu->arch.st.steal.version += 1; + if (st->version & 1) + st->version += 1; /* first time write, random junk */ - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + st->version += 1; smp_wmb(); - vcpu->arch.st.steal.steal += current->sched_info.run_delay - + st->steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); - smp_wmb(); - vcpu->arch.st.steal.version += 1; + st->version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -2786,11 +2855,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS, - sizeof(struct kvm_steal_time))) - return 1; - vcpu->arch.st.msr_val = data; if (!(data & KVM_MSR_ENABLED)) @@ -2926,7 +2990,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -3458,10 +3525,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); - /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3501,15 +3564,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; + if (vcpu->arch.st.preempted) + return; + + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, + &vcpu->arch.st.cache, true)) + return; + + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); - kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal.preempted, - offsetof(struct kvm_steal_time, preempted), - sizeof(vcpu->arch.st.steal.preempted)); + st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; + + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -4660,9 +4733,6 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, { struct kvm_pit *pit = kvm->arch.vpit; - if (!pit) - return -ENXIO; - /* pit->pit_state.lock was overloaded to prevent 
userspace from getting * an inconsistent state after running multiple KVM_REINJECT_CONTROL * ioctls in parallel. Use a separate lock if that ioctl isn't rare. @@ -5029,6 +5099,9 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&control, argp, sizeof(control))) goto out; + r = -ENXIO; + if (!kvm->arch.vpit) + goto out; r = kvm_vm_ioctl_reinject(kvm, &control); break; } @@ -6186,6 +6259,21 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); } +static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM); +} + +static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE); +} + +static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read(emul_to_vcpu(ctxt), reg); @@ -6263,6 +6351,9 @@ static const struct x86_emulate_ops emulate_ops = { .fix_hypercall = emulator_fix_hypercall, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, + .guest_has_long_mode = emulator_guest_has_long_mode, + .guest_has_movbe = emulator_guest_has_movbe, + .guest_has_fxsr = emulator_guest_has_fxsr, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, @@ -6379,11 +6470,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool write_fault_to_shadow_pgtable, int emulation_type) { - gpa_t gpa = cr2; + gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) @@ -6397,7 +6488,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * Write permission should be allowed since only * write access need to be emulated. 
*/ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); /* * If the mapping is invalid in guest, let cpu retry @@ -6454,10 +6545,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - unsigned long cr2, int emulation_type) + gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2; + unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; @@ -6486,14 +6577,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) + if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2; + vcpu->arch.last_retry_addr = cr2_or_gpa; if (!vcpu->arch.mmu->direct_map) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6639,11 +6730,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - int emulation_type, - void *insn, - int insn_len) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len) { int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -6689,8 +6777,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, - emulation_type)) + if (reexecute_instruction(vcpu, cr2_or_gpa, + write_fault_to_spt, + emulation_type)) return 1; if (ctxt->have_exception) { /* @@ -6724,7 +6813,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } - if (retry_instruction(ctxt, cr2, emulation_type)) + if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) return 1; /* this is needed for vmware backdoor interface to work since it @@ -6736,7 +6825,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: /* Save the faulting GPA (cr2) in the address field */ - ctxt->exception.address = cr2; + ctxt->exception.address = cr2_or_gpa; r = x86_emulate_insn(ctxt); @@ -6744,7 +6833,7 @@ restart: return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; @@ -7357,8 +7446,8 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) { struct kvm_lapic_irq lapic_irq; - lapic_irq.shorthand = 0; - lapic_irq.dest_mode = 0; + lapic_irq.shorthand = APIC_DEST_NOSHORT; + lapic_irq.dest_mode = APIC_DEST_PHYSICAL; lapic_irq.level = 0; lapic_irq.dest_id = apicid; lapic_irq.msi_redir_hint = false; @@ -7997,6 +8086,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); + enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE; bool req_immediate_exit = false; @@ -8198,8 +8288,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - /* The preempt notifier should have taken care of the FPU already. 
*/ - WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8243,7 +8334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_x86_ops->handle_exit_irqoff(vcpu); + kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_fastpath); /* * Consume any pending interrupts, including the possible source of @@ -8287,7 +8378,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_from_vapic(vcpu); vcpu->arch.gpa_available = false; - r = kvm_x86_ops->handle_exit(vcpu); + r = kvm_x86_ops->handle_exit(vcpu, exit_fastpath); return r; cancel_injection: @@ -8471,12 +8562,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +static void kvm_save_current_fpu(struct fpu *fpu) +{ + /* + * If the target FPU state is not resident in the CPU registers, just + * memcpy() from current, else save CPU state directly to the target. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&fpu->state, ¤t->thread.fpu.state, + fpu_kernel_xstate_size); + else + copy_fpregs_to_fpstate(fpu); +} + /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.user_fpu); + kvm_save_current_fpu(vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8492,7 +8597,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); + kvm_save_current_fpu(vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); @@ -8714,6 +8820,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu_load(vcpu); + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && @@ -8722,6 +8830,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return 0; } @@ -9082,33 +9192,90 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } -void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { - void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; - - kvmclock_reset(vcpu); + if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + pr_warn_once("kvm: SMP vm created on host with unstable TSC; " + "guest TSC will not be reliable\n"); - kvm_x86_ops->vcpu_free(vcpu); - free_cpumask_var(wbinvd_dirty_mask); + return 0; } -struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, - unsigned int id) +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; + struct page *page; + int r; - if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) - printk_once(KERN_WARNING - "kvm: SMP vm created on host with unstable TSC; " - "guest TSC will not be reliable\n"); + vcpu->arch.emulate_ctxt.ops = &emulate_ops; + if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - vcpu = kvm_x86_ops->vcpu_create(kvm, id); + kvm_set_tsc_khz(vcpu, max_tsc_khz); - 
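The rewritten kvm_arch_vcpu_create() that continues below pulls the allocations formerly done in kvm_arch_vcpu_init() into one function and unwinds failures through a chain of labels that release resources in reverse order of allocation; kvm_arch_vcpu_destroy() later tears them down in the same reverse order. A minimal standalone sketch of that pattern, using hypothetical malloc-backed resources rather than the real KVM structures:

#include <stdio.h>
#include <stdlib.h>

struct vcpu_sketch {
	void *pio;	/* stands in for pio_data  */
	void *mce;	/* stands in for mce_banks */
	void *fpu;	/* stands in for guest_fpu */
};

/* Allocate in order pio -> mce -> fpu; on failure, fall through the
 * labels so everything allocated so far is freed in reverse order. */
static int vcpu_sketch_create(struct vcpu_sketch *v)
{
	v->pio = malloc(4096);
	if (!v->pio)
		return -1;

	v->mce = calloc(64, sizeof(unsigned long long));
	if (!v->mce)
		goto free_pio;

	v->fpu = malloc(512);
	if (!v->fpu)
		goto free_mce;

	return 0;

free_mce:
	free(v->mce);
free_pio:
	free(v->pio);
	return -1;
}

/* Teardown mirrors creation, newest resource first. */
static void vcpu_sketch_destroy(struct vcpu_sketch *v)
{
	free(v->fpu);
	free(v->mce);
	free(v->pio);
}

int main(void)
{
	struct vcpu_sketch v;

	if (vcpu_sketch_create(&v))
		return 1;
	vcpu_sketch_destroy(&v);
	printf("ok\n");
	return 0;
}

The important property is that each failure label frees exactly the resources allocated before the jump, so a partially constructed vCPU never leaks.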
return vcpu; -} + r = kvm_mmu_create(vcpu); + if (r < 0) + return r; + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); + r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); + if (r < 0) + goto fail_mmu_destroy; + } else + static_key_slow_inc(&kvm_no_apic_vcpu); + + r = -ENOMEM; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto fail_free_lapic; + vcpu->arch.pio_data = page_address(page); + + vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.mce_banks) + goto fail_free_pio_data; + vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; + + if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, + GFP_KERNEL_ACCOUNT)) + goto fail_free_mce_banks; + + vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.user_fpu) { + pr_err("kvm: failed to allocate userspace's fpu\n"); + goto free_wbinvd_dirty_mask; + } + + vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.guest_fpu) { + pr_err("kvm: failed to allocate vcpu's fpu\n"); + goto free_user_fpu; + } + fx_init(vcpu); + + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + + kvm_async_pf_hash_reset(vcpu); + kvm_pmu_init(vcpu); + + vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; + + kvm_hv_vcpu_init(vcpu); + + r = kvm_x86_ops->vcpu_create(vcpu); + if (r) + goto free_guest_fpu; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) -{ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_vcpu_mtrr_init(vcpu); @@ -9117,6 +9284,22 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_init_mmu(vcpu, false); vcpu_put(vcpu); return 0; + +free_guest_fpu: + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); +free_wbinvd_dirty_mask: + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); +fail_free_mce_banks: + kfree(vcpu->arch.mce_banks); +fail_free_pio_data: + free_page((unsigned long)vcpu->arch.pio_data); +fail_free_lapic: + kvm_free_lapic(vcpu); +fail_mmu_destroy: + kvm_mmu_destroy(vcpu); + return r; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -9149,13 +9332,29 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - vcpu->arch.apf.msr_val = 0; + struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + int idx; - vcpu_load(vcpu); - kvm_mmu_unload(vcpu); - vcpu_put(vcpu); + kvm_release_pfn(cache->pfn, cache->dirty, cache); + + kvmclock_reset(vcpu); kvm_x86_ops->vcpu_free(vcpu); + + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); + kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + + kvm_hv_vcpu_uninit(vcpu); + kvm_pmu_destroy(vcpu); + kfree(vcpu->arch.mce_banks); + kvm_free_lapic(vcpu); + idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_mmu_destroy(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + free_page((unsigned long)vcpu->arch.pio_data); + if (!lapic_in_kernel(vcpu)) + static_key_slow_dec(&kvm_no_apic_vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9171,7 +9370,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.nmi_injected = false; kvm_clear_interrupt_queue(vcpu); 
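kvm_arch_hardware_setup() below caches the host's CR4 reserved-bit mask computed by kvm_host_cr4_reserved_bits(), and kvm_arch_check_processor_compat() refuses to run on a CPU whose mask differs; kvm_valid_cr4() earlier applies the same idea per guest via __cr4_reserved_bits(). A minimal standalone sketch of that reserved-bits scheme, with made-up feature bits rather than the real CR4 layout:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical feature-dependent bits, not the real CR4 layout. */
#define FEAT_BIT_X	(1ull << 3)
#define FEAT_BIT_Y	(1ull << 7)
#define ALWAYS_RESERVED	0xffffffff00000000ull

/* Build the reserved mask once from capability flags, the way
 * kvm_host_cr4_reserved_bits() builds it from boot_cpu_data. */
static uint64_t reserved_bits(int has_x, int has_y)
{
	uint64_t bits = ALWAYS_RESERVED;

	if (!has_x)
		bits |= FEAT_BIT_X;
	if (!has_y)
		bits |= FEAT_BIT_Y;
	return bits;
}

/* A value is acceptable only if it sets none of the reserved bits,
 * mirroring kvm_valid_cr4() and kvm_dr7_valid(). */
static int value_valid(uint64_t val, uint64_t reserved)
{
	return (val & reserved) == 0;
}

int main(void)
{
	uint64_t reserved = reserved_bits(1, 0);	/* X present, Y absent */

	printf("%d\n", value_valid(FEAT_BIT_X, reserved));	/* 1: allowed  */
	printf("%d\n", value_valid(FEAT_BIT_Y, reserved));	/* 0: rejected */
	return 0;
}

Building the mask once from capability flags keeps the per-write check to a single AND, which is why the patch precomputes cr4_reserved_bits at hardware-setup time.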
kvm_clear_exception_queue(vcpu); - vcpu->arch.exception.pending = false; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); @@ -9347,6 +9545,8 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); + if (kvm_has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9375,6 +9575,13 @@ void kvm_arch_hardware_unsetup(void) int kvm_arch_check_processor_compat(void) { + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + + WARN_ON(!irqs_disabled()); + + if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits) + return -EIO; + return kvm_x86_ops->check_processor_compatibility(); } @@ -9392,98 +9599,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) struct static_key kvm_no_apic_vcpu __read_mostly; EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); -int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct page *page; - int r; - - vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - else - vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - r = -ENOMEM; - goto fail; - } - vcpu->arch.pio_data = page_address(page); - - kvm_set_tsc_khz(vcpu, max_tsc_khz); - - r = kvm_mmu_create(vcpu); - if (r < 0) - goto fail_free_pio_data; - - if (irqchip_in_kernel(vcpu->kvm)) { - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); - r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); - if (r < 0) - goto fail_mmu_destroy; - } else - static_key_slow_inc(&kvm_no_apic_vcpu); - - vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, - GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.mce_banks) { - r = -ENOMEM; - goto fail_free_lapic; - } - vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; - - if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, - GFP_KERNEL_ACCOUNT)) { - r = -ENOMEM; - goto fail_free_mce_banks; - } - - fx_init(vcpu); - - vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; - - vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); - - vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; - - kvm_async_pf_hash_reset(vcpu); - kvm_pmu_init(vcpu); - - vcpu->arch.pending_external_vector = -1; - vcpu->arch.preempted_in_kernel = false; - - kvm_hv_vcpu_init(vcpu); - - return 0; - -fail_free_mce_banks: - kfree(vcpu->arch.mce_banks); -fail_free_lapic: - kvm_free_lapic(vcpu); -fail_mmu_destroy: - kvm_mmu_destroy(vcpu); -fail_free_pio_data: - free_page((unsigned long)vcpu->arch.pio_data); -fail: - return r; -} - -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - int idx; - - kvm_hv_vcpu_uninit(vcpu); - kvm_pmu_destroy(vcpu); - kfree(vcpu->arch.mce_banks); - kvm_free_lapic(vcpu); - idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_mmu_destroy(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - free_page((unsigned long)vcpu->arch.pio_data); - if (!lapic_in_kernel(vcpu)) - static_key_slow_dec(&kvm_no_apic_vcpu); -} - void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -9558,7 +9673,7 @@ static void kvm_free_vcpus(struct kvm *kvm) kvm_unload_vcpu_mmu(vcpu); } kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arch_vcpu_free(vcpu); + kvm_vcpu_destroy(vcpu); mutex_lock(&kvm->lock); for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) @@ -9627,18 +9742,6 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(__x86_set_memory_region); -int 
x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) -{ - int r; - - mutex_lock(&kvm->slots_lock); - r = __x86_set_memory_region(kvm, id, gpa, size); - mutex_unlock(&kvm->slots_lock); - - return r; -} -EXPORT_SYMBOL_GPL(x86_set_memory_region); - void kvm_arch_pre_destroy_vm(struct kvm *kvm) { kvm_mmu_pre_destroy_vm(kvm); @@ -9652,9 +9755,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm) * unless the the memory map has changed due to process exit * or fd copying. */ - x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); - x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); - x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_lock(&kvm->slots_lock); + __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + 0, 0); + __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + 0, 0); + __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_unlock(&kvm->slots_lock); } if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); @@ -9758,11 +9865,18 @@ out_free: void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { + struct kvm_vcpu *vcpu; + int i; + /* * memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ kvm_mmu_invalidate_mmio_sptes(kvm, gen); + + /* Force re-initialization of steal_time cache */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -9792,7 +9906,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * * The reason is, in case of PML, we need to set D-bit for any slots * with dirty logging disabled in order to eliminate unnecessary GPA - * logging in PML buffer (and potential PML buffer full VMEXT). This + * logging in PML buffer (and potential PML buffer full VMEXIT). This * guarantees leaving PML enabled during guest's lifetime won't have * any additional overhead from PML when guest is running with dirty * logging disabled for memory slots. 
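The __kvm_set_dr()/kvm_get_dr() and MCE-bank MSR changes earlier in this patch wrap the computed index in array_index_nospec() so a bounds check cannot be bypassed under speculative execution (Spectre v1). A standalone sketch of the clamp; the mask computation mirrors the kernel's generic array_index_mask_nospec() and assumes the usual arithmetic right shift on signed longs:

#include <stdio.h>

/* Returns ~0UL when idx < sz and 0 otherwise, without a branch the
 * CPU could mispredict (simplified analogue of the kernel helper). */
static unsigned long index_mask_nospec(unsigned long idx, unsigned long sz)
{
	return ~(long)(idx | (sz - idx - 1)) >> (sizeof(long) * 8 - 1);
}

/* Analogue of array_index_nospec(): in-range indices pass through,
 * out-of-range indices become 0 even under speculation. */
static unsigned long index_nospec(unsigned long idx, unsigned long sz)
{
	return idx & index_mask_nospec(idx, sz);
}

int main(void)
{
	unsigned long db[4] = { 0 };
	unsigned long dr = 2;		/* index that may be guest-influenced */

	if (dr < 4)				/* architectural bounds check    */
		db[index_nospec(dr, 4)] = 1;	/* speculation-safe array access */

	printf("db[2] = %lu, mask(7, 4) = %lx\n",
	       db[2], index_mask_nospec(7, 4));
	return 0;
}

In-range indices pass through unchanged; an out-of-range index is forced to 0, so even a mispredicted bounds check cannot steer the load or store outside the array.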
@@ -10014,7 +10128,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -10127,7 +10241,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_not_present(work->arch.token, work->gva); + trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (kvm_can_deliver_async_pf(vcpu) && @@ -10162,7 +10276,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - trace_kvm_async_pf_ready(work->arch.token, work->gva); + trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && !apf_get_user(vcpu, &val)) { @@ -10284,7 +10398,6 @@ bool kvm_vector_hashing_enabled(void) { return vector_hashing; } -EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { @@ -10292,6 +10405,28 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu) +{ + uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD; + + /* The STIBP bit doesn't fault even if it's not advertised */ + if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) && + !boot_cpu_has(X86_FEATURE_AMD_IBRS)) + bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); + + if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) + bits &= ~SPEC_CTRL_SSBD; + if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) + bits &= ~SPEC_CTRL_SSBD; + + return bits; +} +EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 29391af8871d..2d2ff855773b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -144,11 +144,6 @@ static inline bool is_pae_paging(struct kvm_vcpu *vcpu) return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 
57 : 48; @@ -166,21 +161,13 @@ static inline u64 get_canonical(u64 la, u8 vaddr_bits) static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) { -#ifdef CONFIG_X86_64 return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; -#else - return false; -#endif } static inline bool emul_is_noncanonical_address(u64 la, struct x86_emulate_ctxt *ctxt) { -#ifdef CONFIG_X86_64 return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; -#else - return false; -#endif } static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, @@ -289,8 +276,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); +enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ @@ -369,7 +357,14 @@ static inline bool kvm_pat_valid(u64 data) return (data | ((data & 0x0202020202020202ull) << 1)) == data; } +static inline bool kvm_dr7_valid(unsigned long data) +{ + /* Bits [63:32] are reserved */ + return !(data >> 32); +} + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); +u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 8908c58bd6cd..53adc1762ec0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -929,7 +929,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 345848f270e3..98aecb14fbcc 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index fab095362c50..5bfd5aef5378 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -19,7 +19,6 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> #include <asm/elf.h> -#include <asm/mpx.h> #if 0 /* This is just for testing */ struct page * @@ -151,10 +150,6 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (len & ~huge_page_mask(h)) return -EINVAL; - addr = mpx_unmapped_area_check(addr, len, flags); - if (IS_ERR_VALUE(addr)) - return addr; - if (len > TASK_SIZE) return -ENOMEM; diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index aae9a933dfd4..cb91eccc4960 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -163,8 +163,6 @@ unsigned long get_mmap_base(int is_legacy) const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_flags & VM_MPX) - return "[mpx]"; return NULL; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c deleted file mode 100644 index 895fb7a9294d..000000000000 --- a/arch/x86/mm/mpx.c +++ /dev/null @@ -1,938 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * mpx.c - Memory Protection eXtensions - * - * Copyright (c) 2014, Intel 
Corporation. - * Qiaowei Ren <qiaowei.ren@intel.com> - * Dave Hansen <dave.hansen@intel.com> - */ -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/mm_types.h> -#include <linux/mman.h> -#include <linux/syscalls.h> -#include <linux/sched/sysctl.h> - -#include <asm/insn.h> -#include <asm/insn-eval.h> -#include <asm/mmu_context.h> -#include <asm/mpx.h> -#include <asm/processor.h> -#include <asm/fpu/internal.h> - -#define CREATE_TRACE_POINTS -#include <asm/trace/mpx.h> - -static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BD_SIZE_BYTES_64; - else - return MPX_BD_SIZE_BYTES_32; -} - -static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BT_SIZE_BYTES_64; - else - return MPX_BT_SIZE_BYTES_32; -} - -/* - * This is really a simplified "vm_mmap". it only handles MPX - * bounds tables (the bounds directory is user-allocated). - */ -static unsigned long mpx_mmap(unsigned long len) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, populate; - - /* Only bounds table can be allocated here */ - if (len != mpx_bt_size_bytes(mm)) - return -EINVAL; - - down_write(&mm->mmap_sem); - addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL); - up_write(&mm->mmap_sem); - if (populate) - mm_populate(addr, populate); - - return addr; -} - -static int mpx_insn_decode(struct insn *insn, - struct pt_regs *regs) -{ - unsigned char buf[MAX_INSN_SIZE]; - int x86_64 = !test_thread_flag(TIF_IA32); - int not_copied; - int nr_copied; - - not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf)); - nr_copied = sizeof(buf) - not_copied; - /* - * The decoder _should_ fail nicely if we pass it a short buffer. - * But, let's not depend on that implementation detail. If we - * did not get anything, just error out now. - */ - if (!nr_copied) - return -EFAULT; - insn_init(insn, buf, nr_copied, x86_64); - insn_get_length(insn); - /* - * copy_from_user() tries to get as many bytes as we could see in - * the largest possible instruction. If the instruction we are - * after is shorter than that _and_ we attempt to copy from - * something unreadable, we might get a short read. This is OK - * as long as the read did not stop in the middle of the - * instruction. Check to see if we got a partial instruction. - */ - if (nr_copied < insn->length) - return -EFAULT; - - insn_get_opcode(insn); - /* - * We only _really_ need to decode bndcl/bndcn/bndcu - * Error out on anything else. - */ - if (insn->opcode.bytes[0] != 0x0f) - goto bad_opcode; - if ((insn->opcode.bytes[1] != 0x1a) && - (insn->opcode.bytes[1] != 0x1b)) - goto bad_opcode; - - return 0; -bad_opcode: - return -EINVAL; -} - -/* - * If a bounds overflow occurs then a #BR is generated. This - * function decodes MPX instructions to get violation address - * and set this address into extended struct siginfo. - * - * Note that this is not a super precise way of doing this. - * Userspace could have, by the time we get here, written - * anything it wants in to the instructions. We can not - * trust anything about it. They might not be valid - * instructions or might encode invalid registers, etc... 
- */ -int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) -{ - const struct mpx_bndreg_state *bndregs; - const struct mpx_bndreg *bndreg; - struct insn insn; - uint8_t bndregno; - int err; - - err = mpx_insn_decode(&insn, regs); - if (err) - goto err_out; - - /* - * We know at this point that we are only dealing with - * MPX instructions. - */ - insn_get_modrm(&insn); - bndregno = X86_MODRM_REG(insn.modrm.value); - if (bndregno > 3) { - err = -EINVAL; - goto err_out; - } - /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); - if (!bndregs) { - err = -EINVAL; - goto err_out; - } - /* now go select the individual register in the set of 4 */ - bndreg = &bndregs->bndreg[bndregno]; - - /* - * The registers are always 64-bit, but the upper 32 - * bits are ignored in 32-bit mode. Also, note that the - * upper bounds are architecturally represented in 1's - * complement form. - * - * The 'unsigned long' cast is because the compiler - * complains when casting from integers to different-size - * pointers. - */ - info->lower = (void __user *)(unsigned long)bndreg->lower_bound; - info->upper = (void __user *)(unsigned long)~bndreg->upper_bound; - info->addr = insn_get_addr_ref(&insn, regs); - - /* - * We were not able to extract an address from the instruction, - * probably because there was something invalid in it. - */ - if (info->addr == (void __user *)-1) { - err = -EINVAL; - goto err_out; - } - trace_mpx_bounds_register_exception(info->addr, bndreg); - return 0; -err_out: - /* info might be NULL, but kfree() handles that */ - return err; -} - -static __user void *mpx_get_bounds_dir(void) -{ - const struct mpx_bndcsr *bndcsr; - - if (!cpu_feature_enabled(X86_FEATURE_MPX)) - return MPX_INVALID_BOUNDS_DIR; - - /* - * The bounds directory pointer is stored in a register - * only accessible if we first do an xsave. - */ - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - return MPX_INVALID_BOUNDS_DIR; - - /* - * Make sure the register looks valid by checking the - * enable bit. - */ - if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG)) - return MPX_INVALID_BOUNDS_DIR; - - /* - * Lastly, mask off the low bits used for configuration - * flags, and return the address of the bounds table. - */ - return (void __user *)(unsigned long) - (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); -} - -int mpx_enable_management(void) -{ - void __user *bd_base = MPX_INVALID_BOUNDS_DIR; - struct mm_struct *mm = current->mm; - int ret = 0; - - /* - * runtime in the userspace will be responsible for allocation of - * the bounds directory. Then, it will save the base of the bounds - * directory into XSAVE/XRSTOR Save Area and enable MPX through - * XRSTOR instruction. - * - * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is - * expected to be relatively expensive. Storing the bounds - * directory here means that we do not have to do xsave in the - * unmap path; we can just use mm->context.bd_addr instead. - */ - bd_base = mpx_get_bounds_dir(); - down_write(&mm->mmap_sem); - - /* MPX doesn't support addresses above 47 bits yet. */ - if (find_vma(mm, DEFAULT_MAP_WINDOW)) { - pr_warn_once("%s (%d): MPX cannot handle addresses " - "above 47-bits. 
Disabling.", - current->comm, current->pid); - ret = -ENXIO; - goto out; - } - mm->context.bd_addr = bd_base; - if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR) - ret = -ENXIO; -out: - up_write(&mm->mmap_sem); - return ret; -} - -int mpx_disable_management(void) -{ - struct mm_struct *mm = current->mm; - - if (!cpu_feature_enabled(X86_FEATURE_MPX)) - return -ENXIO; - - down_write(&mm->mmap_sem); - mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR; - up_write(&mm->mmap_sem); - return 0; -} - -static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, - unsigned long *curval, - unsigned long __user *addr, - unsigned long old_val, unsigned long new_val) -{ - int ret; - /* - * user_atomic_cmpxchg_inatomic() actually uses sizeof() - * the pointer that we pass to it to figure out how much - * data to cmpxchg. We have to be careful here not to - * pass a pointer to a 64-bit data type when we only want - * a 32-bit copy. - */ - if (is_64bit_mm(mm)) { - ret = user_atomic_cmpxchg_inatomic(curval, - addr, old_val, new_val); - } else { - u32 uninitialized_var(curval_32); - u32 old_val_32 = old_val; - u32 new_val_32 = new_val; - u32 __user *addr_32 = (u32 __user *)addr; - - ret = user_atomic_cmpxchg_inatomic(&curval_32, - addr_32, old_val_32, new_val_32); - *curval = curval_32; - } - return ret; -} - -/* - * With 32-bit mode, a bounds directory is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, - * and the size of each bounds table is 4MB. - */ -static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) -{ - unsigned long expected_old_val = 0; - unsigned long actual_old_val = 0; - unsigned long bt_addr; - unsigned long bd_new_entry; - int ret = 0; - - /* - * Carve the virtual space out of userspace for the new - * bounds table: - */ - bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); - if (IS_ERR((void *)bt_addr)) - return PTR_ERR((void *)bt_addr); - /* - * Set the valid flag (kinda like _PAGE_PRESENT in a pte) - */ - bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - - /* - * Go poke the address of the new bounds table in to the - * bounds directory entry out in userspace memory. Note: - * we may race with another CPU instantiating the same table. - * In that case the cmpxchg will see an unexpected - * 'actual_old_val'. - * - * This can fault, but that's OK because we do not hold - * mmap_sem at this point, unlike some of the other part - * of the MPX code that have to pagefault_disable(). - */ - ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, - expected_old_val, bd_new_entry); - if (ret) - goto out_unmap; - - /* - * The user_atomic_cmpxchg_inatomic() will only return nonzero - * for faults, *not* if the cmpxchg itself fails. Now we must - * verify that the cmpxchg itself completed successfully. - */ - /* - * We expected an empty 'expected_old_val', but instead found - * an apparently valid entry. Assume we raced with another - * thread to instantiate this table and desclare succecss. - */ - if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) { - ret = 0; - goto out_unmap; - } - /* - * We found a non-empty bd_entry but it did not have the - * VALID_FLAG set. Return an error which will result in - * a SEGV since this probably means that somebody scribbled - * some invalid data in to a bounds table. 
- */ - if (expected_old_val != actual_old_val) { - ret = -EINVAL; - goto out_unmap; - } - trace_mpx_new_bounds_table(bt_addr); - return 0; -out_unmap: - vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); - return ret; -} - -/* - * When a BNDSTX instruction attempts to save bounds to a bounds - * table, it will first attempt to look up the table in the - * first-level bounds directory. If it does not find a table in - * the directory, a #BR is generated and we get here in order to - * allocate a new table. - * - * With 32-bit mode, the size of BD is 4MB, and the size of each - * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, - * and the size of each bound table is 4MB. - */ -static int do_mpx_bt_fault(void) -{ - unsigned long bd_entry, bd_base; - const struct mpx_bndcsr *bndcsr; - struct mm_struct *mm = current->mm; - - bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); - if (!bndcsr) - return -EINVAL; - /* - * Mask off the preserve and enable bits - */ - bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK; - /* - * The hardware provides the address of the missing or invalid - * entry via BNDSTATUS, so we don't have to go look it up. - */ - bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK; - /* - * Make sure the directory entry is within where we think - * the directory is. - */ - if ((bd_entry < bd_base) || - (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) - return -EINVAL; - - return allocate_bt(mm, (long __user *)bd_entry); -} - -int mpx_handle_bd_fault(void) -{ - /* - * Userspace never asked us to manage the bounds tables, - * so refuse to help. - */ - if (!kernel_managing_mpx_tables(current->mm)) - return -EINVAL; - - return do_mpx_bt_fault(); -} - -/* - * A thin wrapper around get_user_pages(). Returns 0 if the - * fault was resolved or -errno if not. - */ -static int mpx_resolve_fault(long __user *addr, int write) -{ - long gup_ret; - int nr_pages = 1; - - gup_ret = get_user_pages((unsigned long)addr, nr_pages, - write ? FOLL_WRITE : 0, NULL, NULL); - /* - * get_user_pages() returns number of pages gotten. - * 0 means we failed to fault in and get anything, - * probably because 'addr' is bad. - */ - if (!gup_ret) - return -EFAULT; - /* Other error, return it */ - if (gup_ret < 0) - return gup_ret; - /* must have gup'd a page and gup_ret>0, success */ - return 0; -} - -static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, - unsigned long bd_entry) -{ - unsigned long bt_addr = bd_entry; - int align_to_bytes; - /* - * Bit 0 in a bt_entry is always the valid bit. - */ - bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG; - /* - * Tables are naturally aligned at 8-byte boundaries - * on 64-bit and 4-byte boundaries on 32-bit. The - * documentation makes it appear that the low bits - * are ignored by the hardware, so we do the same. - */ - if (is_64bit_mm(mm)) - align_to_bytes = 8; - else - align_to_bytes = 4; - bt_addr &= ~(align_to_bytes-1); - return bt_addr; -} - -/* - * We only want to do a 4-byte get_user() on 32-bit. Otherwise, - * we might run off the end of the bounds table if we are on - * a 64-bit kernel and try to get 8 bytes. - */ -static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, - long __user *bd_entry_ptr) -{ - u32 bd_entry_32; - int ret; - - if (is_64bit_mm(mm)) - return get_user(*bd_entry_ret, bd_entry_ptr); - - /* - * Note that get_user() uses the type of the *pointer* to - * establish the size of the get, not the destination. 
- */ - ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr); - *bd_entry_ret = bd_entry_32; - return ret; -} - -/* - * Get the base of bounds tables pointed by specific bounds - * directory entry. - */ -static int get_bt_addr(struct mm_struct *mm, - long __user *bd_entry_ptr, - unsigned long *bt_addr_result) -{ - int ret; - int valid_bit; - unsigned long bd_entry; - unsigned long bt_addr; - - if (!access_ok((bd_entry_ptr), sizeof(*bd_entry_ptr))) - return -EFAULT; - - while (1) { - int need_write = 0; - - pagefault_disable(); - ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr); - pagefault_enable(); - if (!ret) - break; - if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry_ptr, need_write); - /* - * If we could not resolve the fault, consider it - * userspace's fault and error out. - */ - if (ret) - return ret; - } - - valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG; - bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry); - - /* - * When the kernel is managing bounds tables, a bounds directory - * entry will either have a valid address (plus the valid bit) - * *OR* be completely empty. If we see a !valid entry *and* some - * data in the address field, we know something is wrong. This - * -EINVAL return will cause a SIGSEGV. - */ - if (!valid_bit && bt_addr) - return -EINVAL; - /* - * Do we have an completely zeroed bt entry? That is OK. It - * just means there was no bounds table for this memory. Make - * sure to distinguish this from -EINVAL, which will cause - * a SEGV. - */ - if (!valid_bit) - return -ENOENT; - - *bt_addr_result = bt_addr; - return 0; -} - -static inline int bt_entry_size_bytes(struct mm_struct *mm) -{ - if (is_64bit_mm(mm)) - return MPX_BT_ENTRY_BYTES_64; - else - return MPX_BT_ENTRY_BYTES_32; -} - -/* - * Take a virtual address and turns it in to the offset in bytes - * inside of the bounds table where the bounds table entry - * controlling 'addr' can be found. - */ -static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, - unsigned long addr) -{ - unsigned long bt_table_nr_entries; - unsigned long offset = addr; - - if (is_64bit_mm(mm)) { - /* Bottom 3 bits are ignored on 64-bit */ - offset >>= 3; - bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; - } else { - /* Bottom 2 bits are ignored on 32-bit */ - offset >>= 2; - bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; - } - /* - * We know the size of the table in to which we are - * indexing, and we have eliminated all the low bits - * which are ignored for indexing. - * - * Mask out all the high bits which we do not need - * to index in to the table. Note that the tables - * are always powers of two so this gives us a proper - * mask. - */ - offset &= (bt_table_nr_entries-1); - /* - * We now have an entry offset in terms of *entries* in - * the table. We need to scale it back up to bytes. - */ - offset *= bt_entry_size_bytes(mm); - return offset; -} - -/* - * How much virtual address space does a single bounds - * directory entry cover? - * - * Note, we need a long long because 4GB doesn't fit in - * to a long on 32-bit. - */ -static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) -{ - unsigned long long virt_space; - unsigned long long GB = (1ULL << 30); - - /* - * This covers 32-bit emulation as well as 32-bit kernels - * running on 64-bit hardware. - */ - if (!is_64bit_mm(mm)) - return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; - - /* - * 'x86_virt_bits' returns what the hardware is capable - * of, and returns the full >32-bit address space when - * running 32-bit kernels on 64-bit hardware. 
- */ - virt_space = (1ULL << boot_cpu_data.x86_virt_bits); - return virt_space / MPX_BD_NR_ENTRIES_64; -} - -/* - * Free the backing physical pages of bounds table 'bt_addr'. - * Assume start...end is within that bounds table. - */ -static noinline int zap_bt_entries_mapping(struct mm_struct *mm, - unsigned long bt_addr, - unsigned long start_mapping, unsigned long end_mapping) -{ - struct vm_area_struct *vma; - unsigned long addr, len; - unsigned long start; - unsigned long end; - - /* - * if we 'end' on a boundary, the offset will be 0 which - * is not what we want. Back it up a byte to get the - * last bt entry. Then once we have the entry itself, - * move 'end' back up by the table entry size. - */ - start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); - end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); - /* - * Move end back up by one entry. Among other things - * this ensures that it remains page-aligned and does - * not screw up zap_page_range() - */ - end += bt_entry_size_bytes(mm); - - /* - * Find the first overlapping vma. If vma->vm_start > start, there - * will be a hole in the bounds table. This -EINVAL return will - * cause a SIGSEGV. - */ - vma = find_vma(mm, start); - if (!vma || vma->vm_start > start) - return -EINVAL; - - /* - * A NUMA policy on a VM_MPX VMA could cause this bounds table to - * be split. So we need to look across the entire 'start -> end' - * range of this bounds table, find all of the VM_MPX VMAs, and - * zap only those. - */ - addr = start; - while (vma && vma->vm_start < end) { - /* - * We followed a bounds directory entry down - * here. If we find a non-MPX VMA, that's bad, - * so stop immediately and return an error. This - * probably results in a SIGSEGV. - */ - if (!(vma->vm_flags & VM_MPX)) - return -EINVAL; - - len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len); - trace_mpx_unmap_zap(addr, addr+len); - - vma = vma->vm_next; - addr = vma->vm_start; - } - return 0; -} - -static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, - unsigned long addr) -{ - /* - * There are several ways to derive the bd offsets. We - * use the following approach here: - * 1. We know the size of the virtual address space - * 2. We know the number of entries in a bounds table - * 3. We know that each entry covers a fixed amount of - * virtual address space. - * So, we can just divide the virtual address by the - * virtual space used by one entry to determine which - * entry "controls" the given virtual address. - */ - if (is_64bit_mm(mm)) { - int bd_entry_size = 8; /* 64-bit pointer */ - /* - * Take the 64-bit addressing hole in to account. - */ - addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); - return (addr / bd_entry_virt_space(mm)) * bd_entry_size; - } else { - int bd_entry_size = 4; /* 32-bit pointer */ - /* - * 32-bit has no hole so this case needs no mask - */ - return (addr / bd_entry_virt_space(mm)) * bd_entry_size; - } - /* - * The two return calls above are exact copies. If we - * pull out a single copy and put it in here, gcc won't - * realize that we're doing a power-of-2 divide and use - * shifts. It uses a real divide. If we put them up - * there, it manages to figure it out (gcc 4.8.3). 
- */ -} - -static int unmap_entire_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long bt_addr) -{ - unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long uninitialized_var(actual_old_val); - int ret; - - while (1) { - int need_write = 1; - unsigned long cleared_bd_entry = 0; - - pagefault_disable(); - ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, - bd_entry, expected_old_val, cleared_bd_entry); - pagefault_enable(); - if (!ret) - break; - if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry, need_write); - /* - * If we could not resolve the fault, consider it - * userspace's fault and error out. - */ - if (ret) - return ret; - } - /* - * The cmpxchg was performed, check the results. - */ - if (actual_old_val != expected_old_val) { - /* - * Someone else raced with us to unmap the table. - * That is OK, since we were both trying to do - * the same thing. Declare success. - */ - if (!actual_old_val) - return 0; - /* - * Something messed with the bounds directory - * entry. We hold mmap_sem for read or write - * here, so it could not be a _new_ bounds table - * that someone just allocated. Something is - * wrong, so pass up the error and SIGSEGV. - */ - return -EINVAL; - } - /* - * Note, we are likely being called under do_munmap() already. To - * avoid recursion, do_munmap() will check whether it comes - * from one bounds table through VM_MPX flag. - */ - return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL); -} - -static int try_unmap_single_bt(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct vm_area_struct *next; - struct vm_area_struct *prev; - /* - * "bta" == Bounds Table Area: the area controlled by the - * bounds table that we are unmapping. - */ - unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); - unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); - unsigned long uninitialized_var(bt_addr); - void __user *bde_vaddr; - int ret; - /* - * We already unlinked the VMAs from the mm's rbtree so 'start' - * is guaranteed to be in a hole. This gets us the first VMA - * before the hole in to 'prev' and the next VMA after the hole - * in to 'next'. - */ - next = find_vma_prev(mm, start, &prev); - /* - * Do not count other MPX bounds table VMAs as neighbors. - * Although theoretically possible, we do not allow bounds - * tables for bounds tables so our heads do not explode. - * If we count them as neighbors here, we may end up with - * lots of tables even though we have no actual table - * entries in use. - */ - while (next && (next->vm_flags & VM_MPX)) - next = next->vm_next; - while (prev && (prev->vm_flags & VM_MPX)) - prev = prev->vm_prev; - /* - * We know 'start' and 'end' lie within an area controlled - * by a single bounds table. See if there are any other - * VMAs controlled by that bounds table. If there are not - * then we can "expand" the are we are unmapping to possibly - * cover the entire table. - */ - next = find_vma_prev(mm, start, &prev); - if ((!prev || prev->vm_end <= bta_start_vaddr) && - (!next || next->vm_start >= bta_end_vaddr)) { - /* - * No neighbor VMAs controlled by same bounds - * table. Try to unmap the whole thing - */ - start = bta_start_vaddr; - end = bta_end_vaddr; - } - - bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start); - ret = get_bt_addr(mm, bde_vaddr, &bt_addr); - /* - * No bounds table there, so nothing to unmap. 
- */ - if (ret == -ENOENT) { - ret = 0; - return 0; - } - if (ret) - return ret; - /* - * We are unmapping an entire table. Either because the - * unmap that started this whole process was large enough - * to cover an entire table, or that the unmap was small - * but was the area covered by a bounds table. - */ - if ((start == bta_start_vaddr) && - (end == bta_end_vaddr)) - return unmap_entire_bt(mm, bde_vaddr, bt_addr); - return zap_bt_entries_mapping(mm, bt_addr, start, end); -} - -static int mpx_unmap_tables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - unsigned long one_unmap_start; - trace_mpx_unmap_search(start, end); - - one_unmap_start = start; - while (one_unmap_start < end) { - int ret; - unsigned long next_unmap_start = ALIGN(one_unmap_start+1, - bd_entry_virt_space(mm)); - unsigned long one_unmap_end = end; - /* - * if the end is beyond the current bounds table, - * move it back so we only deal with a single one - * at a time - */ - if (one_unmap_end > next_unmap_start) - one_unmap_end = next_unmap_start; - ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); - if (ret) - return ret; - - one_unmap_start = next_unmap_start; - } - return 0; -} - -/* - * Free unused bounds tables covered in a virtual address region being - * munmap()ed. Assume end > start. - * - * This function will be called by do_munmap(), and the VMAs covering - * the virtual address region start...end have already been split if - * necessary, and the 'vma' is the first vma in this range (start -> end). - */ -void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - struct vm_area_struct *vma; - int ret; - - /* - * Refuse to do anything unless userspace has asked - * the kernel to help manage the bounds tables, - */ - if (!kernel_managing_mpx_tables(current->mm)) - return; - /* - * This will look across the entire 'start -> end' range, - * and find all of the non-VM_MPX VMAs. - * - * To avoid recursion, if a VM_MPX vma is found in the range - * (start->end), we will not continue follow-up work. This - * recursion represents having bounds tables for bounds tables, - * which should not occur normally. Being strict about it here - * helps ensure that we do not have an exploitable stack overflow. - */ - vma = find_vma(mm, start); - while (vma && vma->vm_start < end) { - if (vma->vm_flags & VM_MPX) - return; - vma = vma->vm_next; - } - - ret = mpx_unmap_tables(mm, start, end); - if (ret) - force_sig(SIGSEGV); -} - -/* MPX cannot handle addresses above 47 bits yet. */ -unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, - unsigned long flags) -{ - if (!kernel_managing_mpx_tables(current->mm)) - return addr; - if (addr + len <= DEFAULT_MAP_WINDOW) - return addr; - if (flags & MAP_FIXED) - return -ENOMEM; - - /* - * Requested len is larger than the whole area we're allowed to map in. - * Resetting hinting address wouldn't do much good -- fail early. - */ - if (len > DEFAULT_MAP_WINDOW) - return -ENOMEM; - - /* Look for unmap area within DEFAULT_MAP_WINDOW */ - return 0; -} diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 62a8ebe72a52..c4aedd00c1ba 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -618,6 +618,17 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) } EXPORT_SYMBOL_GPL(lookup_address); +/* + * Lookup the page table entry for a virtual address in a given mm. Return a + * pointer to the entry and the level of the mapping. 
+ */ +pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address, + unsigned int *level) +{ + return lookup_address_in_pgd(pgd_offset(mm, address), address, level); +} +EXPORT_SYMBOL_GPL(lookup_address_in_mm); + static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, unsigned int *level) { diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c index 44d1f884c3d3..139738bbdd36 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c @@ -6,21 +6,31 @@ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com> */ -#include <linux/gpio.h> -#include <linux/platform_data/tc35876x.h> +#include <linux/gpio/machine.h> #include <asm/intel-mid.h> +static struct gpiod_lookup_table tc35876x_gpio_table = { + .dev_id = "i2c_disp_brig", + .table = { + GPIO_LOOKUP("0000:00:0c.0", -1, "bridge-reset", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("0000:00:0c.0", -1, "bl-en", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("0000:00:0c.0", -1, "vadd", GPIO_ACTIVE_HIGH), + { }, + }, +}; + /*tc35876x DSI_LVDS bridge chip and panel platform data*/ static void *tc35876x_platform_data(void *data) { - static struct tc35876x_platform_data pdata; + struct gpiod_lookup_table *table = &tc35876x_gpio_table; + struct gpiod_lookup *lookup = table->table; - /* gpio pins set to -1 will not be used by the driver */ - pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN"); - pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN"); - pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3"); + lookup[0].chip_hwnum = get_gpio_by_name("LCMB_RXEN"); + lookup[1].chip_hwnum = get_gpio_by_name("6S6P_BL_EN"); + lookup[2].chip_hwnum = get_gpio_by_name("EN_VREG_LCD_V3P3"); + gpiod_add_lookup_table(table); - return &pdata; + return NULL; } static const struct devs_id tc35876x_dev_id __initconst = { |
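For the wrmsr fast path added earlier in this patch, handle_fastpath_set_x2apic_icr_irqoff() only short-circuits ICR writes that request fixed delivery to a physical destination. A standalone sketch of that filter on the 64-bit x2APIC ICR value; the bit positions follow the Intel SDM and the constants are local stand-ins, not the kernel's apicdef.h definitions:

#include <stdint.h>
#include <stdio.h>

/* x2APIC ICR layout (Intel SDM): bits 7:0 vector, bits 10:8 delivery
 * mode (000 = fixed), bit 11 destination mode (0 = physical),
 * bits 63:32 destination APIC ID. */
#define ICR_DELIVERY_MODE_MASK	0x700ull
#define ICR_DELIVERY_FIXED	0x000ull
#define ICR_DEST_MODE_LOGICAL	0x800ull

/* Returns 1 if an ICR write may take the IPI fast path: fixed delivery
 * to a physical destination, as in handle_fastpath_set_x2apic_icr_irqoff(). */
static int icr_fastpath_ok(uint64_t data)
{
	if ((data & ICR_DELIVERY_MODE_MASK) != ICR_DELIVERY_FIXED)
		return 0;
	if (data & ICR_DEST_MODE_LOGICAL)
		return 0;
	return 1;
}

int main(void)
{
	uint64_t icr = ((uint64_t)3 << 32) | 0xec;	/* vector 0xec to APIC ID 3 */

	printf("dest %u, vector 0x%02x, fastpath %d\n",
	       (unsigned)(icr >> 32), (unsigned)(icr & 0xff),
	       icr_fastpath_ok(icr));
	return 0;
}

Anything else (logical destinations, NMI/INIT/SIPI delivery, and so on) falls back to the normal exit handler, which runs only after host interrupts have been re-enabled.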