diff options
56 files changed, 1143 insertions, 430 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f7735c72c128..7610eaa4d491 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1901,6 +1901,8 @@ registers, find a list below: PPC | KVM_REG_PPC_ARCH_COMPAT | 32 PPC | KVM_REG_PPC_DABRX | 32 PPC | KVM_REG_PPC_WORT | 64 + PPC | KVM_REG_PPC_SPRG9 | 64 + PPC | KVM_REG_PPC_DBSR | 32 PPC | KVM_REG_PPC_TM_GPR0 | 64 ... PPC | KVM_REG_PPC_TM_GPR31 | 64 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 46e5d4da1989..53036e21756b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } @@ -180,6 +181,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcde41905746..2012c4ba8d67 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm, void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } @@ -190,6 +191,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 465dfcb82c92..5bca220bbb60 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -53,17 +53,17 @@ #define BOOKE_INTERRUPT_DEBUG 15 /* E500 */ -#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 -#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 -/* - * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines - */ -#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST +#ifdef CONFIG_SPE_POSSIBLE +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#endif + +#ifdef CONFIG_PPC_E500MC +#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32 +#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33 +#endif + #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define BOOKE_INTERRUPT_DOORBELL 36 #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index f7aa5cc395c4..3286f0d6a86c 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,15 +23,16 @@ #include <linux/types.h> #include <linux/kvm_host.h> -/* LPIDs we support with this build -- runtime limit may be lower */ +/* + * Number of available lpids. Only the low-order 6 bits of LPID rgister are + * implemented on e500mc+ cores. + */ #define KVMPPC_NR_LPIDS 64 #define KVMPPC_INST_EHPRIV 0x7c00021c #define EHPRIV_OC_SHIFT 11 /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ #define EHPRIV_OC_DEBUG 1 -#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ - (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 604000882352..047855619cc4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -56,10 +56,15 @@ extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 @@ -139,6 +144,7 @@ enum kvm_exit_types { EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, EMULATED_RFCI_EXITS, + EMULATED_RFDI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -584,8 +590,6 @@ struct kvm_vcpu_arch { u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; - /* hardware visible debug registers when in guest state */ - struct debug_reg shadow_dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; @@ -607,7 +611,6 @@ struct kvm_vcpu_arch { u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; - struct tasklet_struct tasklet; u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..a6dcdb6d13c1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -38,6 +38,12 @@ #include <asm/paca.h> #endif +/* + * KVMPPC_INST_SW_BREAKPOINT is debug Instruction + * for supporting software breakpoint. + */ +#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); -extern void kvmppc_decrementer_func(unsigned long data); +extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); @@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); + union kvmppc_one_reg { u32 wval; u64 dval; @@ -243,7 +252,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 1d653308a33c..16547efa2d5a 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -319,6 +319,8 @@ * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. */ #ifdef CONFIG_BOOKE +#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */ +#define DBSR_MRR 0x30000000 /* Most Recent Reset */ #define DBSR_IC 0x08000000 /* Instruction Completion */ #define DBSR_BT 0x04000000 /* Branch Taken */ #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e0e49dbb145d..ab4d4732c492 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -476,6 +476,11 @@ struct kvm_get_htab_header { /* FP and vector status/control registers */ #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +/* + * VSCR register is documented as a 32-bit register in the ISA, but it can + * only be accesses via a vector register. Expose VSCR as a 32-bit register + * even though the kernel represents it as a 128-bit vector. + */ #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) /* Virtual processor areas */ @@ -557,6 +562,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) +#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4f1393d20079..dddba3e94260 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle) blr +#ifdef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500) bl __setup_cpu_e5500 mtlr r6 blr +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 +#ifdef CONFIG_E200 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 ori r3,r3,HID0_DAPUEN@l mtspr SPRN_HID0,r3 b __setup_e200_ivors +#endif /* CONFIG_E200 */ + +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) mflr r4 @@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2) #endif mtlr r4 blr +#else /* CONFIG_PPC_E500MC */ _GLOBAL(__setup_cpu_e500mc) _GLOBAL(__setup_cpu_e5500) mflr r5 @@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500) 2: mtlr r5 blr -#endif +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b6dcaaec1a3..808405906336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "ppc8548", }, +#else { /* e500mc */ .pvr_mask = 0xffff0000, .pvr_value = 0x80230000, @@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC_E500MC { /* e5500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, @@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce6500", }, +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bb9cac6c8051..3e68d1c69718 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -635,7 +635,7 @@ interrupt_end_book3e: /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); - NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, + NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x200) @@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* AltiVec Assist */ START_EXCEPTION(altivec_assist); NORMAL_EXCEPTION_PROLOG(0x220, - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, + BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) INTS_DISABLE diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188a94a1..fffd1f96bb1d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers for e200 and e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) -#else - EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) -#else - EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, +#elif defined(CONFIG_SPE_POSSIBLE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ + /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ @@ -947,6 +949,7 @@ get_phys_addr: * Global functions */ +#ifdef CONFIG_E200 /* Adjust or setup IVORs for e200 */ _GLOBAL(__setup_e200_ivors) li r3,DebugDebug@l @@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors) mtspr SPRN_IVOR34,r3 sync blr +#endif +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) li r3,DebugCrit@l @@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors) mtspr SPRN_IVOR35,r3 sync blr - +#else /* Adjust or setup IVORs for e500mc */ _GLOBAL(__setup_e500mc_ivors) li r3,DebugDebug@l @@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors) mtspr SPRN_IVOR41,r3 sync blr +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..b32db4b95361 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); + *val = get_reg_val(id, kvmppc_get_dar(vcpu)); break; case KVM_REG_PPC_DSISR: - val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); + *val = get_reg_val(id, kvmppc_get_dsisr(vcpu)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); + i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); - break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + *val = get_reg_val(id, vcpu->arch.fp.fpscr); break; - case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; - val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; } else { r = -ENXIO; } break; #endif /* CONFIG_VSX */ - case KVM_REG_PPC_DEBUG_INST: { - u32 opcode = INS_TW; - r = copy_to_user((u32 __user *)(long)reg->addr, - &opcode, sizeof(u32)); + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, INS_TW); break; - } #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { r = -ENXIO; break; } - val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - val = get_reg_val(reg->id, vcpu->arch.fscr); + *val = get_reg_val(id, vcpu->arch.fscr); break; case KVM_REG_PPC_TAR: - val = get_reg_val(reg->id, vcpu->arch.tar); + *val = get_reg_val(id, vcpu->arch.tar); break; case KVM_REG_PPC_EBBHR: - val = get_reg_val(reg->id, vcpu->arch.ebbhr); + *val = get_reg_val(id, vcpu->arch.ebbhr); break; case KVM_REG_PPC_EBBRR: - val = get_reg_val(reg->id, vcpu->arch.ebbrr); + *val = get_reg_val(id, vcpu->arch.ebbrr); break; case KVM_REG_PPC_BESCR: - val = get_reg_val(reg->id, vcpu->arch.bescr); + *val = get_reg_val(id, vcpu->arch.bescr); break; case KVM_REG_PPC_VTB: - val = get_reg_val(reg->id, vcpu->arch.vtb); + *val = get_reg_val(id, vcpu->arch.vtb); break; case KVM_REG_PPC_IC: - val = get_reg_val(reg->id, vcpu->arch.ic); + *val = get_reg_val(id, vcpu->arch.ic); break; default: r = -EINVAL; break; } } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dar(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DSISR: - kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dsisr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); + i = id - KVM_REG_PPC_FPR0; + VCPU_FPR(vcpu, i) = set_reg_val(id, *val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); - break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); - break; - case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(id, *val); break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; - vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + i = id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; } else { r = -ENXIO; } @@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } r = kvmppc_xics_set_icp(vcpu, - set_reg_val(reg->id, val)); + set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - vcpu->arch.fscr = set_reg_val(reg->id, val); + vcpu->arch.fscr = set_reg_val(id, *val); break; case KVM_REG_PPC_TAR: - vcpu->arch.tar = set_reg_val(reg->id, val); + vcpu->arch.tar = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBHR: - vcpu->arch.ebbhr = set_reg_val(reg->id, val); + vcpu->arch.ebbhr = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBRR: - vcpu->arch.ebbrr = set_reg_val(reg->id, val); + vcpu->arch.ebbrr = set_reg_val(id, *val); break; case KVM_REG_PPC_BESCR: - vcpu->arch.bescr = set_reg_val(reg->id, val); + vcpu->arch.bescr = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(reg->id, val); + vcpu->arch.vtb = set_reg_val(id, *val); break; case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(reg->id, val); + vcpu->arch.ic = set_reg_val(id, *val); break; default: r = -EINVAL; @@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + vcpu->guest_debug = dbg->control; + return 0; } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } @@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 72c20bb16d26..81460c5359c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 27cced9c7249..e63587d30b70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } +static int kvmppc_emulate_debug_inst(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 last_inst; + + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) { + /* + * Fetch failed, so return to guest and + * try executing it again. + */ + return RESUME_GUEST; + } + + if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + return RESUME_HOST; + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest executes an illegal instruction. - * We just generate a program interrupt to the guest, since - * we don't emulate any guest instructions at this stage. + * If the guest debug is disabled, generate a program interrupt + * to the guest. If guest debug is enabled, we need to check + * whether the instruction is a software breakpoint instruction. + * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + r = kvmppc_emulate_debug_inst(run, vcpu); + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; /* * This occurs if the guest (kernel or userspace), does something that @@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, { int i, j; - kvmppc_set_pvr_hv(vcpu, sregs->pvr); + /* Only accept the same PVR as the host's, since we can't spoof it */ + if (sregs->pvr != vcpu->arch.pvr) + return -EINVAL; j = 0; for (i = 0; i < vcpu->arch.slb_nr; i++) { @@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, long int i; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, 0); break; @@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, static int kvmppc_grab_hwthread(int cpu) { struct paca_struct *tpaca; - long timeout = 1000; + long timeout = 10000; tpaca = &paca[cpu]; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f0c4db7704c3..edb2ccdbb2ba 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -355,6 +355,7 @@ kvmppc_hv_entry: * MSR = ~IR|DR * R13 = PACA * R1 = host R1 + * R2 = TOC * all other volatile GPRS = free */ mflr r0 @@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) toc_tlbie_lock: .tc native_tlbie_lock[TC],native_tlbie_lock .previous - ld r3,toc_tlbie_lock@toc(2) + ld r3,toc_tlbie_lock@toc(r2) #ifdef __BIG_ENDIAN__ lwz r8,PACA_LOCK_TOKEN(r13) #else diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..cf2eb16846d1 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; @@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, int r = 0; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4c89fa6f109..9b55dec2d6cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (!(current->thread.regs->msr & MSR_FP)) { + enable_kernel_fp(); + load_fp_state(&vcpu->arch.fp); + current->thread.fp_save_area = &vcpu->arch.fp; + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + current->thread.fp_save_area = NULL; +#endif +} + static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) { #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) @@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) #endif } +/* + * Simulate AltiVec unavailable fault to load guest state + * from thread to AltiVec unit. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (!(current->thread.regs->msr & MSR_VEC)) { + enable_kernel_altivec(); + load_vr_state(&vcpu->arch.vr); + current->thread.vr_save_area = &vcpu->arch.vr; + current->thread.regs->msr |= MSR_VEC; + } + } +#endif +} + +/* + * Save guest vcpu AltiVec state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + current->thread.vr_save_area = NULL; + } +#endif +} + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) { /* Synchronize guest's desire to get debug interrupts into shadow MSR */ @@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); } +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); +} + +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { kvmppc_set_srr0(vcpu, srr0); @@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_ITLB_MISS: case BOOKE_IRQPRIO_SYSCALL: case BOOKE_IRQPRIO_FP_UNAVAIL: +#ifdef CONFIG_SPE_POSSIBLE case BOOKE_IRQPRIO_SPE_UNAVAIL: case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: +#endif +#ifdef CONFIG_ALTIVEC + case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL: + case BOOKE_IRQPRIO_ALTIVEC_ASSIST: +#endif case BOOKE_IRQPRIO_AP_UNAVAIL: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; @@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_DE; allowed = allowed && !crit; msr_mask = MSR_ME; - int_class = INT_CLASS_CRIT; + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + int_class = INT_CLASS_DBG; + else + int_class = INT_CLASS_CRIT; + break; } @@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest - * as always using the FPU. Kernel usage of FP (via - * enable_kernel_fp()) in this thread must not occur while - * vcpu->fpu_active is set. + * as always using the FPU. */ - vcpu->fpu_active = 1; - kvmppc_load_guest_fp(vcpu); #endif +#ifdef CONFIG_ALTIVEC + /* Save userspace AltiVec state in stack */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); + /* + * Since we can't trap on MSR_VEC in GS-mode, we consider the guest + * as always using the AltiVec. + */ + kvmppc_load_guest_altivec(vcpu); +#endif + /* Switch to guest debug context */ - debug = vcpu->arch.shadow_dbg_reg; + debug = vcpu->arch.dbg_reg; switch_booke_debug_regs(&debug); debug = current->thread.debug; - current->thread.debug = vcpu->arch.shadow_dbg_reg; + current->thread.debug = vcpu->arch.dbg_reg; vcpu->arch.pgdir = current->mm->pgd; kvmppc_fix_ee_before_entry(); @@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); +#endif - vcpu->fpu_active = 0; +#ifdef CONFIG_ALTIVEC + kvmppc_save_guest_altivec(vcpu); #endif out: @@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) { - struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); + struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; + if (vcpu->guest_debug == 0) { + /* + * Debug resources belong to Guest. + * Imprecise debug event is not injected + */ + if (dbsr & DBSR_IDE) { + dbsr &= ~DBSR_IDE; + if (!dbsr) + return RESUME_GUEST; + } + + if (dbsr && (vcpu->arch.shared->msr & MSR_DE) && + (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM)) + kvmppc_core_queue_debug(vcpu); + + /* Inject a program interrupt if trap debug is not allowed */ + if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE)) + kvmppc_core_queue_program(vcpu, ESR_PTR); + + return RESUME_GUEST; + } + + /* + * Debug resource owned by userspace. + * Clear guest dbsr (vcpu->arch.dbsr) + */ + vcpu->arch.dbsr = 0; run->debug.arch.status = 0; run->debug.arch.address = vcpu->arch.pc; @@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_DATA_STORAGE: case BOOKE_INTERRUPT_DTLB_MISS: case BOOKE_INTERRUPT_HV_PRIV: - emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + break; + case BOOKE_INTERRUPT_PROGRAM: + /* SW breakpoints arrive as illegal instructions on HV */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); break; default: break; @@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: + if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && + (last_inst == KVMPPC_INST_SW_BREAKPOINT)) { + /* + * We are here because of an SW breakpoint instr, + * so lets return to host to handle. + */ + r = kvmppc_handle_debug(run, vcpu); + run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + break; + } + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* * Program traps generated by user-level software must @@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); r = RESUME_GUEST; break; -#else +#elif defined(CONFIG_SPE_POSSIBLE) case BOOKE_INTERRUPT_SPE_UNAVAIL: /* * Guest wants SPE, but host kernel doesn't support it. Send @@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; +#endif /* CONFIG_SPE_POSSIBLE */ + +/* + * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC, + * see kvmppc_core_check_processor_compat(). + */ +#ifdef CONFIG_ALTIVEC + case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_ALTIVEC_ASSIST: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST); + r = RESUME_GUEST; + break; #endif case BOOKE_INTERRUPT_DATA_STORAGE: @@ -1188,6 +1345,8 @@ out: else { /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); + kvmppc_load_guest_fp(vcpu); + kvmppc_load_guest_altivec(vcpu); } } @@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, (unsigned long)vcpu); + /* + * Clear DBSR.MRR to avoid guest debug interrupt as + * this is of host interest + */ + mtspr(SPRN_DBSR, DBSR_MRR); return 0; } @@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1); break; case KVM_REG_PPC_IAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3); break; case KVM_REG_PPC_IAC4: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4); break; #endif case KVM_REG_PPC_DAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1); break; case KVM_REG_PPC_DAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { u32 epr = kvmppc_get_epr(vcpu); - val = get_reg_val(reg->id, epr); + *val = get_reg_val(id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - val = get_reg_val(reg->id, vcpu->arch.epcr); + *val = get_reg_val(id, vcpu->arch.epcr); break; #endif case KVM_REG_PPC_TCR: - val = get_reg_val(reg->id, vcpu->arch.tcr); + *val = get_reg_val(id, vcpu->arch.tcr); break; case KVM_REG_PPC_TSR: - val = get_reg_val(reg->id, vcpu->arch.tsr); + *val = get_reg_val(id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + *val = get_reg_val(id, vcpu->arch.vrsave); break; default: - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); break; } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; - return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC2: - vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC4: - vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val); break; #endif case KVM_REG_PPC_DAC1: - vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAC2: - vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val); break; case KVM_REG_PPC_EPR: { - u32 new_epr = set_reg_val(reg->id, val); + u32 new_epr = set_reg_val(id, *val); kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr = set_reg_val(reg->id, val); + u32 new_epcr = set_reg_val(id, *val); kvmppc_set_epcr(vcpu, new_epcr); break; } #endif case KVM_REG_PPC_OR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_set_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_CLEAR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_clr_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_TSR: { - u32 tsr = set_reg_val(reg->id, val); + u32 tsr = set_reg_val(id, *val); kvmppc_set_tsr(vcpu, tsr); break; } case KVM_REG_PPC_TCR: { - u32 tcr = set_reg_val(reg->id, val); + u32 tcr = set_reg_val(id, *val); kvmppc_set_tcr(vcpu, tcr); break; } case KVM_REG_PPC_VRSAVE: - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.vrsave = set_reg_val(id, *val); break; default: - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); break; } @@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) update_timer_ints(vcpu); } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - if (vcpu->arch.tcr & TCR_ARE) { vcpu->arch.dec = vcpu->arch.decar; kvmppc_emulate_dec(vcpu); @@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int n, b = 0, w = 0; if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); return 0; @@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_guest_protect_msr(vcpu, MSR_DE, true); vcpu->guest_debug = dbg->control; - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; - /* Set DBCR0_EDM in guest visible DBCR0 register. */ - vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + vcpu->arch.dbg_reg.dbcr0 = 0; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; /* Code below handles only HW breakpoints */ - dbg_reg = &(vcpu->arch.shadow_dbg_reg); + dbg_reg = &(vcpu->arch.dbg_reg); #ifdef CONFIG_KVM_BOOKE_HV /* diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index f753543c56fa..22ba08ea68e9 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -32,9 +32,15 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#ifdef CONFIG_SPE_POSSIBLE #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 #define BOOKE_IRQPRIO_SPE_FP_DATA 6 #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#endif +#ifdef CONFIG_PPC_E500MC +#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5 +#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6 +#endif #define BOOKE_IRQPRIO_SYSCALL 8 #define BOOKE_IRQPRIO_AP_UNAVAIL 9 #define BOOKE_IRQPRIO_DTLB_MISS 10 @@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -/* - * Load up guest vcpu FP state if it's needed. - * It also set the MSR_FP in thread so that host know - * we're holding FPU, and then host can help to save - * guest vcpu FP state if other threads require to use FPU. - * This simulates an FP unavailable fault. - * - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { - enable_kernel_fp(); - load_fp_state(&vcpu->arch.fp); - current->thread.fp_save_area = &vcpu->arch.fp; - current->thread.regs->msr |= MSR_FP; - } -#endif -} - -/* - * Save guest vcpu FP state into thread. - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); - current->thread.fp_save_area = NULL; -#endif -} - static inline void kvmppc_clear_dbsr(void) { mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 28c158881d23..a82f64502de1 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -25,6 +25,7 @@ #define OP_19_XOP_RFI 50 #define OP_19_XOP_RFCI 51 +#define OP_19_XOP_RFDI 39 #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_WRTEE 131 @@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } +static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.dsrr0; + kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); +} + static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { vcpu->arch.pc = vcpu->arch.csrr0; @@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, *advance = 0; break; + case OP_19_XOP_RFDI: + kvmppc_emul_rfdi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS); + *advance = 0; + break; + default: emulated = EMULATE_FAIL; break; @@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; + bool debug_inst = false; switch (sprn) { case SPRN_DEAR: @@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_CSRR1: vcpu->arch.csrr1 = spr_val; break; + case SPRN_DSRR0: + vcpu->arch.dsrr0 = spr_val; + break; + case SPRN_DSRR1: + vcpu->arch.dsrr1 = spr_val; + break; + case SPRN_IAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac1 = spr_val; + break; + case SPRN_IAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac2 = spr_val; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac3 = spr_val; + break; + case SPRN_IAC4: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac4 = spr_val; + break; +#endif + case SPRN_DAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac1 = spr_val; + break; + case SPRN_DAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac2 = spr_val; + break; case SPRN_DBCR0: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE | + DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | + DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W); + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; vcpu->arch.dbg_reg.dbcr1 = spr_val; break; + case SPRN_DBCR2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dbcr2 = spr_val; + break; case SPRN_DBSR: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + vcpu->arch.dbsr &= ~spr_val; + if (!(vcpu->arch.dbsr & ~DBSR_IDE)) + kvmppc_core_dequeue_debug(vcpu); break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); @@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) emulated = EMULATE_FAIL; } + if (debug_inst) { + current->thread.debug = vcpu->arch.dbg_reg; + switch_booke_debug_regs(&vcpu->arch.dbg_reg); + } return emulated; } @@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_CSRR1: *spr_val = vcpu->arch.csrr1; break; + case SPRN_DSRR0: + *spr_val = vcpu->arch.dsrr0; + break; + case SPRN_DSRR1: + *spr_val = vcpu->arch.dsrr1; + break; + case SPRN_IAC1: + *spr_val = vcpu->arch.dbg_reg.iac1; + break; + case SPRN_IAC2: + *spr_val = vcpu->arch.dbg_reg.iac2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + *spr_val = vcpu->arch.dbg_reg.iac3; + break; + case SPRN_IAC4: + *spr_val = vcpu->arch.dbg_reg.iac4; + break; +#endif + case SPRN_DAC1: + *spr_val = vcpu->arch.dbg_reg.dac1; + break; + case SPRN_DAC2: + *spr_val = vcpu->arch.dbg_reg.dac2; + break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbg_reg.dbcr0; + if (vcpu->guest_debug) + *spr_val = *spr_val | DBCR0_EDM; break; case SPRN_DBCR1: *spr_val = vcpu->arch.dbg_reg.dbcr1; break; + case SPRN_DBCR2: + *spr_val = vcpu->arch.dbg_reg.dbcr2; + break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; break; diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e9fa56a911fd..81bd8a07aa51 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1,NEED_ESR + SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ @@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 @@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) -kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 @@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a326178bdea5..72920bed3ac6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <asm/mmu-book3e.h> #include <asm/tlb.h> +#include <asm/cputhreads.h> enum vcpu_ftr { VCPU_FTR_MMU_V2 @@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) + +/* + * These functions should be called with preemption disabled + * and the returned value is valid only in that context + */ +static inline int get_thread_specific_lpid(int vm_lpid) +{ + int vcpu_lpid = vm_lpid; + + if (threads_per_core == 2) + vcpu_lpid |= smp_processor_id() & 1; + + return vcpu_lpid; +} + +static inline int get_lpid(struct kvm_vcpu *vcpu) +{ + return get_thread_specific_lpid(vcpu->kvm->arch.lpid); +} #else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index c99c40e9182a..ce7291c79f6c 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; break; @@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va case SPRN_IVOR34: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val; + break; +#endif case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; @@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; @@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v case SPRN_IVOR34: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL]; + break; + case SPRN_IVOR33: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST]; + break; +#endif case SPRN_IVOR35: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; break; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..769778f855b0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) * writing shadow tlb entry to host TLB */ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) + uint32_t mas0, + uint32_t lpid) { unsigned long flags; @@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); #ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); + mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid)); #endif asm volatile("isync; tlbwe" : : : "memory"); @@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 0) { mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); + __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); + MAS0_ESEL(to_htlb1_esel(sesel)), + vcpu_e500->vcpu.kvm->arch.lpid); } } @@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; magic.mas8 = 0; - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0); preempt_enable(); } #endif @@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, local_irq_save(flags); mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu)); asm volatile("tlbsx 0, %[geaddr]\n" : : [geaddr] "r" (geaddr)); mtspr(SPRN_MAS5, 0); @@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..2fdc8722e324 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) return; } - - tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + preempt_disable(); + tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id; mb(); ppc_msgsnd(dbell_type, 0, tag); + preempt_enable(); } /* gtlbe must not be mapped by more than one host tlb entry */ @@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, { unsigned int tid, ts; gva_t eaddr; - u32 val, lpid; + u32 val; unsigned long flags; ts = get_tlb_ts(gtlbe); tid = get_tlb_tid(gtlbe); - lpid = vcpu_e500->vcpu.kvm->arch.lpid; /* We search the host TLB to invalidate its shadow TLB entry */ val = (tid << 16) | ts; @@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, local_irq_save(flags); mtspr(SPRN_MAS6, val); - mtspr(SPRN_MAS5, MAS5_SGS | lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); val = mfspr(SPRN_MAS1); @@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) unsigned long flags; local_irq_save(flags); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbilxlpid"); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); @@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +/* We use two lpids per VM */ static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) @@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) kvmppc_booke_vcpu_load(vcpu, cpu); - mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_LPID, get_lpid(vcpu)); mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); mtspr(SPRN_GPIR, vcpu->vcpu_id); mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; mtspr(SPRN_EPLC, vcpu->arch.eplc); mtspr(SPRN_EPSC, vcpu->arch.epsc); @@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; } - - kvmppc_load_guest_fp(vcpu); } static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) @@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; +#ifdef CONFIG_ALTIVEC + /* + * Since guests have the priviledge to enable AltiVec, we need AltiVec + * support in the host to save/restore their context. + * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit + * because it's cleared in the absence of CONFIG_ALTIVEC! + */ + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; +#endif else r = -ENOTSUPP; @@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_64BIT vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; #endif - vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; - vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); - vcpu->arch.epsc = vcpu->arch.eplc; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP; vcpu->arch.pvr = mfspr(SPRN_PVR); vcpu_e500->svr = mfspr(SPRN_SVR); @@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) if (lpid < 0) return lpid; + /* + * Use two lpids per VM on cores with two threads like e6500. Use + * even numbers to speedup vcpu lpid computation with consecutive lpids + * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. + */ + if (threads_per_core == 2) + lpid <<= 1; + kvm->arch.lpid = lpid; return 0; } static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) { - kvmppc_free_lpid(kvm->arch.lpid); + int lpid = kvm->arch.lpid; + + if (threads_per_core == 2) + lpid >>= 1; + + kvmppc_free_lpid(lpid); } static struct kvmppc_ops kvm_ops_e500mc = { @@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void) if (r) goto err_out; - kvmppc_init_lpid(64); + /* + * Use two lpids per VM on dual threaded processors like e6500 + * to workarround the lack of tlb write conditional instruction. + * Expose half the number of available hardware lpids to the lpid + * allocator. + */ + kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); kvmppc_claim_lpid(0); /* host */ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e96b50d0bdab..5cc2e7af3a7b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; @@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; + case 0: + /* + * Instruction with primary opcode 0. Based on PowerISA + * these are illegal instructions. + */ + if (inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + emulated = EMULATE_EXIT_USER; + advance = 0; + } else + emulated = EMULATE_FAIL; + + break; + default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 0de4ffa175a9..6d3c0ee1d744 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index da505237a664..c1f8f53cd312 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 last_inst; - kvmppc_get_last_inst(vcpu, false, &last_inst); + kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); /* XXX Deliver Program interrupt to guest. */ pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; @@ -638,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); @@ -664,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. - */ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - tasklet_schedule(&vcpu->arch.tasklet); + kvmppc_decrementer_func(vcpu); return HRTIMER_NORESTART; } @@ -683,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int ret; hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; vcpu->arch.dec_expires = ~(u64)0; @@ -907,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_handle_store); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = 0; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_VRSAVE: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); + break; +#endif /* CONFIG_ALTIVEC */ + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e8bc40869cbd..7d9ee3d8c618 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL If in doubt, say N here. +config SPE_POSSIBLE + def_bool y + depends on E200 || (E500 && !PPC_E500MC) + config SPE bool "SPE Support" - depends on E200 || (E500 && !PPC_E500MC) + depends on SPE_POSSIBLE default y ---help--- This option enables kernel support for the Signal Processing diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb9b258d60e7..2075e6c34c78 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -202,6 +202,7 @@ #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 028df8dc538e..7d603a71ab3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,7 +574,7 @@ struct kvm_arch { struct kvm_apic_map *apic_map; unsigned int tss_addr; - struct page *apic_access_page; + bool apic_access_page_done; gpa_t wall_clock; @@ -736,6 +736,7 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -914,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); int fx_init(struct kvm_vcpu *vcpu); -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); @@ -1036,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); @@ -1045,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include <asm/processor.h> +#include <asm/alternative.h> #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f4bad87ef256..976e3a57f9ea 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void) return xcr0; } -void kvm_update_cpuid(struct kvm_vcpu *vcpu) +int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; struct kvm_lapic *apic = vcpu->arch.apic; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (!best) - return; + return 0; /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { @@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) xstate_required_size(vcpu->arch.xcr0); } + /* + * The existing code assumes virtual address is 48-bit in the canonical + * address checks; exit if it is ever changed. + */ + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && ((best->eax & 0xff00) >> 8) != 48 && + ((best->eax & 0xff00) >> 8) != 0) + return -EINVAL; + kvm_pmu_cpuid_update(vcpu); + return 0; } static int is_efer_nx(void) @@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, } vcpu->arch.cpuid_nent = cpuid->nent; cpuid_fix_nx_cap(vcpu); - r = 0; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); + r = kvm_update_cpuid(vcpu); out_free: vfree(cpuid_entries); @@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); - return 0; - + r = kvm_update_cpuid(vcpu); out: return r; } @@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) if (!best) best = check_cpuid_limit(vcpu, function, index); + /* + * Perfmon not yet supported for L2 guest. + */ + if (is_guest_mode(vcpu) && function == 0xa) + best = NULL; + if (best) { *eax = best->eax; *ebx = best->ebx; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 43b33e301e68..4452eedfaedd 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,7 +3,7 @@ #include "x86.h" -void kvm_update_cpuid(struct kvm_vcpu *vcpu); +int kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 20d91873d831..a46207a05835 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1504,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rpl > cpl || dpl != cpl) goto exception; } + /* in long-mode d/b must be clear if l is set */ + if (seg_desc.d && seg_desc.l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + goto exception; + } + /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; break; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 76398fe15df2..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1174,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. * - * Note: write protection is difference between drity logging and spte + * Note: write protection is difference between dirty logging and spte * protection: * - for dirty logging, the spte can be set to writable at anytime if * its dirty bitmap is properly set. @@ -1262,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1270,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, while ((sptep = rmap_get_first(*rmapp, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); drop_spte(kvm, sptep); need_tlb_flush = 1; @@ -1280,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1294,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!is_shadow_present_pte(*sptep)); - rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); need_flush = 1; @@ -1328,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, + int level, unsigned long data)) { int j; @@ -1357,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { unsigned long idx, idx_end; unsigned long *rmapp; + gfn_t gfn = gfn_start; /* * {idx(page_j) | page_j intersects with @@ -1367,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, rmapp = __gfn_to_rmap(gfn_start, j, memslot); - for (; idx <= idx_end; ++idx) - ret |= handler(kvm, rmapp++, memslot, data); + for (; idx <= idx_end; + ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) + ret |= handler(kvm, rmapp++, memslot, + gfn, j, data); } } @@ -1379,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, int level, unsigned long data)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); @@ -1400,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); int young = 0; - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); - goto out; - } + BUG_ON(!shadow_accessed_mask); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1429,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } -out: - /* @data has hva passed to kvm_age_hva(). */ - trace_kvm_age_page(data, slot, young); + trace_kvm_age_page(gfn, level, slot, young); return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, + int level, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1474,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); + /* + * In case of absence of EPT Access and Dirty Bits supports, + * emulate the accessed bit for EPT, by checking if this page has + * an EPT mapping, and clearing it if it does. On the next access, + * a new EPT mapping will be established. + * This has some overhead, but not as much as the cost of swapping + * out actively used pages or breaking up actively used hugepages. + */ + if (!shadow_accessed_mask) { + /* + * We are holding the kvm->mmu_lock, and we are blowing up + * shadow PTEs. MMU notifier consumers need to be kept at bay. + * This is correct as long as we don't decouple the mmu_lock + * protected regions (like invalidate_range_start|end does). + */ + kvm->mmu_notifier_seq++; + return kvm_handle_hva_range(kvm, start, end, 0, + kvm_unmap_rmapp); + } + + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) @@ -1743,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1796,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); if (flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } struct mmu_page_path { @@ -2530,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, true, host_writable)) { if (write_fault) *emulate = 1; - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (unlikely(is_mmio_spte(*sptep) && emulate)) @@ -3444,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.tlb_flush; - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); - void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); @@ -3965,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, if (remote_flush) kvm_flush_remote_tlbs(vcpu->kvm); else if (local_flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, @@ -4226,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { vcpu->arch.mmu.invlpg(vcpu, gva); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0ab6c65a2821..806d58e3c320 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -298,8 +298,7 @@ retry_walk: } #endif walker->max_level = walker->level; - ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); + ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); accessed_dirty = PT_GUEST_ACCESSED_MASK; pt_access = pte_access = ACC_ALL; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6ffd643d1a64..04fa1b8298c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2626,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; vmcs_write64(GUEST_IA32_PAT, data); vcpu->arch.pat = data; break; @@ -3132,9 +3134,17 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_unrestricted_guest()) enable_unrestricted_guest = 0; - if (!cpu_has_vmx_flexpriority()) + if (!cpu_has_vmx_flexpriority()) { flexpriority_enabled = 0; + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. + */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + if (!cpu_has_vmx_tpr_shadow()) kvm_x86_ops->update_cr8_intercept = NULL; @@ -3930,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm) { gfn_t fn; u16 data = 0; - int r, idx, ret = 0; + int idx, r; idx = srcu_read_lock(&kvm->srcu); fn = kvm->arch.tss_addr >> PAGE_SHIFT; @@ -3952,13 +3962,9 @@ static int init_rmode_tss(struct kvm *kvm) r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, sizeof(u8)); - if (r < 0) - goto out; - - ret = 1; out: srcu_read_unlock(&kvm->srcu, idx); - return ret; + return r; } static int init_rmode_identity_map(struct kvm *kvm) @@ -4027,7 +4033,7 @@ static int alloc_apic_access_page(struct kvm *kvm) int r = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page) + if (kvm->arch.apic_access_page_done) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; @@ -4043,7 +4049,12 @@ static int alloc_apic_access_page(struct kvm *kvm) goto out; } - kvm->arch.apic_access_page = page; + /* + * Do not pin the page in memory, so that memory hot-unplug + * is able to migrate it. + */ + put_page(page); + kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); return r; @@ -4559,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, 0); } - if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); if (vmx_vm_has_apicv(vcpu->kvm)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); @@ -4751,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) if (ret) return ret; kvm->arch.tss_addr = addr; - if (!init_rmode_tss(kvm)) - return -ENOMEM; - - return 0; + return init_rmode_tss(kvm); } static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) @@ -6718,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: @@ -6993,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: + if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) + return 0; return 1; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); @@ -7201,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) vmx_set_msr_bitmap(vcpu); } +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * Currently we do not handle the nested case where L2 has an + * APIC access page of its own; that page is still pinned. + * Hence, we skip the case where the VCPU is in guest mode _and_ + * L1 prepared an APIC access page for L2. + * + * For the case where L1 and L2 share the same APIC access page + * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear + * in the vmcs12), this function will only update either the vmcs01 + * or the vmcs02. If the former, the vmcs02 will be updated by + * prepare_vmcs02. If the latter, the vmcs01 will be updated in + * the next L2->L1 exit. + */ + if (!is_guest_mode(vcpu) || + !nested_cpu_has2(vmx->nested.current_vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + vmcs_write64(APIC_ACCESS_ADDR, hpa); +} + static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) { u16 status; @@ -8008,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) /* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it - * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2 + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 * guest in a way that will both be appropriate to L1's requests, and our * needs. In addition to modifying the active vmcs (which is vmcs02), this * function also has additional necessary side-effects, like setting various @@ -8143,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vcpu->kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); @@ -8953,6 +8983,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, } /* + * We are now running in L2, mmu_notifier will force to reload the + * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. + */ + kvm_vcpu_reload_apic_access_page(vcpu); + + /* * Exiting from L2 to L1, we're now back to L1 which thinks it just * finished a VMLAUNCH or VMRESUME instruction, so we need to set the * success or failure flag accordingly. @@ -9077,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .vm_has_apicv = vmx_vm_has_apicv, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d7f65daa8d0..6857257f3810 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -729,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1726,7 +1726,7 @@ static bool valid_mtrr_type(unsigned t) return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ } -static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; u64 mask; @@ -1769,12 +1769,13 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) return true; } +EXPORT_SYMBOL_GPL(kvm_mtrr_valid); static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!mtrr_valid(vcpu, msr, data)) + if (!kvm_mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { @@ -1825,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to @@ -2184,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return set_msr_mce(vcpu, msr, data); /* Performance counters are not protected by a CPUID bit, @@ -2350,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; data = vcpu->arch.mce_banks[offset]; break; @@ -2531,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MCG_CAP: case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return get_msr_mce(vcpu, msr, pdata); case MSR_K7_CLK_CTL: /* @@ -5000,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; @@ -6019,6 +6020,41 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_apic_update_tmr(vcpu, tmr); } +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops->tlb_flush(vcpu); +} + +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +{ + struct page *page = NULL; + + if (!kvm_x86_ops->set_apic_access_page_addr) + return; + + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. + */ + put_page(page); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); + +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + /* * Returns 1 to let __vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -6048,7 +6084,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_x86_ops->tlb_flush(vcpu); + kvm_vcpu_flush_tlb(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -6079,6 +6115,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) + kvm_vcpu_reload_apic_access_page(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -7271,8 +7309,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); - if (kvm->arch.apic_access_page) - put_page(kvm->arch.apic_access_page); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 985fb2c006fa..7cb9c45a5fe0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -159,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); + #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | XSTATE_BNDREGS | XSTATE_BNDCSR) extern u64 host_xcr0; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5f578e850fc5..90d734bbf467 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn, static int mn_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { - __mn_flush_page(mn, address); + for (; start < end; start += PAGE_SIZE) + __mn_flush_page(mn, start); return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bbd8d57b04e0..28be31f49250 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -198,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +/* + * Carry out a gup that requires IO. Allow the mm to relinquish the mmap + * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL + * controls whether we retry the gup one more time to completion in that case. + * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp + * handler. + */ +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep); + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, @@ -577,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..0f4196a0bc20 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 27288692241e..88787bb4b3b9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -57,10 +57,13 @@ struct mmu_notifier_ops { * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. + * Start-end is necessary in case the secondary MMU is mapping the page + * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); /* * test_young is called to check the young/accessed bitflag in @@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address); + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { if (mm_has_notifiers(mm)) - return __mmu_notifier_clear_flush_young(mm, address); + return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } @@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PAGE_SIZE); \ __young; \ }) @@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ - ___address); \ + ___address, \ + ___address + \ + PMD_SIZE); \ __young; \ }) @@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm) } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { return 0; } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index ab679c395042..6edf1f2028cd 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -225,24 +225,26 @@ TRACE_EVENT(kvm_fpu, ); TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), - TP_ARGS(hva, slot, ref), + TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), + TP_ARGS(gfn, level, slot, ref), TP_STRUCT__entry( __field( u64, hva ) __field( u64, gfn ) + __field( u8, level ) __field( u8, referenced ) ), TP_fast_assign( - __entry->hva = hva; - __entry->gfn = - slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); + __entry->gfn = gfn; + __entry->level = level; + __entry->hva = ((gfn - slot->base_gfn) << + PAGE_SHIFT) + slot->userspace_addr; __entry->referenced = ref; ), - TP_printk("hva %llx gfn %llx %s", - __entry->hva, __entry->gfn, + TP_printk("hva %llx gfn %llx level %u %s", + __entry->hva, __entry->gfn, __entry->level, __entry->referenced ? "YOUNG" : "OLD") ); @@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_ALLOW_RETRY; if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; + if (*flags & FOLL_TRIED) { + VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY); + fault_flags |= FAULT_FLAG_TRIED; + } ret = handle_mm_fault(mm, vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 950813b1eb36..2c8da9825fe3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm) * existed or not. */ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct mmu_notifier *mn; int young = 0, id; @@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->clear_flush_young) - young |= mn->ops->clear_flush_young(mn, mm, address); + young |= mn->ops->clear_flush_young(mn, mm, start, end); } srcu_read_unlock(&srcu, id); diff --git a/mm/rmap.c b/mm/rmap.c index 3e8491c504f8..bc74e0012809 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, continue; /* don't unmap */ } - if (ptep_clear_flush_young_notify(vma, address, pte)) + /* + * No need for _notify because we're within an + * mmu_notifier_invalidate_range_ {start|end} scope. + */ + if (ptep_clear_flush_young(vma, address, pte)) continue; /* Nuke the page table entry. */ diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index d6a3d0993d88..5ff7f7f2689a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - down_read(&mm->mmap_sem); - get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); - up_read(&mm->mmap_sem); + kvm_get_user_page_io(NULL, mm, addr, 1, NULL); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 0c712a779b44..b0fb390943c6 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,6 +36,9 @@ #include <linux/seqlock.h> #include <trace/events/kvm.h> +#ifdef __KVM_HAVE_IOAPIC +#include "ioapic.h" +#endif #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 278232025129..39a02fbdb572 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,11 +52,13 @@ #include <asm/processor.h> #include <asm/io.h> +#include <asm/ioctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include "coalesced_mmio.h" #include "async_pf.h" +#include "vfio.h" #define CREATE_TRACE_POINTS #include <trace/events/kvm.h> @@ -151,7 +153,7 @@ static void ack_flush(void *_completed) { } -static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { int i, cpu, me; cpumask_var_t cpus; @@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) long dirty_count = kvm->tlbs_dirty; smp_mb(); - if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) + if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } @@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); void kvm_reload_remote_mmus(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); + kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } void kvm_make_mclock_inprogress_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -294,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); + + kvm_arch_mmu_notifier_invalidate_page(kvm, address); + srcu_read_unlock(&kvm->srcu, idx); } @@ -367,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address) + unsigned long start, + unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); int young, idx; @@ -375,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); + young = kvm_age_hva(kvm, start, end); if (young) kvm_flush_remote_tlbs(kvm); @@ -1128,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); } +int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, bool write_fault, + struct page **pagep) +{ + int npages; + int locked = 1; + int flags = FOLL_TOUCH | FOLL_HWPOISON | + (pagep ? FOLL_GET : 0) | + (write_fault ? FOLL_WRITE : 0); + + /* + * If retrying the fault, we get here *not* having allowed the filemap + * to wait on the page lock. We should now allow waiting on the IO with + * the mmap semaphore released. + */ + down_read(&mm->mmap_sem); + npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, + &locked); + if (!locked) { + VM_BUG_ON(npages); + + if (!pagep) + return 0; + + /* + * The previous call has now waited on the IO. Now we can + * retry and complete. Pass TRIED to ensure we do not re + * schedule async IO (see e.g. filemap_fault). + */ + down_read(&mm->mmap_sem); + npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, + pagep, NULL, NULL); + } + up_read(&mm->mmap_sem); + return npages; +} + static inline int check_user_page_hwpoison(unsigned long addr) { int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; @@ -1190,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(current, current->mm, addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else - npages = get_user_pages_fast(addr, 1, write_fault, - page); + } else { + /* + * By now we have tried gup_fast, and possibly async_pf, and we + * are certainly not atomic. Time to retry the gup, allowing + * mmap semaphore to be relinquished in the case of IO. + */ + npages = kvm_get_user_page_io(current, current->mm, addr, + write_fault, page); + } if (npages != 1) return npages; @@ -1995,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm) return -EIO; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) + return -EINVAL; + #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* * Special cases: vcpu ioctls that are asynchronous to vcpu execution, @@ -3233,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, goto out_undebugfs; } + r = kvm_vfio_ops_init(); + WARN_ON(r); + return 0; out_undebugfs: diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index bb11b36ee8a2..281e7cf2b8e5 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include "vfio.h" struct kvm_vfio_group { struct list_head node; @@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return 0; } -static int __init kvm_vfio_ops_init(void) +int kvm_vfio_ops_init(void) { return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); } -module_init(kvm_vfio_ops_init); diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h new file mode 100644 index 000000000000..92eac75d6b62 --- /dev/null +++ b/virt/kvm/vfio.h @@ -0,0 +1,13 @@ +#ifndef __KVM_VFIO_H +#define __KVM_VFIO_H + +#ifdef CONFIG_KVM_VFIO +int kvm_vfio_ops_init(void); +#else +static inline int kvm_vfio_ops_init(void) +{ + return 0; +} +#endif + +#endif |