summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt2
-rw-r--r--arch/arm/include/asm/kvm_host.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h8
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h20
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h7
-rw-r--r--arch/powerpc/include/asm/kvm_host.h11
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h13
-rw-r--r--arch/powerpc/include/asm/reg_booke.h2
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h6
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S12
-rw-r--r--arch/powerpc/kernel/cputable.c5
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S4
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S26
-rw-r--r--arch/powerpc/kvm/book3s.c162
-rw-r--r--arch/powerpc/kvm/book3s.h3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c47
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S3
-rw-r--r--arch/powerpc/kvm/book3s_pr.c6
-rw-r--r--arch/powerpc/kvm/booke.c287
-rw-r--r--arch/powerpc/kvm/booke.h40
-rw-r--r--arch/powerpc/kvm/booke_emulate.c163
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S13
-rw-r--r--arch/powerpc/kvm/e500.h20
-rw-r--r--arch/powerpc/kvm/e500_emulate.c20
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c20
-rw-r--r--arch/powerpc/kvm/e500mc.c60
-rw-r--r--arch/powerpc/kvm/emulate.c17
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c107
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype6
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h9
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kvm/cpuid.c27
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/emulate.c9
-rw-r--r--arch/x86/kvm/mmu.c90
-rw-r--r--arch/x86/kvm/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/vmx.c77
-rw-r--r--arch/x86/kvm/x86.c58
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--drivers/iommu/amd_iommu_v2.c6
-rw-r--r--include/linux/kvm_host.h13
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/mmu_notifier.h24
-rw-r--r--include/trace/events/kvm.h16
-rw-r--r--mm/gup.c4
-rw-r--r--mm/mmu_notifier.c5
-rw-r--r--mm/rmap.c6
-rw-r--r--virt/kvm/async_pf.c4
-rw-r--r--virt/kvm/eventfd.c3
-rw-r--r--virt/kvm/kvm_main.c75
-rw-r--r--virt/kvm/vfio.c4
-rw-r--r--virt/kvm/vfio.h13
56 files changed, 1143 insertions, 430 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f7735c72c128..7610eaa4d491 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1901,6 +1901,8 @@ registers, find a list below:
PPC | KVM_REG_PPC_ARCH_COMPAT | 32
PPC | KVM_REG_PPC_DABRX | 32
PPC | KVM_REG_PPC_WORT | 64
+ PPC | KVM_REG_PPC_SPRG9 | 64
+ PPC | KVM_REG_PPC_DBSR | 32
PPC | KVM_REG_PPC_TM_GPR0 | 64
...
PPC | KVM_REG_PPC_TM_GPR31 | 64
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 46e5d4da1989..53036e21756b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
return 0;
}
@@ -180,6 +181,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bcde41905746..2012c4ba8d67 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm,
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
return 0;
}
@@ -190,6 +191,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 465dfcb82c92..5bca220bbb60 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -53,17 +53,17 @@
#define BOOKE_INTERRUPT_DEBUG 15
/* E500 */
-#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32
-#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33
-/*
- * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines
- */
-#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
-#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
+#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
+#endif
+
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
#define BOOKE_INTERRUPT_DOORBELL 36
#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index f7aa5cc395c4..3286f0d6a86c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -23,15 +23,16 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
-/* LPIDs we support with this build -- runtime limit may be lower */
+/*
+ * Number of available lpids. Only the low-order 6 bits of LPID rgister are
+ * implemented on e500mc+ cores.
+ */
#define KVMPPC_NR_LPIDS 64
#define KVMPPC_INST_EHPRIV 0x7c00021c
#define EHPRIV_OC_SHIFT 11
/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
#define EHPRIV_OC_DEBUG 1
-#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \
- (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 604000882352..047855619cc4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -56,10 +56,15 @@
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+}
+
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
@@ -139,6 +144,7 @@ enum kvm_exit_types {
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
EMULATED_RFCI_EXITS,
+ EMULATED_RFDI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
@@ -584,8 +590,6 @@ struct kvm_vcpu_arch {
u32 crit_save;
/* guest debug registers*/
struct debug_reg dbg_reg;
- /* hardware visible debug registers when in guest state */
- struct debug_reg shadow_dbg_reg;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
@@ -607,7 +611,6 @@ struct kvm_vcpu_arch {
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
struct hrtimer dec_timer;
- struct tasklet_struct tasklet;
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..a6dcdb6d13c1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -38,6 +38,12 @@
#include <asm/paca.h>
#endif
+/*
+ * KVMPPC_INST_SW_BREAKPOINT is debug Instruction
+ * for supporting software breakpoint.
+ */
+#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00
+
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -89,7 +95,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
-extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -206,6 +212,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
+
union kvmppc_one_reg {
u32 wval;
u64 dval;
@@ -243,7 +252,7 @@ struct kvmppc_ops {
int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
unsigned long end);
- int (*age_hva)(struct kvm *kvm, unsigned long hva);
+ int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
void (*mmu_destroy)(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 1d653308a33c..16547efa2d5a 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -319,6 +319,8 @@
* DBSR bits which have conflicting definitions on true Book E versus IBM 40x.
*/
#ifdef CONFIG_BOOKE
+#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */
+#define DBSR_MRR 0x30000000 /* Most Recent Reset */
#define DBSR_IC 0x08000000 /* Instruction Completion */
#define DBSR_BT 0x04000000 /* Branch Taken */
#define DBSR_IRPT 0x02000000 /* Exception Debug Event */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49dbb145d..ab4d4732c492 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -476,6 +476,11 @@ struct kvm_get_htab_header {
/* FP and vector status/control registers */
#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accesses via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
/* Virtual processor areas */
@@ -557,6 +562,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
+#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 4f1393d20079..dddba3e94260 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle)
blr
+#ifdef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e6500)
mflr r6
#ifdef CONFIG_PPC64
@@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500)
bl __setup_cpu_e5500
mtlr r6
blr
+#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
+#ifdef CONFIG_E200
_GLOBAL(__setup_cpu_e200)
/* enable dedicated debug exception handling resources (Debug APU) */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_DAPUEN@l
mtspr SPRN_HID0,r3
b __setup_e200_ivors
+#endif /* CONFIG_E200 */
+
+#ifdef CONFIG_E500
+#ifndef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e500v1)
_GLOBAL(__setup_cpu_e500v2)
mflr r4
@@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2)
#endif
mtlr r4
blr
+#else /* CONFIG_PPC_E500MC */
_GLOBAL(__setup_cpu_e500mc)
_GLOBAL(__setup_cpu_e5500)
mflr r5
@@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500)
2:
mtlr r5
blr
-#endif
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC_BOOK3E_64
_GLOBAL(__restore_cpu_e6500)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 9b6dcaaec1a3..808405906336 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_E500
#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC_E500MC
{ /* e500 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80200000,
@@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500,
.platform = "ppc8548",
},
+#else
{ /* e500mc */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80230000,
@@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500mc,
.platform = "ppce500mc",
},
+#endif /* CONFIG_PPC_E500MC */
#endif /* CONFIG_PPC32 */
+#ifdef CONFIG_PPC_E500MC
{ /* e5500 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80240000,
@@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500mc,
.platform = "ppce6500",
},
+#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
{ /* default match */
.pvr_mask = 0x00000000,
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index bb9cac6c8051..3e68d1c69718 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -635,7 +635,7 @@ interrupt_end_book3e:
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
- NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
+ NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
PROLOG_ADDITION_NONE)
/* we can probably do a shorter exception entry for that one... */
EXCEPTION_COMMON(0x200)
@@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
NORMAL_EXCEPTION_PROLOG(0x220,
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
+ BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
INTS_DISABLE
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index b497188a94a1..fffd1f96bb1d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -613,34 +613,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
+/* Define SPE handlers for e200 and e500v2 */
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
+ NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
beq 1f
bl load_up_spe
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
-#else
- EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+ EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
SPEFloatingPointException, EXC_XFER_EE)
/* SPE Floating Point Round */
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
SPEFloatingPointRoundException, EXC_XFER_EE)
-#else
- EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+#endif /* CONFIG_SPE_POSSIBLE */
+
/* Performance Monitor */
EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
@@ -947,6 +949,7 @@ get_phys_addr:
* Global functions
*/
+#ifdef CONFIG_E200
/* Adjust or setup IVORs for e200 */
_GLOBAL(__setup_e200_ivors)
li r3,DebugDebug@l
@@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors)
mtspr SPRN_IVOR34,r3
sync
blr
+#endif
+#ifdef CONFIG_E500
+#ifndef CONFIG_PPC_E500MC
/* Adjust or setup IVORs for e500v1/v2 */
_GLOBAL(__setup_e500_ivors)
li r3,DebugCrit@l
@@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors)
mtspr SPRN_IVOR35,r3
sync
blr
-
+#else
/* Adjust or setup IVORs for e500mc */
_GLOBAL(__setup_e500mc_ivors)
li r3,DebugDebug@l
@@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors)
mtspr SPRN_IVOR41,r3
sync
blr
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_E500 */
#ifdef CONFIG_SPE
/*
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..b32db4b95361 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dar(vcpu));
break;
case KVM_REG_PPC_DSISR:
- val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+ i = id - KVM_REG_PPC_FPR0;
+ *val = get_reg_val(id, VCPU_FPR(vcpu, i));
break;
case KVM_REG_PPC_FPSCR:
- val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
- break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ *val = get_reg_val(id, vcpu->arch.fp.fpscr);
break;
- case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
- break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
- val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+ i = id - KVM_REG_PPC_VSR0;
+ val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
+ val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
} else {
r = -ENXIO;
}
break;
#endif /* CONFIG_VSX */
- case KVM_REG_PPC_DEBUG_INST: {
- u32 opcode = INS_TW;
- r = copy_to_user((u32 __user *)(long)reg->addr,
- &opcode, sizeof(u32));
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, INS_TW);
break;
- }
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
if (!vcpu->arch.icp) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+ *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
- val = get_reg_val(reg->id, vcpu->arch.fscr);
+ *val = get_reg_val(id, vcpu->arch.fscr);
break;
case KVM_REG_PPC_TAR:
- val = get_reg_val(reg->id, vcpu->arch.tar);
+ *val = get_reg_val(id, vcpu->arch.tar);
break;
case KVM_REG_PPC_EBBHR:
- val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+ *val = get_reg_val(id, vcpu->arch.ebbhr);
break;
case KVM_REG_PPC_EBBRR:
- val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+ *val = get_reg_val(id, vcpu->arch.ebbrr);
break;
case KVM_REG_PPC_BESCR:
- val = get_reg_val(reg->id, vcpu->arch.bescr);
+ *val = get_reg_val(id, vcpu->arch.bescr);
break;
case KVM_REG_PPC_VTB:
- val = get_reg_val(reg->id, vcpu->arch.vtb);
+ *val = get_reg_val(id, vcpu->arch.vtb);
break;
case KVM_REG_PPC_IC:
- val = get_reg_val(reg->id, vcpu->arch.ic);
+ *val = get_reg_val(id, vcpu->arch.ic);
break;
default:
r = -EINVAL;
break;
}
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dar(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DSISR:
- kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
+ i = id - KVM_REG_PPC_FPR0;
+ VCPU_FPR(vcpu, i) = set_reg_val(id, *val);
break;
case KVM_REG_PPC_FPSCR:
- vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
- break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
- break;
- case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ vcpu->arch.fp.fpscr = set_reg_val(id, *val);
break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
- vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+ i = id - KVM_REG_PPC_VSR0;
+ vcpu->arch.fp.fpr[i][0] = val->vsxval[0];
+ vcpu->arch.fp.fpr[i][1] = val->vsxval[1];
} else {
r = -ENXIO;
}
@@ -715,29 +652,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
break;
}
r = kvmppc_xics_set_icp(vcpu,
- set_reg_val(reg->id, val));
+ set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
case KVM_REG_PPC_FSCR:
- vcpu->arch.fscr = set_reg_val(reg->id, val);
+ vcpu->arch.fscr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_TAR:
- vcpu->arch.tar = set_reg_val(reg->id, val);
+ vcpu->arch.tar = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EBBHR:
- vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+ vcpu->arch.ebbhr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EBBRR:
- vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+ vcpu->arch.ebbrr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_BESCR:
- vcpu->arch.bescr = set_reg_val(reg->id, val);
+ vcpu->arch.bescr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
- vcpu->arch.vtb = set_reg_val(reg->id, val);
+ vcpu->arch.vtb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IC:
- vcpu->arch.ic = set_reg_val(reg->id, val);
+ vcpu->arch.ic = set_reg_val(id, *val);
break;
default:
r = -EINVAL;
@@ -778,13 +715,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- return -EINVAL;
+ vcpu->guest_debug = dbg->control;
+ return 0;
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
kvmppc_core_queue_dec(vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -851,9 +787,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm->arch.kvm_ops->age_hva(kvm, hva);
+ return kvm->arch.kvm_ops->age_hva(kvm, start, end);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..d2b3ec088b8c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
+ unsigned long end);
extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 72c20bb16d26..81460c5359c0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
return ret;
}
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
if (!kvm->arch.using_mmu_notifiers)
return 0;
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 27cced9c7249..e63587d30b70 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
return kvmppc_hcall_impl_hv_realmode(cmd);
}
+static int kvmppc_emulate_debug_inst(struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ u32 last_inst;
+
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
+ EMULATE_DONE) {
+ /*
+ * Fetch failed, so return to guest and
+ * try executing it again.
+ */
+ return RESUME_GUEST;
+ }
+
+ if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = kvmppc_get_pc(vcpu);
+ return RESUME_HOST;
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+}
+
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest executes an illegal instruction.
- * We just generate a program interrupt to the guest, since
- * we don't emulate any guest instructions at this stage.
+ * If the guest debug is disabled, generate a program interrupt
+ * to the guest. If guest debug is enabled, we need to check
+ * whether the instruction is a software breakpoint instruction.
+ * Accordingly return to Guest or Host.
*/
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ r = kvmppc_emulate_debug_inst(run, vcpu);
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ }
break;
/*
* This occurs if the guest (kernel or userspace), does something that
@@ -856,7 +886,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
{
int i, j;
- kvmppc_set_pvr_hv(vcpu, sregs->pvr);
+ /* Only accept the same PVR as the host's, since we can't spoof it */
+ if (sregs->pvr != vcpu->arch.pvr)
+ return -EINVAL;
j = 0;
for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -922,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
long int i;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, 0);
break;
@@ -1489,7 +1524,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
static int kvmppc_grab_hwthread(int cpu)
{
struct paca_struct *tpaca;
- long timeout = 1000;
+ long timeout = 10000;
tpaca = &paca[cpu];
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f0c4db7704c3..edb2ccdbb2ba 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -355,6 +355,7 @@ kvmppc_hv_entry:
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
+ * R2 = TOC
* all other volatile GPRS = free
*/
mflr r0
@@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
toc_tlbie_lock:
.tc native_tlbie_lock[TC],native_tlbie_lock
.previous
- ld r3,toc_tlbie_lock@toc(2)
+ ld r3,toc_tlbie_lock@toc(r2)
#ifdef __BIG_ENDIAN__
lwz r8,PACA_LOCK_TOKEN(r13)
#else
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..cf2eb16846d1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
return 0;
}
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
/* XXX could be more clever ;) */
return 0;
@@ -1319,6 +1320,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
int r = 0;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa6f109..9b55dec2d6cc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (!(current->thread.regs->msr & MSR_FP)) {
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ current->thread.fp_save_area = &vcpu->arch.fp;
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ current->thread.fp_save_area = NULL;
+#endif
+}
+
static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -134,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Simulate AltiVec unavailable fault to load guest state
+ * from thread to AltiVec unit.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ if (!(current->thread.regs->msr & MSR_VEC)) {
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ current->thread.vr_save_area = &vcpu->arch.vr;
+ current->thread.regs->msr |= MSR_VEC;
+ }
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu AltiVec state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ current->thread.vr_save_area = NULL;
+ }
+#endif
+}
+
static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
{
/* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -267,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG);
+}
+
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
kvmppc_set_srr0(vcpu, srr0);
@@ -341,9 +419,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_SPE_POSSIBLE
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#endif
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+ case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
case BOOKE_IRQPRIO_AP_UNAVAIL:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -377,7 +461,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
allowed = vcpu->arch.shared->msr & MSR_DE;
allowed = allowed && !crit;
msr_mask = MSR_ME;
- int_class = INT_CLASS_CRIT;
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+ int_class = INT_CLASS_DBG;
+ else
+ int_class = INT_CLASS_CRIT;
+
break;
}
@@ -654,20 +742,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/*
* Since we can't trap on MSR_FP in GS-mode, we consider the guest
- * as always using the FPU. Kernel usage of FP (via
- * enable_kernel_fp()) in this thread must not occur while
- * vcpu->fpu_active is set.
+ * as always using the FPU.
*/
- vcpu->fpu_active = 1;
-
kvmppc_load_guest_fp(vcpu);
#endif
+#ifdef CONFIG_ALTIVEC
+ /* Save userspace AltiVec state in stack */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ enable_kernel_altivec();
+ /*
+ * Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+ * as always using the AltiVec.
+ */
+ kvmppc_load_guest_altivec(vcpu);
+#endif
+
/* Switch to guest debug context */
- debug = vcpu->arch.shadow_dbg_reg;
+ debug = vcpu->arch.dbg_reg;
switch_booke_debug_regs(&debug);
debug = current->thread.debug;
- current->thread.debug = vcpu->arch.shadow_dbg_reg;
+ current->thread.debug = vcpu->arch.dbg_reg;
vcpu->arch.pgdir = current->mm->pgd;
kvmppc_fix_ee_before_entry();
@@ -683,8 +778,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
+#endif
- vcpu->fpu_active = 0;
+#ifdef CONFIG_ALTIVEC
+ kvmppc_save_guest_altivec(vcpu);
#endif
out:
@@ -728,9 +825,36 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
- struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
u32 dbsr = vcpu->arch.dbsr;
+ if (vcpu->guest_debug == 0) {
+ /*
+ * Debug resources belong to Guest.
+ * Imprecise debug event is not injected
+ */
+ if (dbsr & DBSR_IDE) {
+ dbsr &= ~DBSR_IDE;
+ if (!dbsr)
+ return RESUME_GUEST;
+ }
+
+ if (dbsr && (vcpu->arch.shared->msr & MSR_DE) &&
+ (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM))
+ kvmppc_core_queue_debug(vcpu);
+
+ /* Inject a program interrupt if trap debug is not allowed */
+ if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE))
+ kvmppc_core_queue_program(vcpu, ESR_PTR);
+
+ return RESUME_GUEST;
+ }
+
+ /*
+ * Debug resource owned by userspace.
+ * Clear guest dbsr (vcpu->arch.dbsr)
+ */
+ vcpu->arch.dbsr = 0;
run->debug.arch.status = 0;
run->debug.arch.address = vcpu->arch.pc;
@@ -868,7 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_DATA_STORAGE:
case BOOKE_INTERRUPT_DTLB_MISS:
case BOOKE_INTERRUPT_HV_PRIV:
- emulated = kvmppc_get_last_inst(vcpu, false, &last_inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+ break;
+ case BOOKE_INTERRUPT_PROGRAM:
+ /* SW breakpoints arrive as illegal instructions on HV */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
break;
default:
break;
@@ -947,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_PROGRAM:
+ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
+ (last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
+ /*
+ * We are here because of an SW breakpoint instr,
+ * so lets return to host to handle.
+ */
+ r = kvmppc_handle_debug(run, vcpu);
+ run->exit_reason = KVM_EXIT_DEBUG;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ break;
+ }
+
if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
/*
* Program traps generated by user-level software must
@@ -991,7 +1132,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
r = RESUME_GUEST;
break;
-#else
+#elif defined(CONFIG_SPE_POSSIBLE)
case BOOKE_INTERRUPT_SPE_UNAVAIL:
/*
* Guest wants SPE, but host kernel doesn't support it. Send
@@ -1012,6 +1153,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->hw.hardware_exit_reason = exit_nr;
r = RESUME_HOST;
break;
+#endif /* CONFIG_SPE_POSSIBLE */
+
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_core_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+ r = RESUME_GUEST;
+ break;
#endif
case BOOKE_INTERRUPT_DATA_STORAGE:
@@ -1188,6 +1345,8 @@ out:
else {
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
+ kvmppc_load_guest_fp(vcpu);
+ kvmppc_load_guest_altivec(vcpu);
}
}
@@ -1243,6 +1402,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
(unsigned long)vcpu);
+ /*
+ * Clear DBSR.MRR to avoid guest debug interrupt as
+ * this is of host interest
+ */
+ mtspr(SPRN_DBSR, DBSR_MRR);
return 0;
}
@@ -1457,144 +1621,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
-
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1);
break;
case KVM_REG_PPC_IAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3);
break;
case KVM_REG_PPC_IAC4:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4);
break;
#endif
case KVM_REG_PPC_DAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1);
break;
case KVM_REG_PPC_DAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2);
break;
case KVM_REG_PPC_EPR: {
u32 epr = kvmppc_get_epr(vcpu);
- val = get_reg_val(reg->id, epr);
+ *val = get_reg_val(id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
- val = get_reg_val(reg->id, vcpu->arch.epcr);
+ *val = get_reg_val(id, vcpu->arch.epcr);
break;
#endif
case KVM_REG_PPC_TCR:
- val = get_reg_val(reg->id, vcpu->arch.tcr);
+ *val = get_reg_val(id, vcpu->arch.tcr);
break;
case KVM_REG_PPC_TSR:
- val = get_reg_val(reg->id, vcpu->arch.tsr);
+ *val = get_reg_val(id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
- val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ *val = get_reg_val(id, vcpu->arch.vrsave);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
break;
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
-
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC2:
- vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC4:
- vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val);
break;
#endif
case KVM_REG_PPC_DAC1:
- vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAC2:
- vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EPR: {
- u32 new_epr = set_reg_val(reg->id, val);
+ u32 new_epr = set_reg_val(id, *val);
kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
- u32 new_epcr = set_reg_val(reg->id, val);
+ u32 new_epcr = set_reg_val(id, *val);
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
case KVM_REG_PPC_OR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_set_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_CLEAR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_clr_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_TSR: {
- u32 tsr = set_reg_val(reg->id, val);
+ u32 tsr = set_reg_val(id, *val);
kvmppc_set_tsr(vcpu, tsr);
break;
}
case KVM_REG_PPC_TCR: {
- u32 tcr = set_reg_val(reg->id, val);
+ u32 tcr = set_reg_val(id, *val);
kvmppc_set_tcr(vcpu, tcr);
break;
}
case KVM_REG_PPC_VRSAVE:
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ vcpu->arch.vrsave = set_reg_val(id, *val);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
break;
}
@@ -1694,10 +1839,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
update_timer_ints(vcpu);
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
if (vcpu->arch.tcr & TCR_ARE) {
vcpu->arch.dec = vcpu->arch.decar;
kvmppc_emulate_dec(vcpu);
@@ -1842,7 +1985,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int n, b = 0, w = 0;
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
return 0;
@@ -1850,15 +1993,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_guest_protect_msr(vcpu, MSR_DE, true);
vcpu->guest_debug = dbg->control;
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
- /* Set DBCR0_EDM in guest visible DBCR0 register. */
- vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
/* Code below handles only HW breakpoints */
- dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ dbg_reg = &(vcpu->arch.dbg_reg);
#ifdef CONFIG_KVM_BOOKE_HV
/*
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543c56fa..22ba08ea68e9 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -32,9 +32,15 @@
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#ifdef CONFIG_SPE_POSSIBLE
#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
#define BOOKE_IRQPRIO_SPE_FP_DATA 6
#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#endif
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5
+#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6
+#endif
#define BOOKE_IRQPRIO_SYSCALL 8
#define BOOKE_IRQPRIO_AP_UNAVAIL 9
#define BOOKE_IRQPRIO_DTLB_MISS 10
@@ -116,40 +122,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-/*
- * Load up guest vcpu FP state if it's needed.
- * It also set the MSR_FP in thread so that host know
- * we're holding FPU, and then host can help to save
- * guest vcpu FP state if other threads require to use FPU.
- * This simulates an FP unavailable fault.
- *
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
- enable_kernel_fp();
- load_fp_state(&vcpu->arch.fp);
- current->thread.fp_save_area = &vcpu->arch.fp;
- current->thread.regs->msr |= MSR_FP;
- }
-#endif
-}
-
-/*
- * Save guest vcpu FP state into thread.
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
- giveup_fpu(current);
- current->thread.fp_save_area = NULL;
-#endif
-}
-
static inline void kvmppc_clear_dbsr(void)
{
mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 28c158881d23..a82f64502de1 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -25,6 +25,7 @@
#define OP_19_XOP_RFI 50
#define OP_19_XOP_RFCI 51
+#define OP_19_XOP_RFDI 39
#define OP_31_XOP_MFMSR 83
#define OP_31_XOP_WRTEE 131
@@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
+static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.dsrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
+}
+
static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = vcpu->arch.csrr0;
@@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*advance = 0;
break;
+ case OP_19_XOP_RFDI:
+ kvmppc_emul_rfdi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS);
+ *advance = 0;
+ break;
+
default:
emulated = EMULATE_FAIL;
break;
@@ -118,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
+ bool debug_inst = false;
switch (sprn) {
case SPRN_DEAR:
@@ -132,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_CSRR1:
vcpu->arch.csrr1 = spr_val;
break;
+ case SPRN_DSRR0:
+ vcpu->arch.dsrr0 = spr_val;
+ break;
+ case SPRN_DSRR1:
+ vcpu->arch.dsrr1 = spr_val;
+ break;
+ case SPRN_IAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac1 = spr_val;
+ break;
+ case SPRN_IAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac2 = spr_val;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac3 = spr_val;
+ break;
+ case SPRN_IAC4:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac4 = spr_val;
+ break;
+#endif
+ case SPRN_DAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac1 = spr_val;
+ break;
+ case SPRN_DAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac2 = spr_val;
+ break;
case SPRN_DBCR0:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE |
+ DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 |
+ DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W);
+
vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
+ case SPRN_DBCR2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dbcr2 = spr_val;
+ break;
case SPRN_DBSR:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
vcpu->arch.dbsr &= ~spr_val;
+ if (!(vcpu->arch.dbsr & ~DBSR_IDE))
+ kvmppc_core_dequeue_debug(vcpu);
break;
case SPRN_TSR:
kvmppc_clr_tsr_bits(vcpu, spr_val);
@@ -252,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
emulated = EMULATE_FAIL;
}
+ if (debug_inst) {
+ current->thread.debug = vcpu->arch.dbg_reg;
+ switch_booke_debug_regs(&vcpu->arch.dbg_reg);
+ }
return emulated;
}
@@ -278,12 +410,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_CSRR1:
*spr_val = vcpu->arch.csrr1;
break;
+ case SPRN_DSRR0:
+ *spr_val = vcpu->arch.dsrr0;
+ break;
+ case SPRN_DSRR1:
+ *spr_val = vcpu->arch.dsrr1;
+ break;
+ case SPRN_IAC1:
+ *spr_val = vcpu->arch.dbg_reg.iac1;
+ break;
+ case SPRN_IAC2:
+ *spr_val = vcpu->arch.dbg_reg.iac2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ *spr_val = vcpu->arch.dbg_reg.iac3;
+ break;
+ case SPRN_IAC4:
+ *spr_val = vcpu->arch.dbg_reg.iac4;
+ break;
+#endif
+ case SPRN_DAC1:
+ *spr_val = vcpu->arch.dbg_reg.dac1;
+ break;
+ case SPRN_DAC2:
+ *spr_val = vcpu->arch.dbg_reg.dac2;
+ break;
case SPRN_DBCR0:
*spr_val = vcpu->arch.dbg_reg.dbcr0;
+ if (vcpu->guest_debug)
+ *spr_val = *spr_val | DBCR0_EDM;
break;
case SPRN_DBCR1:
*spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
+ case SPRN_DBCR2:
+ *spr_val = vcpu->arch.dbg_reg.dbcr2;
+ break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
break;
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index e9fa56a911fd..81bd8a07aa51 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1,NEED_ESR
+ SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
@@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
@@ -350,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
-kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
@@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index a326178bdea5..72920bed3ac6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -22,6 +22,7 @@
#include <linux/kvm_host.h>
#include <asm/mmu-book3e.h>
#include <asm/tlb.h>
+#include <asm/cputhreads.h>
enum vcpu_ftr {
VCPU_FTR_MMU_V2
@@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+
+/*
+ * These functions should be called with preemption disabled
+ * and the returned value is valid only in that context
+ */
+static inline int get_thread_specific_lpid(int vm_lpid)
+{
+ int vcpu_lpid = vm_lpid;
+
+ if (threads_per_core == 2)
+ vcpu_lpid |= smp_processor_id() & 1;
+
+ return vcpu_lpid;
+}
+
+static inline int get_lpid(struct kvm_vcpu *vcpu)
+{
+ return get_thread_specific_lpid(vcpu->kvm->arch.lpid);
+}
#else
unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_entry *gtlbe);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index c99c40e9182a..ce7291c79f6c 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
break;
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
break;
@@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
case SPRN_IVOR34:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val;
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val;
+ break;
+#endif
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
@@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
break;
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
@@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
case SPRN_IVOR34:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL];
+ break;
+ case SPRN_IVOR33:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST];
+ break;
+#endif
case SPRN_IVOR35:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
break;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..769778f855b0 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
* writing shadow tlb entry to host TLB
*/
static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
- uint32_t mas0)
+ uint32_t mas0,
+ uint32_t lpid)
{
unsigned long flags;
@@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
#ifdef CONFIG_KVM_BOOKE_HV
- mtspr(SPRN_MAS8, stlbe->mas8);
+ mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid));
#endif
asm volatile("isync; tlbwe" : : : "memory");
@@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
if (tlbsel == 0) {
mas0 = get_host_mas0(stlbe->mas2);
- __write_host_tlbe(stlbe, mas0);
+ __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid);
} else {
__write_host_tlbe(stlbe,
MAS0_TLBSEL(1) |
- MAS0_ESEL(to_htlb1_esel(sesel)));
+ MAS0_ESEL(to_htlb1_esel(sesel)),
+ vcpu_e500->vcpu.kvm->arch.lpid);
}
}
@@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
magic.mas8 = 0;
- __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0);
preempt_enable();
}
#endif
@@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe(
stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
-
-#ifdef CONFIG_KVM_BOOKE_HV
- stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
-#endif
}
static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
local_irq_save(flags);
mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu));
asm volatile("tlbsx 0, %[geaddr]\n" : :
[geaddr] "r" (geaddr));
mtspr(SPRN_MAS5, 0);
@@ -732,7 +730,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return 0;
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
/* XXX could be more clever ;) */
return 0;
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2a19bf..2fdc8722e324 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
return;
}
-
- tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ preempt_disable();
+ tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id;
mb();
ppc_msgsnd(dbell_type, 0, tag);
+ preempt_enable();
}
/* gtlbe must not be mapped by more than one host tlb entry */
@@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
{
unsigned int tid, ts;
gva_t eaddr;
- u32 val, lpid;
+ u32 val;
unsigned long flags;
ts = get_tlb_ts(gtlbe);
tid = get_tlb_tid(gtlbe);
- lpid = vcpu_e500->vcpu.kvm->arch.lpid;
/* We search the host TLB to invalidate its shadow TLB entry */
val = (tid << 16) | ts;
@@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
local_irq_save(flags);
mtspr(SPRN_MAS6, val);
- mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
val = mfspr(SPRN_MAS1);
@@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
unsigned long flags;
local_irq_save(flags);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
asm volatile("tlbilxlpid");
mtspr(SPRN_MAS5, 0);
local_irq_restore(flags);
@@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
}
+/* We use two lpids per VM */
static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
@@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
kvmppc_booke_vcpu_load(vcpu, cpu);
- mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_LPID, get_lpid(vcpu));
mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
mtspr(SPRN_GPIR, vcpu->vcpu_id);
mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
mtspr(SPRN_EPLC, vcpu->arch.eplc);
mtspr(SPRN_EPSC, vcpu->arch.epsc);
@@ -141,12 +144,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_GESR, vcpu->arch.shared->esr);
if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) {
+ __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) {
kvmppc_e500_tlbil_all(vcpu_e500);
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
+ __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu;
}
-
- kvmppc_load_guest_fp(vcpu);
}
static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
@@ -179,6 +180,16 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
+#ifdef CONFIG_ALTIVEC
+ /*
+ * Since guests have the priviledge to enable AltiVec, we need AltiVec
+ * support in the host to save/restore their context.
+ * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
+ * because it's cleared in the absence of CONFIG_ALTIVEC!
+ */
+ else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+ r = 0;
+#endif
else
r = -ENOTSUPP;
@@ -194,9 +205,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
#ifdef CONFIG_64BIT
vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
#endif
- vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
- vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
- vcpu->arch.epsc = vcpu->arch.eplc;
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP;
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
@@ -356,13 +365,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
if (lpid < 0)
return lpid;
+ /*
+ * Use two lpids per VM on cores with two threads like e6500. Use
+ * even numbers to speedup vcpu lpid computation with consecutive lpids
+ * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on.
+ */
+ if (threads_per_core == 2)
+ lpid <<= 1;
+
kvm->arch.lpid = lpid;
return 0;
}
static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
{
- kvmppc_free_lpid(kvm->arch.lpid);
+ int lpid = kvm->arch.lpid;
+
+ if (threads_per_core == 2)
+ lpid >>= 1;
+
+ kvmppc_free_lpid(lpid);
}
static struct kvmppc_ops kvm_ops_e500mc = {
@@ -390,7 +412,13 @@ static int __init kvmppc_e500mc_init(void)
if (r)
goto err_out;
- kvmppc_init_lpid(64);
+ /*
+ * Use two lpids per VM on dual threaded processors like e6500
+ * to workarround the lack of tlb write conditional instruction.
+ * Expose half the number of available hardware lpids to the lpid
+ * allocator.
+ */
+ kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
kvmppc_claim_lpid(0); /* host */
r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..5cc2e7af3a7b 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
if (emulated != EMULATE_DONE)
return emulated;
@@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
+ case 0:
+ /*
+ * Instruction with primary opcode 0. Based on PowerISA
+ * these are illegal instructions.
+ */
+ if (inst == KVMPPC_INST_SW_BREAKPOINT) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.address = kvmppc_get_pc(vcpu);
+ emulated = EMULATE_EXIT_USER;
+ advance = 0;
+ } else
+ emulated = EMULATE_FAIL;
+
+ break;
+
default:
emulated = EMULATE_FAIL;
}
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 0de4ffa175a9..6d3c0ee1d744 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
if (emulated != EMULATE_DONE)
return emulated;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index da505237a664..c1f8f53cd312 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 last_inst;
- kvmppc_get_last_inst(vcpu, false, &last_inst);
+ kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
/* XXX Deliver Program interrupt to guest. */
pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
r = RESUME_HOST;
@@ -638,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
- tasklet_kill(&vcpu->arch.tasklet);
kvmppc_remove_vcpu_debugfs(vcpu);
@@ -664,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-/*
- * low level hrtimer wake routine. Because this runs in hardirq context
- * we schedule a tasklet to do the real work.
- */
enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- tasklet_schedule(&vcpu->arch.tasklet);
+ kvmppc_decrementer_func(vcpu);
return HRTIMER_NORESTART;
}
@@ -683,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
int ret;
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
vcpu->arch.dec_expires = ~(u64)0;
@@ -907,6 +901,103 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_handle_store);
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ return r;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e8bc40869cbd..7d9ee3d8c618 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL
If in doubt, say N here.
+config SPE_POSSIBLE
+ def_bool y
+ depends on E200 || (E500 && !PPC_E500MC)
+
config SPE
bool "SPE Support"
- depends on E200 || (E500 && !PPC_E500MC)
+ depends on SPE_POSSIBLE
default y
---help---
This option enables kernel support for the Signal Processing
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb9b258d60e7..2075e6c34c78 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -202,6 +202,7 @@
#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 028df8dc538e..7d603a71ab3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -574,7 +574,7 @@ struct kvm_arch {
struct kvm_apic_map *apic_map;
unsigned int tss_addr;
- struct page *apic_access_page;
+ bool apic_access_page_done;
gpa_t wall_clock;
@@ -736,6 +736,7 @@ struct kvm_x86_ops {
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -914,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int fx_init(struct kvm_vcpu *vcpu);
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@ -1036,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -1045,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c7678e43465b..e62cf897f781 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <asm/processor.h>
+#include <asm/alternative.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-/* This instruction is vmcall. On non-VT architectures, it will generate a
- * trap that we will then rewrite to the appropriate instruction.
+#ifdef CONFIG_DEBUG_RODATA
+#define KVM_HYPERCALL \
+ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+#else
+/* On AMD processors, vmcall will generate a trap that we will
+ * then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60e5497681f5..813d29d00a17 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
#endif
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f4bad87ef256..976e3a57f9ea 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void)
return xcr0;
}
-void kvm_update_cpuid(struct kvm_vcpu *vcpu)
+int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
- return;
+ return 0;
/* Update OSXSAVE bit */
if (cpu_has_xsave && best->function == 0x1) {
@@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
xstate_required_size(vcpu->arch.xcr0);
}
+ /*
+ * The existing code assumes virtual address is 48-bit in the canonical
+ * address checks; exit if it is ever changed.
+ */
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && ((best->eax & 0xff00) >> 8) != 48 &&
+ ((best->eax & 0xff00) >> 8) != 0)
+ return -EINVAL;
+
kvm_pmu_cpuid_update(vcpu);
+ return 0;
}
static int is_efer_nx(void)
@@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
}
vcpu->arch.cpuid_nent = cpuid->nent;
cpuid_fix_nx_cap(vcpu);
- r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
+ r = kvm_update_cpuid(vcpu);
out_free:
vfree(cpuid_entries);
@@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
- return 0;
-
+ r = kvm_update_cpuid(vcpu);
out:
return r;
}
@@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
if (!best)
best = check_cpuid_limit(vcpu, function, index);
+ /*
+ * Perfmon not yet supported for L2 guest.
+ */
+ if (is_guest_mode(vcpu) && function == 0xa)
+ best = NULL;
+
if (best) {
*eax = best->eax;
*ebx = best->ebx;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 43b33e301e68..4452eedfaedd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,7 +3,7 @@
#include "x86.h"
-void kvm_update_cpuid(struct kvm_vcpu *vcpu);
+int kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 20d91873d831..a46207a05835 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1504,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (rpl > cpl || dpl != cpl)
goto exception;
}
+ /* in long-mode d/b must be clear if l is set */
+ if (seg_desc.d && seg_desc.l) {
+ u64 efer = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+ if (efer & EFER_LMA)
+ goto exception;
+ }
+
/* CS(RPL) <- CPL */
selector = (selector & 0xfffc) | cpl;
break;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 76398fe15df2..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1174,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
* Write-protect on the specified @sptep, @pt_protect indicates whether
* spte write-protection is caused by protecting shadow page table.
*
- * Note: write protection is difference between drity logging and spte
+ * Note: write protection is difference between dirty logging and spte
* protection:
* - for dirty logging, the spte can be set to writable at anytime if
* its dirty bitmap is properly set.
@@ -1262,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1270,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
while ((sptep = rmap_get_first(*rmapp, &iter))) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
- rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
drop_spte(kvm, sptep);
need_tlb_flush = 1;
@@ -1280,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1294,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!is_shadow_present_pte(*sptep));
- rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
need_flush = 1;
@@ -1328,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn,
+ int level,
unsigned long data))
{
int j;
@@ -1357,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
unsigned long idx, idx_end;
unsigned long *rmapp;
+ gfn_t gfn = gfn_start;
/*
* {idx(page_j) | page_j intersects with
@@ -1367,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
- for (; idx <= idx_end; ++idx)
- ret |= handler(kvm, rmapp++, memslot, data);
+ for (; idx <= idx_end;
+ ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
+ ret |= handler(kvm, rmapp++, memslot,
+ gfn, j, data);
}
}
@@ -1379,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn, int level,
unsigned long data))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1400,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
int young = 0;
- /*
- * In case of absence of EPT Access and Dirty Bits supports,
- * emulate the accessed bit for EPT, by checking if this page has
- * an EPT mapping, and clearing it if it does. On the next access,
- * a new EPT mapping will be established.
- * This has some overhead, but not as much as the cost of swapping
- * out actively used pages or breaking up actively used hugepages.
- */
- if (!shadow_accessed_mask) {
- young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
- goto out;
- }
+ BUG_ON(!shadow_accessed_mask);
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
@@ -1429,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
(unsigned long *)sptep);
}
}
-out:
- /* @data has hva passed to kvm_age_hva(). */
- trace_kvm_age_page(data, slot, young);
+ trace_kvm_age_page(gfn, level, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ int level, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1474,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
+ /*
+ * In case of absence of EPT Access and Dirty Bits supports,
+ * emulate the accessed bit for EPT, by checking if this page has
+ * an EPT mapping, and clearing it if it does. On the next access,
+ * a new EPT mapping will be established.
+ * This has some overhead, but not as much as the cost of swapping
+ * out actively used pages or breaking up actively used hugepages.
+ */
+ if (!shadow_accessed_mask) {
+ /*
+ * We are holding the kvm->mmu_lock, and we are blowing up
+ * shadow PTEs. MMU notifier consumers need to be kept at bay.
+ * This is correct as long as we don't decouple the mmu_lock
+ * protected regions (like invalidate_range_start|end does).
+ */
+ kvm->mmu_notifier_seq++;
+ return kvm_handle_hva_range(kvm, start, end, 0,
+ kvm_unmap_rmapp);
+ }
+
+ return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -1743,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 1;
}
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1796,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
if (flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
struct mmu_page_path {
@@ -2530,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
true, host_writable)) {
if (write_fault)
*emulate = 1;
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (unlikely(is_mmio_spte(*sptep) && emulate))
@@ -3444,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false;
}
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
-
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
@@ -3965,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
else if (local_flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
@@ -4226,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
vcpu->arch.mmu.invlpg(vcpu, gva);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0ab6c65a2821..806d58e3c320 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,8 +298,7 @@ retry_walk:
}
#endif
walker->max_level = walker->level;
- ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
+ ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
accessed_dirty = PT_GUEST_ACCESSED_MASK;
pt_access = pte_access = ACC_ALL;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6ffd643d1a64..04fa1b8298c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2626,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -3132,9 +3134,17 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_unrestricted_guest())
enable_unrestricted_guest = 0;
- if (!cpu_has_vmx_flexpriority())
+ if (!cpu_has_vmx_flexpriority()) {
flexpriority_enabled = 0;
+ /*
+ * set_apic_access_page_addr() is used to reload apic access
+ * page upon invalidation. No need to do anything if the
+ * processor does not have the APIC_ACCESS_ADDR VMCS field.
+ */
+ kvm_x86_ops->set_apic_access_page_addr = NULL;
+ }
+
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops->update_cr8_intercept = NULL;
@@ -3930,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn;
u16 data = 0;
- int r, idx, ret = 0;
+ int idx, r;
idx = srcu_read_lock(&kvm->srcu);
fn = kvm->arch.tss_addr >> PAGE_SHIFT;
@@ -3952,13 +3962,9 @@ static int init_rmode_tss(struct kvm *kvm)
r = kvm_write_guest_page(kvm, fn, &data,
RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
- if (r < 0)
- goto out;
-
- ret = 1;
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return r;
}
static int init_rmode_identity_map(struct kvm *kvm)
@@ -4027,7 +4033,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
int r = 0;
mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_page)
+ if (kvm->arch.apic_access_page_done)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
@@ -4043,7 +4049,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
goto out;
}
- kvm->arch.apic_access_page = page;
+ /*
+ * Do not pin the page in memory, so that memory hot-unplug
+ * is able to migrate it.
+ */
+ put_page(page);
+ kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -4559,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, 0);
}
- if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
if (vmx_vm_has_apicv(vcpu->kvm))
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -4751,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (ret)
return ret;
kvm->arch.tss_addr = addr;
- if (!init_rmode_tss(kvm))
- return -ENOMEM;
-
- return 0;
+ return init_rmode_tss(kvm);
}
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
@@ -6718,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
@@ -6993,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_TASK_SWITCH:
return 1;
case EXIT_REASON_CPUID:
+ if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
+ return 0;
return 1;
case EXIT_REASON_HLT:
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -7201,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
vmx_set_msr_bitmap(vcpu);
}
+static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * Currently we do not handle the nested case where L2 has an
+ * APIC access page of its own; that page is still pinned.
+ * Hence, we skip the case where the VCPU is in guest mode _and_
+ * L1 prepared an APIC access page for L2.
+ *
+ * For the case where L1 and L2 share the same APIC access page
+ * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
+ * in the vmcs12), this function will only update either the vmcs01
+ * or the vmcs02. If the former, the vmcs02 will be updated by
+ * prepare_vmcs02. If the latter, the vmcs01 will be updated in
+ * the next L2->L1 exit.
+ */
+ if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(vmx->nested.current_vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
{
u16 status;
@@ -8008,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
* guest in a way that will both be appropriate to L1's requests, and our
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
@@ -8143,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
exec_control |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vcpu->kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -8953,6 +8983,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
}
/*
+ * We are now running in L2, mmu_notifier will force to reload the
+ * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
+ */
+ kvm_vcpu_reload_apic_access_page(vcpu);
+
+ /*
* Exiting from L2 to L1, we're now back to L1 which thinks it just
* finished a VMLAUNCH or VMRESUME instruction, so we need to set the
* success or failure flag accordingly.
@@ -9077,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.vm_has_apicv = vmx_vm_has_apicv,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d7f65daa8d0..6857257f3810 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -729,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1726,7 +1726,7 @@ static bool valid_mtrr_type(unsigned t)
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}
-static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
int i;
u64 mask;
@@ -1769,12 +1769,13 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return true;
}
+EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
- if (!mtrr_valid(vcpu, msr, data))
+ if (!kvm_mtrr_valid(vcpu, msr, data))
return 1;
if (msr == MSR_MTRRdefType) {
@@ -1825,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
@@ -2184,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr, data);
/* Performance counters are not protected by a CPUID bit,
@@ -2350,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
data = vcpu->arch.mce_banks[offset];
break;
@@ -2531,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MCG_CAP:
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr, pdata);
case MSR_K7_CLK_CTL:
/*
@@ -5000,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- if (!is_guest_mode(vcpu)) {
+ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@@ -6019,6 +6020,41 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_apic_update_tmr(vcpu, tmr);
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu);
+}
+
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = NULL;
+
+ if (!kvm_x86_ops->set_apic_access_page_addr)
+ return;
+
+ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+
+ /*
+ * Do not pin apic access page in memory, the MMU notifier
+ * will call us again if it is migrated or swapped out.
+ */
+ put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
/*
* Returns 1 to let __vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -6048,7 +6084,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_x86_ops->tlb_flush(vcpu);
+ kvm_vcpu_flush_tlb(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6079,6 +6115,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_pmi(vcpu);
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7271,8 +7309,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
- if (kvm->arch.apic_access_page)
- put_page(kvm->arch.apic_access_page);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 985fb2c006fa..7cb9c45a5fe0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -159,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
| XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 5f578e850fc5..90d734bbf467 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn,
static int mn_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
- __mn_flush_page(mn, address);
+ for (; start < end; start += PAGE_SIZE)
+ __mn_flush_page(mn, start);
return 0;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bbd8d57b04e0..28be31f49250 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
#define KVM_REQ_ENABLE_IBS 23
#define KVM_REQ_DISABLE_IBS 24
+#define KVM_REQ_APIC_PAGE_RELOAD 25
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@@ -198,6 +199,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
+/*
+ * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
+ * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
+ * controls whether we retry the gup one more time to completion in that case.
+ * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
+ * handler.
+ */
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, bool write_fault,
+ struct page **pagep);
+
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
@@ -577,6 +589,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed2..0f4196a0bc20 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
+#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 27288692241e..88787bb4b3b9 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
* pte. This way the VM will provide proper aging to the
* accesses to the page through the secondary MMUs and not
* only to the ones through the Linux pte.
+ * Start-end is necessary in case the secondary MMU is mapping the page
+ * at a smaller granularity than the primary MMU.
*/
int (*clear_flush_young)(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address);
+ unsigned long start,
+ unsigned long end);
/*
* test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address);
+ unsigned long start,
+ unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
}
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
if (mm_has_notifiers(mm))
- return __mmu_notifier_clear_flush_young(mm, address);
+ return __mmu_notifier_clear_flush_young(mm, start, end);
return 0;
}
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
unsigned long ___address = __address; \
__young = ptep_clear_flush_young(___vma, ___address, __ptep); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
- ___address); \
+ ___address, \
+ ___address + \
+ PAGE_SIZE); \
__young; \
})
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
unsigned long ___address = __address; \
__young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
- ___address); \
+ ___address, \
+ ___address + \
+ PMD_SIZE); \
__young; \
})
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
}
static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
return 0;
}
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index ab679c395042..6edf1f2028cd 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -225,24 +225,26 @@ TRACE_EVENT(kvm_fpu,
);
TRACE_EVENT(kvm_age_page,
- TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
- TP_ARGS(hva, slot, ref),
+ TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref),
+ TP_ARGS(gfn, level, slot, ref),
TP_STRUCT__entry(
__field( u64, hva )
__field( u64, gfn )
+ __field( u8, level )
__field( u8, referenced )
),
TP_fast_assign(
- __entry->hva = hva;
- __entry->gfn =
- slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
+ __entry->gfn = gfn;
+ __entry->level = level;
+ __entry->hva = ((gfn - slot->base_gfn) <<
+ PAGE_SHIFT) + slot->userspace_addr;
__entry->referenced = ref;
),
- TP_printk("hva %llx gfn %llx %s",
- __entry->hva, __entry->gfn,
+ TP_printk("hva %llx gfn %llx level %u %s",
+ __entry->hva, __entry->gfn, __entry->level,
__entry->referenced ? "YOUNG" : "OLD")
);
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..af7ea3e0826b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
if (*flags & FOLL_NOWAIT)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+ if (*flags & FOLL_TRIED) {
+ VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
+ fault_flags |= FAULT_FLAG_TRIED;
+ }
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 950813b1eb36..2c8da9825fe3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
* existed or not.
*/
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
struct mmu_notifier *mn;
int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->clear_flush_young)
- young |= mn->ops->clear_flush_young(mn, mm, address);
+ young |= mn->ops->clear_flush_young(mn, mm, start, end);
}
srcu_read_unlock(&srcu, id);
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c504f8..bc74e0012809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
continue; /* don't unmap */
}
- if (ptep_clear_flush_young_notify(vma, address, pte))
+ /*
+ * No need for _notify because we're within an
+ * mmu_notifier_invalidate_range_ {start|end} scope.
+ */
+ if (ptep_clear_flush_young(vma, address, pte))
continue;
/* Nuke the page table entry. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- down_read(&mm->mmap_sem);
- get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
- up_read(&mm->mmap_sem);
+ kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0c712a779b44..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,6 +36,9 @@
#include <linux/seqlock.h>
#include <trace/events/kvm.h>
+#ifdef __KVM_HAVE_IOAPIC
+#include "ioapic.h"
+#endif
#include "iodev.h"
#ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 278232025129..39a02fbdb572 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,11 +52,13 @@
#include <asm/processor.h>
#include <asm/io.h>
+#include <asm/ioctl.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include "coalesced_mmio.h"
#include "async_pf.h"
+#include "vfio.h"
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
@@ -151,7 +153,7 @@ static void ack_flush(void *_completed)
{
}
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
int i, cpu, me;
cpumask_var_t cpus;
@@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
long dirty_count = kvm->tlbs_dirty;
smp_mb();
- if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
@@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -294,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
+
+ kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -367,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
@@ -375,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
- young = kvm_age_hva(kvm, address);
+ young = kvm_age_hva(kvm, start, end);
if (young)
kvm_flush_remote_tlbs(kvm);
@@ -1128,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
}
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, bool write_fault,
+ struct page **pagep)
+{
+ int npages;
+ int locked = 1;
+ int flags = FOLL_TOUCH | FOLL_HWPOISON |
+ (pagep ? FOLL_GET : 0) |
+ (write_fault ? FOLL_WRITE : 0);
+
+ /*
+ * If retrying the fault, we get here *not* having allowed the filemap
+ * to wait on the page lock. We should now allow waiting on the IO with
+ * the mmap semaphore released.
+ */
+ down_read(&mm->mmap_sem);
+ npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+ &locked);
+ if (!locked) {
+ VM_BUG_ON(npages);
+
+ if (!pagep)
+ return 0;
+
+ /*
+ * The previous call has now waited on the IO. Now we can
+ * retry and complete. Pass TRIED to ensure we do not re
+ * schedule async IO (see e.g. filemap_fault).
+ */
+ down_read(&mm->mmap_sem);
+ npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+ pagep, NULL, NULL);
+ }
+ up_read(&mm->mmap_sem);
+ return npages;
+}
+
static inline int check_user_page_hwpoison(unsigned long addr)
{
int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1190,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
npages = get_user_page_nowait(current, current->mm,
addr, write_fault, page);
up_read(&current->mm->mmap_sem);
- } else
- npages = get_user_pages_fast(addr, 1, write_fault,
- page);
+ } else {
+ /*
+ * By now we have tried gup_fast, and possibly async_pf, and we
+ * are certainly not atomic. Time to retry the gup, allowing
+ * mmap semaphore to be relinquished in the case of IO.
+ */
+ npages = kvm_get_user_page_io(current, current->mm, addr,
+ write_fault, page);
+ }
if (npages != 1)
return npages;
@@ -1995,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm)
return -EIO;
+ if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+ return -EINVAL;
+
#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
@@ -3233,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_undebugfs;
}
+ r = kvm_vfio_ops_init();
+ WARN_ON(r);
+
return 0;
out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index bb11b36ee8a2..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include "vfio.h"
struct kvm_vfio_group {
struct list_head node;
@@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
return 0;
}
-static int __init kvm_vfio_ops_init(void)
+int kvm_vfio_ops_init(void)
{
return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}
-module_init(kvm_vfio_ops_init);
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
+#ifndef __KVM_VFIO_H
+#define __KVM_VFIO_H
+
+#ifdef CONFIG_KVM_VFIO
+int kvm_vfio_ops_init(void);
+#else
+static inline int kvm_vfio_ops_init(void)
+{
+ return 0;
+}
+#endif
+
+#endif