summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/kvm_host.h28
-rw-r--r--arch/arm/include/uapi/asm/kvm.h8
-rw-r--r--arch/arm/kvm/guest.c51
-rw-r--r--arch/arm/kvm/handle_exit.c1
-rw-r--r--arch/arm/kvm/hyp/switch.c2
-rw-r--r--arch/arm/kvm/init.S5
-rw-r--r--arch/arm/kvm/reset.c16
-rw-r--r--arch/arm64/Kconfig11
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h2
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/esr.h24
-rw-r--r--arch/arm64/include/asm/kvm_host.h6
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h23
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm64/kernel/cpu_errata.c21
-rw-r--r--arch/arm64/kvm/guest.c9
-rw-r--r--arch/arm64/kvm/handle_exit.c1
-rw-r--r--arch/arm64/kvm/hyp-init.S11
-rw-r--r--arch/arm64/kvm/hyp/switch.c15
-rw-r--r--arch/arm64/kvm/reset.c16
-rw-r--r--arch/arm64/kvm/sys_regs.c27
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c55
-rw-r--r--arch/mips/kvm/trap_emul.c2
-rw-r--r--arch/mips/kvm/vz.c2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h4
-rw-r--r--arch/powerpc/kvm/book3s_xive.c4
-rw-r--r--arch/powerpc/kvm/booke.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/s390/include/asm/ctl_reg.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h50
-rw-r--r--arch/s390/include/asm/nmi.h13
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/uapi/asm/kvm.h12
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/entry.S13
-rw-r--r--arch/s390/kernel/nmi.c84
-rw-r--r--arch/s390/kvm/gaccess.c43
-rw-r--r--arch/s390/kvm/interrupt.c91
-rw-r--r--arch/s390/kvm/kvm-s390.c374
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/s390/kvm/priv.c103
-rw-r--r--arch/s390/kvm/vsie.c25
-rw-r--r--arch/x86/include/asm/kvm_host.h47
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c84
-rw-r--r--arch/x86/kvm/lapic.c121
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/svm.c121
-rw-r--r--arch/x86/kvm/vmx.c187
-rw-r--r--arch/x86/kvm/x86.c19
53 files changed, 1358 insertions, 412 deletions
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f0e66577ce05..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,7 +44,9 @@
#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
#endif
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
@@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
@@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
-static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 5e3c673fa3f4..5db2d4c6a55f 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -203,6 +203,14 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index fa6182a40941..1e0784ebbfd6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
return -EINVAL;
}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index f86a9aaef462..54442e375354 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 624a510d31df..ebd2dd46adf7 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause)
vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
+ __banked_restore_state(host_ctxt);
__sysreg_restore_state(host_ctxt);
}
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 570ed4a9c261..5386528665b5 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -104,7 +104,6 @@ __do_hyp_init:
@ - Write permission implies XN: disabled
@ - Instruction cache: enabled
@ - Data/Unified cache: enabled
- @ - Memory alignment checks: enabled
@ - MMU: enabled (this code must be run from an identity mapping)
mrc p15, 4, r0, c1, c0, 0 @ HSCR
ldr r2, =HSCTLR_MASK
@@ -112,8 +111,8 @@ __do_hyp_init:
mrc p15, 0, r1, c1, c0, 0 @ SCTLR
ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
and r1, r1, r2
- ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
- THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
+ ARM( ldr r2, =(HSCTLR_M) )
+ THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) )
orr r1, r1, r2
orr r0, r0, r1
mcr p15, 4, r0, c1, c0, 0 @ HSCR
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 1da8b2d14550..5ed0c3ee33d6 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
-static const struct kvm_irq_level cortexa_ptimer_irq = {
- { .irq = 30 },
- .level = 1,
-};
-
-static const struct kvm_irq_level cortexa_vtimer_irq = {
- { .irq = 27 },
- .level = 1,
-};
-
/*******************************************************************************
* Exported reset function
@@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = {
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct kvm_regs *reset_regs;
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
switch (vcpu->arch.target) {
case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
- cpu_vtimer_irq = &cortexa_vtimer_irq;
- cpu_ptimer_irq = &cortexa_ptimer_irq;
break;
default:
return -ENODEV;
@@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_reset_coprocs(vcpu);
/* Reset arch_timer context */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3dcd7ec69bca..6252365b0c96 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -480,6 +480,17 @@ config CAVIUM_ERRATUM_27456
If unsure, say Y.
+config CAVIUM_ERRATUM_30115
+ bool "Cavium erratum 30115: Guest may disable interrupts in host"
+ default y
+ help
+ On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through
+ 1.2, and T83 Pass 1.0, KVM guest execution may disable
+ interrupts in host. Trapping both GICv3 group-0 and group-1
+ accesses sidesteps the issue.
+
+ If unsure, say Y.
+
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 1a98bc8602a2..8cef47fa2218 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -89,7 +89,7 @@ static inline void gic_write_ctlr(u32 val)
static inline void gic_write_grpen1(u32 val)
{
- write_sysreg_s(val, SYS_ICC_GRPEN1_EL1);
+ write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1);
isb();
}
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b3aab8a17868..8d2272c6822c 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -38,7 +38,8 @@
#define ARM64_WORKAROUND_REPEAT_TLBI 17
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
#define ARM64_WORKAROUND_858921 19
+#define ARM64_WORKAROUND_CAVIUM_30115 20
-#define ARM64_NCAPS 20
+#define ARM64_NCAPS 21
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 0984d1b3a8f2..235e77d98261 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
+#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define BRCM_CPU_PART_VULCAN 0x516
@@ -96,6 +97,7 @@
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 85997c0e5443..e7d8e281ff62 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -19,6 +19,7 @@
#define __ASM_ESR_H
#include <asm/memory.h>
+#include <asm/sysreg.h>
#define ESR_ELx_EC_UNKNOWN (0x00)
#define ESR_ELx_EC_WFx (0x01)
@@ -181,6 +182,29 @@
#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
+#define esr_sys64_to_sysreg(e) \
+ sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP0_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
+#define esr_cp15_to_sysreg(e) \
+ sys_reg(3, \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1f252a95bc02..d68630007b14 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -42,7 +42,9 @@
#define KVM_VCPU_MAX_FEATURES 4
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -334,8 +336,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
u64 __kvm_call_hyp(void *hypfn, ...);
#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index b18e852d27e8..4572a9b560fa 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 15c142ce991c..16e44fa9b3b6 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -180,14 +180,31 @@
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
+#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0)
+#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1)
+#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2)
+#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3)
+#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n)
+#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0)
+#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1)
+#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2)
+#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3)
+#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n)
+#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0)
+#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1)
+#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2)
+#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3)
#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3)
#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2)
#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3)
#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
-#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
+#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1)
#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
@@ -286,6 +303,10 @@
#define SCTLR_ELx_A (1 << 1)
#define SCTLR_ELx_M 1
+#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
+ (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
+ (1 << 29))
+
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 70eea2ecc663..9f3ca24bbcc6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -232,6 +232,9 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2ed2a7657711..0e27f86ee709 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -133,6 +133,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
},
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_30115
+ {
+ /* Cavium ThunderX, T88 pass 1.x - 2.2 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 2),
+ },
+ {
+ /* Cavium ThunderX, T81 pass 1.0 - 1.2 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02),
+ },
+ {
+ /* Cavium ThunderX, T83 pass 1.0 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00),
+ },
+#endif
{
.desc = "Mismatched cache line size",
.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index b37446a8ffdb..5c7f657dd207 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fa1b18e364fc..17d8a1677a0b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 839425c24b1c..3f9615582377 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -106,10 +106,13 @@ __do_hyp_init:
tlbi alle2
dsb sy
- mrs x4, sctlr_el2
- and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2
- ldr x5, =SCTLR_ELx_FLAGS
- orr x4, x4, x5
+ /*
+ * Preserve all the RES1 bits while setting the default flags,
+ * as well as the EE bit on BE. Drop the A flag since the compiler
+ * is allowed to generate unaligned accesses.
+ */
+ ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
+CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
msr sctlr_el2, x4
isb
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index aede1658aeda..945e79c641c4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -350,6 +350,20 @@ again:
}
}
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ exit_code == ARM_EXCEPTION_TRAP &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1) {
+ __skip_instr(vcpu);
+ goto again;
+ }
+
+ /* 0 falls through to be handled out of EL2 */
+ }
+
fp_enabled = __fpsimd_enabled();
__sysreg_save_guest_state(guest_ctxt);
@@ -422,6 +436,7 @@ void __hyp_text __noreturn __hyp_panic(void)
vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_host_state(host_ctxt);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 561badf93de8..3256b9228e75 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = {
COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
};
-static const struct kvm_irq_level default_ptimer_irq = {
- .irq = 30,
- .level = 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
- .irq = 27,
- .level = 1,
-};
-
static bool cpu_has_32bit_el1(void)
{
u64 pfr0;
@@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
const struct kvm_regs *cpu_reset;
switch (vcpu->arch.target) {
@@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
cpu_reset = &default_regs_reset;
}
- cpu_vtimer_irq = &default_vtimer_irq;
- cpu_ptimer_irq = &default_ptimer_irq;
break;
}
@@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_reset(vcpu);
/* Reset timer */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0fe27024a2e1..77862881ae86 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -56,7 +56,8 @@
*/
static bool read_from_write_only(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *params)
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
{
WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
print_sys_reg_instr(params);
@@ -64,6 +65,16 @@ static bool read_from_write_only(struct kvm_vcpu *vcpu,
return false;
}
+static bool write_to_read_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -93,7 +104,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
kvm_set_way_flush(vcpu);
return true;
@@ -135,7 +146,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
vgic_v3_dispatch_sgi(vcpu, p->regval);
@@ -773,7 +784,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return trap_raz_wi(vcpu, p, r);
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
if (pmu_write_swinc_el0_disabled(vcpu))
return false;
@@ -953,7 +964,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+ { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 79f37e37d367..116786d2e8e8 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
*/
- vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
- vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
+ vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
+ vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
vgic_set_vmcr(vcpu, &vmcr);
} else {
val = 0;
@@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
* Extract it directly using ICC_CTLR_EL1 reg definitions.
*/
- val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
- val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
+ val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
+ val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
p->regval = val;
}
@@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
p->regval = 0;
vgic_get_vmcr(vcpu, &vmcr);
- if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
+ if (!vmcr.cbpr) {
if (p->is_write) {
vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
ICC_BPR1_EL1_SHIFT;
@@ -268,36 +268,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
- /* ICC_PMR_EL1 */
- { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr },
- /* ICC_BPR0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 },
- /* ICC_AP0R0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r },
- /* ICC_AP0R1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r },
- /* ICC_AP0R2_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r },
- /* ICC_AP0R3_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r },
- /* ICC_AP1R0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r },
- /* ICC_AP1R1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r },
- /* ICC_AP1R2_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r },
- /* ICC_AP1R3_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r },
- /* ICC_BPR1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 },
- /* ICC_CTLR_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr },
- /* ICC_SRE_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre },
- /* ICC_IGRPEN0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 },
- /* ICC_GRPEN1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 },
+ { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr },
+ { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 },
+ { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr },
+ { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 },
};
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index a563759fd142..6a0d7040d882 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
struct mm_struct *mm;
int i;
- if (likely(!vcpu->requests))
+ if (likely(!kvm_request_pending(vcpu)))
return;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 71d8856ade64..74805035edc8 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
int ret = 0;
int i;
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7d64f99ea3b8..8b3f1238d07f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,8 +53,8 @@
#define KVM_IRQCHIP_NUM_PINS 256
/* PPC-specific vcpu->requests bit members */
-#define KVM_REQ_WATCHDOG 8
-#define KVM_REQ_EPR_EXIT 9
+#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
#include <linux/mmu_notifier.h>
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index ffe1da95033a..08b200a0bbce 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive)
if (!xc)
continue;
for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
- if (xc->queues[i].qpage)
- xive_pre_save_queue(xive, &xc->queues[i]);
+ if (xc->queues[j].qpage)
+ xive_pre_save_queue(xive, &xc->queues[j]);
}
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3eaac3809977..071b87ee682f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b14736f3870b..1a75c0b5f4ca 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !!(v->arch.pending_exceptions) ||
- v->requests;
+ return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
*/
smp_mb();
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Make sure we process requests preemptable */
local_irq_enable();
trace_kvm_check_requests(vcpu);
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d0441ad2a990..e508dff92535 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -59,7 +59,9 @@ union ctlreg0 {
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
- unsigned long : 4;
+ unsigned long : 2;
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 1;
unsigned long afp : 1; /* AFP-register control */
unsigned long vx : 1; /* Vector enablement control */
unsigned long : 7;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 426614a882a9..495aedbaf447 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -42,9 +42,11 @@
#define KVM_HALT_POLL_NS_DEFAULT 80000
/* s390-specific vcpu->requests bit members */
-#define KVM_REQ_ENABLE_IBS 8
-#define KVM_REQ_DISABLE_IBS 9
-#define KVM_REQ_ICPT_OPEREXC 10
+#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
+#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -56,7 +58,7 @@ union bsca_sigp_ctrl {
__u8 r : 1;
__u8 scn : 6;
};
-} __packed;
+};
union esca_sigp_ctrl {
__u16 value;
@@ -65,14 +67,14 @@ union esca_sigp_ctrl {
__u8 reserved: 7;
__u8 scn;
};
-} __packed;
+};
struct esca_entry {
union esca_sigp_ctrl sigp_ctrl;
__u16 reserved1[3];
__u64 sda;
__u64 reserved2[6];
-} __packed;
+};
struct bsca_entry {
__u8 reserved0;
@@ -80,7 +82,7 @@ struct bsca_entry {
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
-} __attribute__((packed));
+};
union ipte_control {
unsigned long val;
@@ -97,7 +99,7 @@ struct bsca_block {
__u64 mcn;
__u64 reserved2;
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-} __attribute__((packed));
+};
struct esca_block {
union ipte_control ipte_control;
@@ -105,7 +107,21 @@ struct esca_block {
__u64 mcn[4];
__u64 reserved2[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-} __packed;
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
@@ -260,14 +276,15 @@ struct kvm_s390_sie_block {
struct kvm_s390_itdb {
__u8 data[256];
-} __packed;
+};
struct sie_page {
struct kvm_s390_sie_block sie_block;
- __u8 reserved200[1024]; /* 0x0200 */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[1000]; /* 0x0218 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
-} __packed;
+};
struct kvm_vcpu_stat {
u64 exit_userspace;
@@ -681,7 +698,7 @@ struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
-} __packed;
+};
struct kvm_s390_vsie {
struct mutex mutex;
@@ -691,6 +708,12 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
+struct kvm_s390_migration_state {
+ unsigned long bitmap_size; /* in bits (number of guest pages) */
+ atomic64_t dirty_pages; /* number of dirty pages */
+ unsigned long *pgste_bitmap;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -718,6 +741,7 @@ struct kvm_arch{
struct kvm_s390_crypto crypto;
struct kvm_s390_vsie vsie;
u64 epoch;
+ struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
};
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index e3e8895f5d3e..9d91cf3e427f 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -14,11 +14,24 @@
#include <linux/const.h>
#include <linux/types.h>
+#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
+ 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
+ 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
+ 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
+ 1ULL<<45 | 1ULL<<44)
#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
+#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
+#define MCCK_CODE_CP _BITUL(63 - 9)
#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
+#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28)
+#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27)
+#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26)
+#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25)
+#define MCCK_CR14_WARN_SUB_MASK (1 << 24)
+
#ifndef __ASSEMBLY__
union mci {
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..5b1b247dfbca 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -20,6 +20,7 @@
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
+#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
@@ -28,6 +29,7 @@
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
+#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
#ifndef __ASSEMBLY__
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 3dd2a1d308dd..69d09c39bbcd 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -28,6 +28,7 @@
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
#define KVM_DEV_FLIC_AISM 9
#define KVM_DEV_FLIC_AIRQ_INJECT 10
+#define KVM_DEV_FLIC_AISM_ALL 11
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -53,6 +54,11 @@ struct kvm_s390_ais_req {
__u16 mode;
};
+struct kvm_s390_ais_all {
+ __u8 simm;
+ __u8 nimm;
+};
+
#define KVM_S390_IO_ADAPTER_MASK 1
#define KVM_S390_IO_ADAPTER_MAP 2
#define KVM_S390_IO_ADAPTER_UNMAP 3
@@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_TOD 1
#define KVM_S390_VM_CRYPTO 2
#define KVM_S390_VM_CPU_MODEL 3
+#define KVM_S390_VM_MIGRATION 4
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
@@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP 0
+#define KVM_S390_VM_MIGRATION_START 1
+#define KVM_S390_VM_MIGRATION_STATUS 2
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 6bb29633e1f1..b65c414b6c0e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -58,6 +58,9 @@ int main(void)
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
OFFSET(__SF_EMPTY, stack_frame, empty1);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
+ OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
BLANK();
/* timeval/timezone offsets for use by vdso */
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index e408d9cc5b96..3b4b158382ea 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -225,6 +225,7 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+.Lsie_entry:
sie 0(%r14)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
@@ -1104,7 +1105,13 @@ cleanup_critical:
.quad .Lsie_done
.Lcleanup_sie:
- lg %r9,__SF_EMPTY(%r15) # get control block pointer
+ cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
+ je 1f
+ slg %r9,BASED(.Lsie_crit_mcck_start)
+ clg %r9,BASED(.Lsie_crit_mcck_length)
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
@@ -1289,6 +1296,10 @@ cleanup_critical:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
+.Lsie_crit_mcck_start:
+ .quad .Lsie_entry
+.Lsie_crit_mcck_length:
+ .quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 985589523970..31d03a84126c 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -25,6 +25,8 @@
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/kvm_host.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode)
return kill_task;
}
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+ struct mcck_volatile_info *mcck_backup;
+ struct sie_page *sie_page;
+
+ /* r14 contains the sie block, which was set in sie64a */
+ struct kvm_s390_sie_block *sie_block =
+ (struct kvm_s390_sie_block *) regs->gprs[14];
+
+ if (sie_block == NULL)
+ /* Something's seriously wrong, stop system. */
+ s390_handle_damage();
+
+ sie_page = container_of(sie_block, struct sie_page, sie_block);
+ mcck_backup = &sie_page->mcck_info;
+ mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+ mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+ mcck_backup->failing_storage_address
+ = S390_lowcore.failing_storage_address;
+}
+
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
+
/*
* machine check handler.
*/
@@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
struct mcck_struct *mcck;
unsigned long long tmp;
union mci mci;
+ unsigned long mcck_dam_code;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
/* System damage -> stopping machine */
s390_handle_damage();
}
- if (mci.pd) {
+
+ /*
+ * Reinject the instruction processing damages' machine checks
+ * including Delayed Access Exception into the guest
+ * instead of damaging the host if they happen in the guest.
+ */
+ if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
@@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * Backup the machine check's info if it happens when the guest
+ * is running.
+ */
+ if (test_cpu_flag(CIF_MCCK_GUEST))
+ s390_backup_mcck_info(regs);
+
if (mci.cd) {
/* Timing facility damage */
s390_handle_damage();
@@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
}
- if (mci.se)
- /* Storage error uncorrected */
- s390_handle_damage();
- if (mci.ke)
- /* Storage key-error uncorrected */
- s390_handle_damage();
- if (mci.ds && mci.fa)
- /* Storage degradation */
- s390_handle_damage();
+
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.se)
+ /* Storage error uncorrected */
+ s390_handle_damage();
+ if (mci.ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage();
+ if (mci.ds && mci.fa)
+ /* Storage degradation */
+ s390_handle_damage();
+ }
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
@@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * If there are only Channel Report Pending and External Damage
+ * machine checks, they will not be reinjected into the guest
+ * because they refer to host conditions only.
+ */
+ mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+ if (test_cpu_flag(CIF_MCCK_GUEST) &&
+ (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+ /* Set exit reason code for host's later handling */
+ *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ }
+ clear_cpu_flag(CIF_MCCK_GUEST);
nmi_exit();
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..17e3a4e71bc9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -89,7 +89,7 @@ struct region3_table_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
@@ -131,7 +131,7 @@ struct segment_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
@@ -168,7 +168,8 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 9;
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
};
};
@@ -241,7 +242,7 @@ struct ale {
unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
unsigned long : 6;
unsigned long astesn : 32; /* ASTE Sequence Number */
-} __packed;
+};
struct aste {
unsigned long i : 1; /* ASX-Invalid Bit */
@@ -257,7 +258,7 @@ struct aste {
unsigned long ald : 32;
unsigned long astesn : 32;
/* .. more fields there */
-} __packed;
+};
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
@@ -485,6 +486,7 @@ enum prot_type {
PROT_TYPE_KEYC = 1,
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
+ PROT_TYPE_IEP = 4,
};
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
@@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_TYPE_IEP:
+ tec->b61 = 1;
+ /* FALL THROUGH */
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
* @mode: indicates the access mode to be used
+ * @prot: returns the type for protection exceptions
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- enum gacc_mode mode)
+ enum gacc_mode mode, enum prot_type *prot)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
union page_table_entry pte;
int dat_protection = 0;
+ int iep_protection = 0;
union ctlreg0 ctlreg0;
unsigned long ptr;
- int edat1, edat2;
+ int edat1, edat2, iep;
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+ iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
ptr = asce.origin * 4096;
@@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (rtte.fc && edat2) {
dat_protection |= rtte.fc1.p;
+ iep_protection = rtte.fc1.iep;
raddr.rfaa = rtte.fc1.rfaa;
goto absolute_address;
}
@@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (ste.fc && edat1) {
dat_protection |= ste.fc1.p;
+ iep_protection = ste.fc1.iep;
raddr.sfaa = ste.fc1.sfaa;
goto absolute_address;
}
@@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
if (pte.z)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
+ iep_protection = pte.iep;
raddr.pfra = pte.pfra;
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (mode == GACC_STORE && dat_protection)
+ if (mode == GACC_STORE && dat_protection) {
+ *prot = PROT_TYPE_DAT;
return PGM_PROTECTION;
+ }
+ if (mode == GACC_IFETCH && iep_protection && iep) {
+ *prot = PROT_TYPE_IEP;
+ return PGM_PROTECTION;
+ }
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
@@ -782,6 +799,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
int lap_enabled, rc = 0;
+ enum prot_type prot;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
@@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, asce, mode);
+ rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
@@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
rc = PGM_ADDRESSING;
}
if (rc)
- return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, ga, ar, mode, prot);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ enum prot_type prot;
union asce asce;
int rc;
@@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, gva, 0, mode, prot);
} else {
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index caf15c8a8948..f2c78fc1852d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /*
+ * Check both floating and local interrupt's cr14 because
+ * bit IRQ_PEND_MCHK_REP could be set in both cases.
+ */
if (!(vcpu->arch.sie_block->gcr[14] &
- vcpu->kvm->arch.float_int.mchk.cr14))
+ (vcpu->kvm->arch.float_int.mchk.cr14 |
+ vcpu->arch.local_int.irq.mchk.cr14)))
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/*
@@ -1876,6 +1881,28 @@ out:
return ret < 0 ? ret : n;
}
+static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (attr->attr < sizeof(ais))
+ return -EINVAL;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ mutex_lock(&fi->ais_lock);
+ ais.simm = fi->simm;
+ ais.nimm = fi->nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r;
@@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
attr->attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_get_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
return kvm_s390_inject_airq(kvm, adapter);
}
+static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+ return -EFAULT;
+
+ mutex_lock(&fi->ais_lock);
+ fi->simm = ais.simm;
+ fi->nimm = ais.nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ return 0;
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_AIRQ_INJECT:
r = flic_inject_airq(dev->kvm, attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_set_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev,
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
case KVM_DEV_FLIC_AISM:
case KVM_DEV_FLIC_AIRQ_INJECT:
+ case KVM_DEV_FLIC_AISM_ALL:
return 0;
}
return -ENXIO;
@@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
return ret;
}
+/*
+ * Inject the machine check to the guest.
+ */
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info)
+{
+ struct kvm_s390_interrupt_info inti;
+ struct kvm_s390_irq irq;
+ struct kvm_s390_mchk_info *mchk;
+ union mci mci;
+ __u64 cr14 = 0; /* upper bits are not used */
+
+ mci.val = mcck_info->mcic;
+ if (mci.sr)
+ cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
+ if (mci.dg)
+ cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
+ if (mci.w)
+ cr14 |= MCCK_CR14_WARN_SUB_MASK;
+
+ mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+ mchk->cr14 = cr14;
+ mchk->mcic = mcck_info->mcic;
+ mchk->ext_damage_code = mcck_info->ext_damage_code;
+ mchk->failing_storage_address = mcck_info->failing_storage_address;
+ if (mci.ck) {
+ /* Inject the floating machine check */
+ inti.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+ } else {
+ /* Inject the machine check to specified vcpu */
+ irq.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+ }
+}
+
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 689ac48361c6..ef6419654c16 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
+#include <linux/string.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_CMMA_MIGRATION:
case KVM_CAP_S390_AIS:
r = 1;
break;
@@ -750,6 +752,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+{
+ int cx;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(cx, vcpu, kvm)
+ kvm_s390_sync_request(req, vcpu);
+}
+
+/*
+ * Must be called with kvm->srcu held to avoid races on memslots, and with
+ * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ */
+static int kvm_s390_vm_start_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+ struct kvm_memory_slot *ms;
+ /* should be the only one */
+ struct kvm_memslots *slots;
+ unsigned long ram_pages;
+ int slotnr;
+
+ /* migration mode already enabled */
+ if (kvm->arch.migration_state)
+ return 0;
+
+ slots = kvm_memslots(kvm);
+ if (!slots || !slots->used_slots)
+ return -EINVAL;
+
+ mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
+ if (!mgs)
+ return -ENOMEM;
+ kvm->arch.migration_state = mgs;
+
+ if (kvm->arch.use_cmma) {
+ /*
+ * Get the last slot. They should be sorted by base_gfn, so the
+ * last slot is also the one at the end of the address space.
+ * We have verified above that at least one slot is present.
+ */
+ ms = slots->memslots + slots->used_slots - 1;
+ /* round up so we only use full longs */
+ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
+ /* allocate enough bytes to store all the bits */
+ mgs->pgste_bitmap = vmalloc(ram_pages / 8);
+ if (!mgs->pgste_bitmap) {
+ kfree(mgs);
+ kvm->arch.migration_state = NULL;
+ return -ENOMEM;
+ }
+
+ mgs->bitmap_size = ram_pages;
+ atomic64_set(&mgs->dirty_pages, ram_pages);
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
+ bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
+ }
+
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ }
+ return 0;
+}
+
+/*
+ * Must be called with kvm->lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+
+ /* migration mode already disabled */
+ if (!kvm->arch.migration_state)
+ return 0;
+ mgs = kvm->arch.migration_state;
+ kvm->arch.migration_state = NULL;
+
+ if (kvm->arch.use_cmma) {
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+ vfree(mgs->pgste_bitmap);
+ }
+ kfree(mgs);
+ return 0;
+}
+
+static int kvm_s390_vm_set_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ int idx, res = -ENXIO;
+
+ mutex_lock(&kvm->lock);
+ switch (attr->attr) {
+ case KVM_S390_VM_MIGRATION_START:
+ idx = srcu_read_lock(&kvm->srcu);
+ res = kvm_s390_vm_start_migration(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
+ break;
+ case KVM_S390_VM_MIGRATION_STOP:
+ res = kvm_s390_vm_stop_migration(kvm);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return res;
+}
+
+static int kvm_s390_vm_get_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u64 mig = (kvm->arch.migration_state != NULL);
+
+ if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+ return -ENXIO;
+
+ if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+ return -EFAULT;
+ return 0;
+}
+
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high;
@@ -1090,6 +1215,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO:
ret = kvm_s390_vm_set_crypto(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_set_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1112,6 +1240,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_get_cpu_model(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_get_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1179,6 +1310,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
break;
}
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = 0;
+ break;
default:
ret = -ENXIO;
break;
@@ -1286,6 +1420,182 @@ out:
return r;
}
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+/*
+ * This function searches for the next page with dirty CMMA attributes, and
+ * saves the attributes in the buffer up to either the end of the buffer or
+ * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+ * no trailing clean bytes are saved.
+ * In case no dirty bits were found, or if CMMA was not enabled or used, the
+ * output buffer will indicate 0 as length.
+ */
+static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+ struct kvm_s390_cmma_log *args)
+{
+ struct kvm_s390_migration_state *s = kvm->arch.migration_state;
+ unsigned long bufsize, hva, pgstev, i, next, cur;
+ int srcu_idx, peek, r = 0, rr;
+ u8 *res;
+
+ cur = args->start_gfn;
+ i = next = pgstev = 0;
+
+ if (unlikely(!kvm->arch.use_cmma))
+ return -ENXIO;
+ /* Invalid/unsupported flags were specified */
+ if (args->flags & ~KVM_S390_CMMA_PEEK)
+ return -EINVAL;
+ /* Migration mode query, and we are not doing a migration */
+ peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+ if (!peek && !s)
+ return -EINVAL;
+ /* CMMA is disabled or was not used, or the buffer has length zero */
+ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+ if (!bufsize || !kvm->mm->context.use_cmma) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+
+ if (!peek) {
+ /* We are not peeking, and there are no dirty pages */
+ if (!atomic64_read(&s->dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
+ args->start_gfn);
+ if (cur >= s->bitmap_size) /* nothing found, loop back */
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
+ if (cur >= s->bitmap_size) { /* again! (very unlikely) */
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+ }
+
+ res = vmalloc(bufsize);
+ if (!res)
+ return -ENOMEM;
+
+ args->start_gfn = cur;
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ while (i < bufsize) {
+ hva = gfn_to_hva(kvm, cur);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+ /* decrement only if we actually flipped the bit to 0 */
+ if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
+ atomic64_dec(&s->dirty_pages);
+ r = get_pgste(kvm->mm, hva, &pgstev);
+ if (r < 0)
+ pgstev = 0;
+ /* save the value */
+ res[i++] = (pgstev >> 24) & 0x3;
+ /*
+ * if the next bit is too far away, stop.
+ * if we reached the previous "next", find the next one
+ */
+ if (!peek) {
+ if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
+ break;
+ if (cur == next)
+ next = find_next_bit(s->pgste_bitmap,
+ s->bitmap_size, cur + 1);
+ /* reached the end of the bitmap or of the buffer, stop */
+ if ((next >= s->bitmap_size) ||
+ (next >= args->start_gfn + bufsize))
+ break;
+ }
+ cur++;
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+ args->count = i;
+ args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
+
+ rr = copy_to_user((void __user *)args->values, res, args->count);
+ if (rr)
+ r = -EFAULT;
+
+ vfree(res);
+ return r;
+}
+
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.use_cmma flag is set.
+ */
+static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+ const struct kvm_s390_cmma_log *args)
+{
+ unsigned long hva, mask, pgstev, i;
+ uint8_t *bits;
+ int srcu_idx, r = 0;
+
+ mask = args->mask;
+
+ if (!kvm->arch.use_cmma)
+ return -ENXIO;
+ /* invalid/unsupported flags */
+ if (args->flags != 0)
+ return -EINVAL;
+ /* Enforce sane limit on memory allocation */
+ if (args->count > KVM_S390_CMMA_SIZE_MAX)
+ return -EINVAL;
+ /* Nothing to do */
+ if (args->count == 0)
+ return 0;
+
+ bits = vmalloc(sizeof(*bits) * args->count);
+ if (!bits)
+ return -ENOMEM;
+
+ r = copy_from_user(bits, (void __user *)args->values, args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ pgstev = bits[i];
+ pgstev = pgstev << 24;
+ mask &= _PGSTE_GPS_USAGE_MASK;
+ set_pgste_bits(kvm->mm, hva, mask, pgstev);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+
+ if (!kvm->mm->context.use_cmma) {
+ down_write(&kvm->mm->mmap_sem);
+ kvm->mm->context.use_cmma = 1;
+ up_write(&kvm->mm->mmap_sem);
+ }
+out:
+ vfree(bits);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1364,6 +1674,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_set_skeys(kvm, &args);
break;
}
+ case KVM_S390_GET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_get_cmma_bits(kvm, &args);
+ if (!r) {
+ r = copy_to_user(argp, &args, sizeof(args));
+ if (r)
+ r = -EFAULT;
+ }
+ break;
+ }
+ case KVM_S390_SET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_set_cmma_bits(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -1633,6 +1966,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
+ if (kvm->arch.migration_state) {
+ vfree(kvm->arch.migration_state->pgste_bitmap);
+ kfree(kvm->arch.migration_state);
+ }
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -1977,7 +2314,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
- vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -2069,6 +2405,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
if (!vcpu)
goto out;
+ BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
goto out_free_cpu;
@@ -2440,7 +2777,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
kvm_s390_vcpu_request_handled(vcpu);
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
@@ -2489,6 +2826,27 @@ retry:
goto retry;
}
+ if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+ /*
+ * Disable CMMA virtualization; we will emulate the ESSA
+ * instruction manually, in order to provide additional
+ * functionalities needed for live migration.
+ */
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+ /*
+ * Re-enable CMMA virtualization if CMMA is available and
+ * was used.
+ */
+ if ((vcpu->kvm->arch.use_cmma) &&
+ (vcpu->kvm->mm->context.use_cmma))
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ goto retry;
+ }
+
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
@@ -2683,6 +3041,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
+
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2693,6 +3054,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+ if (exit_reason == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(vcpu->arch.sie_block,
+ struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 55f5c8457d6d..6fedc8bc7a37 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void)
*/
return sclp.has_sigpif;
}
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info);
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c03106c428cf..a226c459809b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -24,6 +24,7 @@
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/io.h>
@@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
+static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+{
+ struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
+ int r1, r2, nappended, entries;
+ unsigned long gfn, hva, res, pgstev, ptev;
+ unsigned long *cbrlo;
+
+ /*
+ * We don't need to set SD.FPF.SK to 1 here, because if we have a
+ * machine check here we either handle it or crash
+ */
+
+ kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+ gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+ hva = gfn_to_hva(vcpu->kvm, gfn);
+ entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+
+ if (kvm_is_error_hva(hva))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+ if (nappended < 0) {
+ res = orc ? 0x10 : 0;
+ vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+ return 0;
+ }
+ res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+ /*
+ * Set the block-content state part of the result. 0 means resident, so
+ * nothing to do if the page is valid. 2 is for preserved pages
+ * (non-present and non-zero), and 3 for zero pages (non-present and
+ * zero).
+ */
+ if (ptev & _PAGE_INVALID) {
+ res |= 2;
+ if (pgstev & _PGSTE_GPS_ZERO)
+ res |= 1;
+ }
+ vcpu->run->s.regs.gprs[r1] = res;
+ /*
+ * It is possible that all the normal 511 slots were full, in which case
+ * we will now write in the 512th slot, which is reserved for host use.
+ * In both cases we let the normal essa handling code process all the
+ * slots, including the reserved one, if needed.
+ */
+ if (nappended > 0) {
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+ cbrlo[entries] = gfn << PAGE_SHIFT;
+ }
+
+ if (orc) {
+ /* increment only if we are really flipping the bit to 1 */
+ if (!test_and_set_bit(gfn, ms->pgste_bitmap))
+ atomic64_inc(&ms->dirty_pages);
+ }
+
+ return nappended;
+}
+
static int handle_essa(struct kvm_vcpu *vcpu)
{
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
struct gmap *gmap;
- int i;
+ int i, orc;
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
gmap = vcpu->arch.gmap;
@@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
- if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ /* Check for invalid operation request code */
+ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ if (orc > ESSA_MAX)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Retry the ESSA instruction */
- kvm_s390_retry_instr(vcpu);
+ if (likely(!vcpu->kvm->arch.migration_state)) {
+ /*
+ * CMMA is enabled in the KVM settings, but is disabled in
+ * the SIE block and in the mm_context, and we are not doing
+ * a migration. Enable CMMA in the mm_context.
+ * Since we need to take a write lock to write to the context
+ * to avoid races with storage keys handling, we check if the
+ * value really needs to be written to; if the value is
+ * already correct, we do nothing and avoid the lock.
+ */
+ if (vcpu->kvm->mm->context.use_cmma == 0) {
+ down_write(&vcpu->kvm->mm->mmap_sem);
+ vcpu->kvm->mm->context.use_cmma = 1;
+ up_write(&vcpu->kvm->mm->mmap_sem);
+ }
+ /*
+ * If we are here, we are supposed to have CMMA enabled in
+ * the SIE block. Enabling CMMA works on a per-CPU basis,
+ * while the context use_cmma flag is per process.
+ * It's possible that the context flag is enabled and the
+ * SIE flag is not, so we set the flag always; if it was
+ * already set, nothing changes, otherwise we enable it
+ * on this CPU too.
+ */
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
+ } else {
+ /* Account for the possible extra cbrl entry */
+ i = do_essa(vcpu, orc);
+ if (i < 0)
+ return i;
+ entries += i;
+ }
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4719ecb9ab42..715c19c45d9a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -26,16 +26,21 @@
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
+ /*
+ * the backup info for machine check. ensure it's at
+ * the same offset as that in struct sie_page!
+ */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
/* the pinned originial scb */
- struct kvm_s390_sie_block *scb_o; /* 0x0200 */
+ struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
- struct gmap *gmap; /* 0x0208 */
+ struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
- unsigned long fault_addr; /* 0x0210 */
- __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */
+ unsigned long fault_addr; /* 0x0228 */
+ __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
-} __packed;
+};
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
@@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
int rc;
handle_last_fault(vcpu, vsie_page);
@@ -822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (rc == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(scb_s, struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (rc > 0)
rc = 0; /* we could still have an icpt */
else if (rc == -EFAULT)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 695605eb1dfb..9be890893885 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,28 +48,31 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* x86-specific vcpu->requests bit members */
-#define KVM_REQ_MIGRATE_TIMER 8
-#define KVM_REQ_REPORT_TPR_ACCESS 9
-#define KVM_REQ_TRIPLE_FAULT 10
-#define KVM_REQ_MMU_SYNC 11
-#define KVM_REQ_CLOCK_UPDATE 12
-#define KVM_REQ_EVENT 14
-#define KVM_REQ_APF_HALT 15
-#define KVM_REQ_STEAL_UPDATE 16
-#define KVM_REQ_NMI 17
-#define KVM_REQ_PMU 18
-#define KVM_REQ_PMI 19
-#define KVM_REQ_SMI 20
-#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_HV_CRASH 26
-#define KVM_REQ_IOAPIC_EOI_EXIT 27
-#define KVM_REQ_HV_RESET 28
-#define KVM_REQ_HV_EXIT 29
-#define KVM_REQ_HV_STIMER 30
+#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
+#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
+#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
+#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
+#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
+#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
+#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
+#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
+#define KVM_REQ_NMI KVM_ARCH_REQ(9)
+#define KVM_REQ_PMU KVM_ARCH_REQ(10)
+#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
+#define KVM_REQ_MCLOCK_INPROGRESS \
+ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC \
+ KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16)
+#define KVM_REQ_APIC_PAGE_RELOAD \
+ KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18)
+#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19)
+#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
+#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
+#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 673f9ac50f6d..dbf266b0d14a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -420,6 +420,8 @@
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
#define MSR_IA32_XSS 0x00000da0
#define FEATURE_CONTROL_LOCKED (1<<0)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a6fd40aade7c..da6728383052 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
+
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..4a38b9656391 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -900,7 +900,7 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE) \
goto done; \
ctxt->_eip += sizeof(_type); \
- _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
+ memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
ctxt->fetch.ptr += sizeof(_type); \
_x; \
})
@@ -3941,6 +3941,25 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
}
/*
+ * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
+ * and restore MXCSR.
+ */
+static size_t __fxstate_size(int nregs)
+{
+ return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
+}
+
+static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
+{
+ bool cr4_osfxsr;
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return __fxstate_size(16);
+
+ cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
+ return __fxstate_size(cr4_osfxsr ? 8 : 0);
+}
+
+/*
* FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
* 1) 16 bit mode
* 2) 32 bit mode
@@ -3961,7 +3980,6 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
- size_t size;
int rc;
rc = check_fxsr(ctxt);
@@ -3977,68 +3995,42 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
- size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
- else
- size = offsetof(struct fxregs_state, xmm_space[0]);
-
- return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
-}
-
-static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
- struct fxregs_state *new)
-{
- int rc = X86EMUL_CONTINUE;
- struct fxregs_state old;
-
- rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
- if (rc != X86EMUL_CONTINUE)
- return rc;
-
- /*
- * 64 bit host will restore XMM 8-15, which is not correct on non-64
- * bit guests. Load the current values in order to preserve 64 bit
- * XMMs after fxrstor.
- */
-#ifdef CONFIG_X86_64
- /* XXX: accessing XMM 8-15 very awkwardly */
- memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
-#endif
-
- /*
- * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
- * does save and restore MXCSR.
- */
- if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
- memcpy(new->xmm_space, old.xmm_space, 8 * 16);
-
- return rc;
+ return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
+ fxstate_size(ctxt));
}
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
int rc;
+ size_t size;
rc = check_fxsr(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
- if (rc != X86EMUL_CONTINUE)
- return rc;
+ ctxt->ops->get_fpu(ctxt);
- if (fx_state.mxcsr >> 16)
- return emulate_gp(ctxt, 0);
+ size = fxstate_size(ctxt);
+ if (size < __fxstate_size(16)) {
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ if (rc != X86EMUL_CONTINUE)
+ goto out;
+ }
- ctxt->ops->get_fpu(ctxt);
+ rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+ if (rc != X86EMUL_CONTINUE)
+ goto out;
- if (ctxt->mode < X86EMUL_MODE_PROT64)
- rc = fxrstor_fixup(ctxt, &fx_state);
+ if (fx_state.mxcsr >> 16) {
+ rc = emulate_gp(ctxt, 0);
+ goto out;
+ }
if (rc == X86EMUL_CONTINUE)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
+out:
ctxt->ops->put_fpu(ctxt);
return rc;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c329d2894905..2819d4c123eb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,31 +1495,65 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
static void cancel_hv_timer(struct kvm_lapic *apic)
{
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ preempt_disable();
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
+ preempt_enable();
}
static bool start_hv_timer(struct kvm_lapic *apic)
{
- u64 tscdeadline = apic->lapic_timer.tscdeadline;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ int r;
- if ((atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic)) ||
- kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- } else {
- apic->lapic_timer.hv_timer_in_use = true;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ if (!kvm_x86_ops->set_hv_timer)
+ return false;
+
+ if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+ return false;
- /* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic))
- cancel_hv_timer(apic);
+ r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
+ if (r < 0)
+ return false;
+
+ ktimer->hv_timer_in_use = true;
+ hrtimer_cancel(&ktimer->timer);
+
+ /*
+ * Also recheck ktimer->pending, in case the sw timer triggered in
+ * the window. For periodic timer, leave the hv timer running for
+ * simplicity, and the deadline will be recomputed on the next vmexit.
+ */
+ if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
+ if (r)
+ apic_timer_expired(apic);
+ return false;
}
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
- apic->lapic_timer.hv_timer_in_use);
- return apic->lapic_timer.hv_timer_in_use;
+
+ trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+ return true;
+}
+
+static void start_sw_timer(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+ return;
+
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
+ trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
+}
+
+static void restart_apic_timer(struct kvm_lapic *apic)
+{
+ if (!start_hv_timer(apic))
+ start_sw_timer(apic);
}
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
@@ -1533,19 +1567,14 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
- if (!start_hv_timer(apic))
- start_sw_period(apic);
+ restart_apic_timer(apic);
}
}
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(apic->lapic_timer.hv_timer_in_use);
-
- start_hv_timer(apic);
+ restart_apic_timer(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1554,33 +1583,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
/* Possibly the TSC deadline timer is not enabled yet */
- if (!apic->lapic_timer.hv_timer_in_use)
- return;
-
- cancel_hv_timer(apic);
+ if (apic->lapic_timer.hv_timer_in_use)
+ start_sw_timer(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
- if (atomic_read(&apic->lapic_timer.pending))
- return;
+void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
- start_sw_period(apic);
- else if (apic_lvtt_tscdeadline(apic))
- start_sw_tscdeadline(apic);
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ restart_apic_timer(apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
atomic_set(&apic->lapic_timer.pending, 0);
- if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- if (set_target_expiration(apic) &&
- !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
- start_sw_period(apic);
- } else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
- start_sw_tscdeadline(apic);
- }
+ if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ && !set_target_expiration(apic))
+ return;
+
+ restart_apic_timer(apic);
}
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -1811,16 +1835,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
-u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!lapic_in_kernel(vcpu))
- return 0;
-
- return apic->lapic_timer.tscdeadline;
-}
-
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1934,7 +1948,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
for (i = 0; i < KVM_APIC_LVT_NUM; i++)
kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+ if (kvm_vcpu_is_reset_bsp(vcpu) &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
kvm_lapic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index bcbe811f3b97..29caa2c3dff9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -87,7 +87,6 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
@@ -216,4 +215,5 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
+void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 183ddb235fb4..03df7c1da581 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -189,6 +189,7 @@ struct vcpu_svm {
struct nested_state nested;
bool nmi_singlestep;
+ u64 nmi_singlestep_guest_rflags;
unsigned int3_injected;
unsigned long int3_rip;
@@ -963,6 +964,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+ svm->nmi_singlestep = false;
+ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+ /* Clear our flags if they were not set by the guest */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+ }
+}
+
/* Note:
* This hash table is used to map VM_ID to a struct kvm_arch,
* when handling AMD IOMMU GALOG notification to schedule in
@@ -1712,11 +1725,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
- return to_svm(vcpu)->vmcb->save.rflags;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long rflags = svm->vmcb->save.rflags;
+
+ if (svm->nmi_singlestep) {
+ /* Hide our flags if they were not set by the guest */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ rflags &= ~X86_EFLAGS_RF;
+ }
+ return rflags;
}
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ if (to_svm(vcpu)->nmi_singlestep)
+ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
/*
* Any change of EFLAGS.VM is accompanied by a reload of SS
* (caused by either a task switch or an inter-privilege IRET),
@@ -1807,7 +1833,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
* AMD's VMCB does not have an explicit unusable field, so emulate it
* for cross vendor migration purposes by "not present"
*/
- var->unusable = !var->present || (var->type == 0);
+ var->unusable = !var->present;
switch (seg) {
case VCPU_SREG_TR:
@@ -1840,6 +1866,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
*/
if (var->unusable)
var->db = 0;
+ /* This is symmetric with svm_set_segment() */
var->dpl = to_svm(vcpu)->vmcb->save.cpl;
break;
}
@@ -1980,18 +2007,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
s->base = var->base;
s->limit = var->limit;
s->selector = var->selector;
- if (var->unusable)
- s->attrib = 0;
- else {
- s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
- s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
- s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
- s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
- s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
- s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
- s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
- s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
- }
+ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
+ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
/*
* This is always accurate, except if SYSRET returned to a segment
@@ -2000,7 +2023,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
* would entail passing the CPL to userspace and back.
*/
if (seg == VCPU_SREG_SS)
- svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ /* This is symmetric with svm_get_segment() */
+ svm->vmcb->save.cpl = (var->dpl & 3);
mark_dirty(svm->vmcb, VMCB_SEG);
}
@@ -2113,10 +2137,7 @@ static int db_interception(struct vcpu_svm *svm)
}
if (svm->nmi_singlestep) {
- svm->nmi_singlestep = false;
- if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
- svm->vmcb->save.rflags &=
- ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ disable_nmi_singlestep(svm);
}
if (svm->vcpu.guest_debug &
@@ -2371,8 +2392,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
- if (!(svm->vcpu.arch.efer & EFER_SVME)
- || !is_paging(&svm->vcpu)) {
+ if (!(svm->vcpu.arch.efer & EFER_SVME) ||
+ !is_paging(&svm->vcpu)) {
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -2382,7 +2403,7 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
return 1;
}
- return 0;
+ return 0;
}
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
@@ -2535,6 +2556,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
+/* DB exceptions for our internal use must not cause vmexit */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+ unsigned long dr6;
+
+ /* if we're not singlestepping, it's not ours */
+ if (!svm->nmi_singlestep)
+ return NESTED_EXIT_DONE;
+
+ /* if it's not a singlestep exception, it's not ours */
+ if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+ return NESTED_EXIT_DONE;
+ if (!(dr6 & DR6_BS))
+ return NESTED_EXIT_DONE;
+
+ /* if the guest is singlestepping, it should get the vmexit */
+ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+ disable_nmi_singlestep(svm);
+ return NESTED_EXIT_DONE;
+ }
+
+ /* it's ours, the nested hypervisor must not see this one */
+ return NESTED_EXIT_HOST;
+}
+
static int nested_svm_exit_special(struct vcpu_svm *svm)
{
u32 exit_code = svm->vmcb->control.exit_code;
@@ -2590,8 +2636,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits)
- vmexit = NESTED_EXIT_DONE;
+ if (svm->nested.intercept_exceptions & excp_bits) {
+ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+ vmexit = nested_svm_intercept_db(svm);
+ else
+ vmexit = NESTED_EXIT_DONE;
+ }
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
svm->apf_reason != 0)
@@ -4628,10 +4678,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
+ if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ return; /* STGI will cause a vm exit */
+
+ if (svm->nested.exit_required)
+ return; /* we're not going to run the guest yet */
+
/*
* Something prevents NMI from been injected. Single step over possible
* problem (IRET or exception injection or interrupt shadow)
*/
+ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
@@ -4772,6 +4829,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->nested.exit_required))
return;
+ /*
+ * Disable singlestep if we're injecting an interrupt/exception.
+ * We don't want our modified rflags to be pushed on the stack where
+ * we might not be able to easily reset them if we disabled NMI
+ * singlestep later.
+ */
+ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+ /*
+ * Event injection happens before external interrupts cause a
+ * vmexit and interrupts are disabled here, so smp_send_reschedule
+ * is enough to force an immediate vmexit.
+ */
+ disable_nmi_singlestep(svm);
+ smp_send_reschedule(vcpu->cpu);
+ }
+
pre_svm_run(svm);
sync_lapic_to_cr8(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 72f78396bc09..92ddea08f999 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2769,7 +2769,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept_ad_bits) {
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_PML;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
}
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -3195,7 +3195,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -3277,7 +3277,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
+ return 1;
+ if (is_noncanonical_address(data & PAGE_MASK) ||
+ (data & MSR_IA32_BNDCFGS_RSVD))
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
@@ -6547,7 +6550,6 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -6914,97 +6916,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
- gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
gva_t gva;
- gpa_t vmptr;
struct x86_exception e;
- struct page *page;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
- sizeof(vmptr), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+ sizeof(*vmpointer), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
- switch (exit_reason) {
- case EXIT_REASON_VMON:
- /*
- * SDM 3: 24.11.5
- * The first 4 bytes of VMXON region contain the supported
- * VMCS revision identifier
- *
- * Note - IA32_VMX_BASIC[48] will never be 1
- * for the nested case;
- * which replaces physical address width with 32
- *
- */
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- if (*(u32 *)kmap(page) != VMCS12_REVISION) {
- kunmap(page);
- nested_release_page_clean(page);
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
- kunmap(page);
- nested_release_page_clean(page);
- vmx->nested.vmxon_ptr = vmptr;
- break;
- case EXIT_REASON_VMCLEAR:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMCLEAR_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- case EXIT_REASON_VMPTRLD:
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
-
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu,
- VMXERR_VMPTRLD_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
- break;
- default:
- return 1; /* shouldn't happen */
- }
-
- if (vmpointer)
- *vmpointer = vmptr;
return 0;
}
@@ -7066,6 +6992,8 @@ out_msr_bitmap:
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
+ gpa_t vmptr;
+ struct page *page;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7095,9 +7023,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
-
+
+ /*
+ * SDM 3: 24.11.5
+ * The first 4 bytes of VMXON region contain the supported
+ * VMCS revision identifier
+ *
+ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+ * which replaces physical address width with 32
+ */
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ page = nested_get_page(vcpu, vmptr);
+ if (page == NULL) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+ kunmap(page);
+ nested_release_page_clean(page);
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ kunmap(page);
+ nested_release_page_clean(page);
+
+ vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
if (ret)
return ret;
@@ -7213,9 +7169,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
@@ -7545,9 +7511,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+ if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ if (vmptr == vmx->nested.vmxon_ptr) {
+ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
@@ -7677,7 +7653,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
unsigned long type, types;
gva_t gva;
struct x86_exception e;
- int vpid;
+ struct {
+ u64 vpid;
+ u64 gla;
+ } operand;
if (!(vmx->nested.nested_vmx_secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -7707,17 +7686,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
- sizeof(u32), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+ sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
+ if (operand.vpid >> 16) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ if (is_noncanonical_address(operand.gla)) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ /* fall through */
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
- if (!vpid) {
+ if (!operand.vpid) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
@@ -7913,11 +7903,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
int cr = exit_qualification & 15;
- int reg = (exit_qualification >> 8) & 15;
- unsigned long val = kvm_register_readl(vcpu, reg);
+ int reg;
+ unsigned long val;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
+ reg = (exit_qualification >> 8) & 15;
+ val = kvm_register_readl(vcpu, reg);
switch (cr) {
case 0:
if (vmcs12->cr0_guest_host_mask &
@@ -7972,6 +7964,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
* lmsw can change bits 1..3 of cr0, and only set bit 0 of
* cr0. Other attempted changes are ignored, with no exit.
*/
+ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
if (vmcs12->cr0_guest_host_mask & 0xe &
(val ^ vmcs12->cr0_read_shadow))
return true;
@@ -10733,8 +10726,7 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
}
- if (nested_cpu_has_ept(vmcs12))
- vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
if (nested_cpu_has_vid(vmcs12))
vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
@@ -10759,8 +10751,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
if (kvm_mpx_supported())
vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
}
/*
@@ -11157,7 +11147,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
vmx->hv_deadline_tsc = tscl + delta_tsc;
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
- return 0;
+
+ return delta_tsc == 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02363e37d4a6..f2bd155883dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2841,10 +2841,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vcpu_write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
- if (kvm_lapic_hv_timer_in_use(vcpu) &&
- kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_target_expiration_tsc(vcpu)))
- kvm_lapic_switch_to_sw_timer(vcpu);
+
+ if (kvm_lapic_hv_timer_in_use(vcpu))
+ kvm_lapic_restart_hv_timer(vcpu);
+
/*
* On a host with synchronized TSC, there is no need to update
* kvmclock on vcpu->cpu migration
@@ -6731,7 +6731,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -6895,7 +6895,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->sync_pir_to_irr(vcpu);
}
- if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+ if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
@@ -8394,10 +8394,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (vcpu->arch.pv.pv_unhalted)
return true;
- if (atomic_read(&vcpu->arch.nmi_queued))
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ (vcpu->arch.nmi_pending &&
+ kvm_x86_ops->nmi_allowed(vcpu)))
return true;
- if (kvm_test_request(KVM_REQ_SMI, vcpu))
+ if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ (vcpu->arch.smi_pending && !is_smm(vcpu)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&