summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--arch/x86/kvm/svm.c1558
1 files changed, 930 insertions, 628 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1821c2078199..3de0b37ec038 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -15,20 +15,22 @@
*/
#include <linux/kvm_host.h>
-#include "kvm_svm.h"
#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
+#include "x86.h"
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
+#include <linux/ftrace_event.h>
#include <asm/desc.h>
#include <asm/virtext.h>
+#include "trace.h"
#define __ex(x) __kvm_handle_fault_on_reboot(x)
@@ -44,17 +46,70 @@ MODULE_LICENSE("GPL");
#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
+#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
+#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
+static const u32 host_save_user_msrs[] = {
+#ifdef CONFIG_X86_64
+ MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
+ MSR_FS_BASE,
#endif
+ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+};
+
+#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+
+struct kvm_vcpu;
+
+struct nested_state {
+ struct vmcb *hsave;
+ u64 hsave_msr;
+ u64 vmcb;
+
+ /* These are the merged vectors */
+ u32 *msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 vmcb_msrpm;
+
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
+ /* cache for intercepts of the guest */
+ u16 intercept_cr_read;
+ u16 intercept_cr_write;
+ u16 intercept_dr_read;
+ u16 intercept_dr_write;
+ u32 intercept_exceptions;
+ u64 intercept;
+
+};
+
+struct vcpu_svm {
+ struct kvm_vcpu vcpu;
+ struct vmcb *vmcb;
+ unsigned long vmcb_pa;
+ struct svm_cpu_data *svm_data;
+ uint64_t asid_generation;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+
+ u64 next_rip;
+
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ u64 host_gs_base;
+
+ u32 *msrpm;
+
+ struct nested_state nested;
+
+ bool nmi_singlestep;
+};
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
@@ -66,16 +121,14 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
-static int nested = 0;
+static int nested = 1;
module_param(nested, int, S_IRUGO);
-static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_complete_interrupts(struct vcpu_svm *svm);
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
@@ -86,7 +139,22 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
static inline bool is_nested(struct vcpu_svm *svm)
{
- return svm->nested_vmcb;
+ return svm->nested.vmcb;
+}
+
+static inline void enable_gif(struct vcpu_svm *svm)
+{
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+}
+
+static inline void disable_gif(struct vcpu_svm *svm)
+{
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+}
+
+static inline bool gif_set(struct vcpu_svm *svm)
+{
+ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static unsigned long iopm_base;
@@ -132,24 +200,6 @@ static inline u32 svm_has(u32 feat)
return svm_features & feat;
}
-static inline u8 pop_irq(struct kvm_vcpu *vcpu)
-{
- int word_index = __ffs(vcpu->arch.irq_summary);
- int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
-
- clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
- if (!vcpu->arch.irq_pending[word_index])
- clear_bit(word_index, &vcpu->arch.irq_summary);
- return irq;
-}
-
-static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
- set_bit(irq, vcpu->arch.irq_pending);
- set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-}
-
static inline void clgi(void)
{
asm volatile (__ex(SVM_CLGI));
@@ -165,19 +215,6 @@ static inline void invlpga(unsigned long addr, u32 asid)
asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
}
-static inline unsigned long kvm_read_cr2(void)
-{
- unsigned long cr2;
-
- asm volatile ("mov %%cr2, %0" : "=r" (cr2));
- return cr2;
-}
-
-static inline void kvm_write_cr2(unsigned long val)
-{
- asm volatile ("mov %0, %%cr2" :: "r" (val));
-}
-
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->asid_generation--;
@@ -214,17 +251,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
svm->vmcb->control.event_inj_err = error_code;
}
-static bool svm_exception_injected(struct kvm_vcpu *vcpu)
+static int is_external_interrupt(u32 info)
+{
+ info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
+ return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
+}
+
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u32 ret = 0;
- return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
+ if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+ ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+ return ret & mask;
}
-static int is_external_interrupt(u32 info)
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
- info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
- return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (mask == 0)
+ svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+ else
+ svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
}
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -232,7 +283,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (!svm->next_rip) {
- printk(KERN_DEBUG "%s: NOP\n", __func__);
+ if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
+ EMULATE_DONE)
+ printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
}
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -240,9 +293,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
__func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
- svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-
- vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
+ svm_set_interrupt_shadow(vcpu, 0);
}
static int has_svm(void)
@@ -262,40 +313,46 @@ static void svm_hardware_disable(void *garbage)
cpu_svm_disable();
}
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
{
struct svm_cpu_data *svm_data;
uint64_t efer;
- struct desc_ptr gdt_descr;
+ struct descriptor_table gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
+ rdmsrl(MSR_EFER, efer);
+ if (efer & EFER_SVME)
+ return -EBUSY;
+
if (!has_svm()) {
- printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
- return;
+ printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+ me);
+ return -EINVAL;
}
svm_data = per_cpu(svm_data, me);
if (!svm_data) {
- printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+ printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
me);
- return;
+ return -EINVAL;
}
svm_data->asid_generation = 1;
svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
svm_data->next_asid = svm_data->max_asid + 1;
- asm volatile ("sgdt %0" : "=m"(gdt_descr));
- gdt = (struct desc_struct *)gdt_descr.address;
+ kvm_get_gdt(&gdt_descr);
+ gdt = (struct desc_struct *)gdt_descr.base;
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
- rdmsrl(MSR_EFER, efer);
wrmsrl(MSR_EFER, efer | EFER_SVME);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+
+ return 0;
}
static void svm_cpu_uninit(int cpu)
@@ -371,8 +428,6 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
#endif
set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
- set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
- set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
}
static void svm_enable_lbrv(struct vcpu_svm *svm)
@@ -411,7 +466,6 @@ static __init int svm_hardware_setup(void)
iopm_va = page_address(iopm_pages);
memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
- clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
if (boot_cpu_has(X86_FEATURE_NX))
@@ -425,7 +479,7 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME);
}
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
@@ -459,7 +513,7 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -574,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm)
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
- /*
- * cr0 val on cpu init should be 0x60000010, we enable cpu
- * cache by default. the orderly way is to enable cache in bios.
+ /* This is the guest-visible cr0 value.
+ * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
*/
- save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+ svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+ kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -593,15 +648,20 @@ static void init_vmcb(struct vcpu_svm *svm)
control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
INTERCEPT_CR3_MASK);
save->g_pat = 0x0007040600070406ULL;
- /* enable caching because the QEMU Bios doesn't enable it */
- save->cr0 = X86_CR0_ET;
save->cr3 = 0;
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
- svm->nested_vmcb = 0;
- svm->vcpu.arch.hflags = HF_GIF_MASK;
+ svm->nested.vmcb = 0;
+ svm->vcpu.arch.hflags = 0;
+
+ if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+ control->pause_filter_count = 3000;
+ control->intercept |= (1ULL << INTERCEPT_PAUSE);
+ }
+
+ enable_gif(svm);
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -610,7 +670,7 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
init_vmcb(svm);
- if (vcpu->vcpu_id != 0) {
+ if (!kvm_vcpu_is_bsp(vcpu)) {
kvm_rip_write(vcpu, 0);
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
@@ -661,9 +721,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
hsave_page = alloc_page(GFP_KERNEL);
if (!hsave_page)
goto uninit;
- svm->hsave = page_address(hsave_page);
+ svm->nested.hsave = page_address(hsave_page);
- svm->nested_msrpm = page_address(nested_msrpm_pages);
+ svm->nested.msrpm = page_address(nested_msrpm_pages);
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
@@ -674,7 +734,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
fx_init(&svm->vcpu);
svm->vcpu.fpu_active = 1;
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
- if (svm->vcpu.vcpu_id == 0)
+ if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
return &svm->vcpu;
@@ -693,8 +753,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
- __free_page(virt_to_page(svm->hsave));
- __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->nested.hsave));
+ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -705,17 +765,19 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int i;
if (unlikely(cpu != vcpu->cpu)) {
- u64 tsc_this, delta;
+ u64 delta;
/*
* Make sure that the guest sees a monotonically
* increasing TSC.
*/
- rdtscll(tsc_this);
- delta = vcpu->arch.host_tsc - tsc_this;
+ delta = vcpu->arch.host_tsc - native_read_tsc();
svm->vmcb->control.tsc_offset += delta;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
+ svm->asid_generation = 0;
}
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -731,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
- rdtscll(vcpu->arch.host_tsc);
+ vcpu->arch.host_tsc = native_read_tsc();
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -744,6 +806,18 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+{
+ switch (reg) {
+ case VCPU_EXREG_PDPTR:
+ BUG_ON(!npt_enabled);
+ load_pdptrs(vcpu, vcpu->arch.cr3);
+ break;
+ default:
+ BUG();
+ }
+}
+
static void svm_set_vintr(struct vcpu_svm *svm)
{
svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
@@ -796,6 +870,11 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+ /* AMD's VMCB does not have an explicit unusable field, so emulate it
+ * for cross vendor migration purposes by "not present"
+ */
+ var->unusable = !var->present || (var->type == 0);
+
switch (seg) {
case VCPU_SREG_CS:
/*
@@ -826,9 +905,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
if (!var->unusable)
var->type |= 0x1;
break;
+ case VCPU_SREG_SS:
+ /* On AMD CPUs sometimes the DB bit in the segment
+ * descriptor is left as 1, although the whole segment has
+ * been made unusable. Clear it here to pass an Intel VMX
+ * entry check when cross vendor migrating.
+ */
+ if (var->unusable)
+ var->db = 0;
+ break;
}
-
- var->unusable = !var->present;
}
static int svm_get_cpl(struct kvm_vcpu *vcpu)
@@ -958,15 +1044,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
{
- int old_debug = vcpu->guest_debug;
struct vcpu_svm *svm = to_svm(vcpu);
- vcpu->guest_debug = dbg->control;
-
svm->vmcb->control.intercept_exceptions &=
~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+ if (svm->nmi_singlestep)
+ svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
if (vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -977,28 +1064,18 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1 << BP_VECTOR;
} else
vcpu->guest_debug = 0;
+}
+
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
else
svm->vmcb->save.dr7 = vcpu->arch.dr7;
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
- return 0;
-}
-
-static int svm_get_irq(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u32 exit_int_info = svm->vmcb->control.exit_int_info;
-
- if (is_external_interrupt(exit_int_info))
- return exit_int_info & SVM_EVTINJ_VEC_MASK;
- return -1;
+ update_db_intercept(vcpu);
}
static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1023,7 +1100,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
- svm->vcpu.cpu = svm_data->cpu;
svm->asid_generation = svm_data->asid_generation;
svm->vmcb->control.asid = svm_data->next_asid++;
}
@@ -1053,7 +1129,6 @@ static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
val = 0;
}
- KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
return val;
}
@@ -1062,8 +1137,6 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
struct vcpu_svm *svm = to_svm(vcpu);
- KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
-
*exception = 0;
switch (dr) {
@@ -1103,76 +1176,72 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
}
}
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
{
- u32 exit_int_info = svm->vmcb->control.exit_int_info;
- struct kvm *kvm = svm->vcpu.kvm;
u64 fault_address;
u32 error_code;
- bool event_injection = false;
-
- if (!irqchip_in_kernel(kvm) &&
- is_external_interrupt(exit_int_info)) {
- event_injection = true;
- push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
- }
fault_address = svm->vmcb->control.exit_info_2;
error_code = svm->vmcb->control.exit_info_1;
- if (!npt_enabled)
- KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
- (u32)fault_address, (u32)(fault_address >> 32),
- handler);
- else
- KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
- (u32)fault_address, (u32)(fault_address >> 32),
- handler);
- /*
- * FIXME: Tis shouldn't be necessary here, but there is a flush
- * missing in the MMU code. Until we find this bug, flush the
- * complete TLB here on an NPF
- */
- if (npt_enabled)
- svm_flush_tlb(&svm->vcpu);
-
- if (!npt_enabled && event_injection)
+ trace_kvm_page_fault(fault_address, error_code);
+ if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
if (!(svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+ !svm->nmi_singlestep) {
kvm_queue_exception(&svm->vcpu, DB_VECTOR);
return 1;
}
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- return 0;
+
+ if (svm->nmi_singlestep) {
+ svm->nmi_singlestep = false;
+ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+ svm->vmcb->save.rflags &=
+ ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ update_db_intercept(&svm->vcpu);
+ }
+
+ if (svm->vcpu.guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc =
+ svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ return 0;
+ }
+
+ return 1;
}
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = BP_VECTOR;
return 0;
}
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
{
int er;
- er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+ er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
{
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1182,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
{
/*
* On an #MC intercept the MCE handler is not called automatically in
@@ -1195,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
/*
* VMCB is undefined after a SHUTDOWN intercept
* so reinitialize it.
@@ -1208,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 0;
}
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
{
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
int size, in, string;
@@ -1222,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
if (string) {
if (emulate_instruction(&svm->vcpu,
- kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+ 0, 0, 0) == EMULATE_DO_MMIO)
return 0;
return 1;
}
@@ -1232,35 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
skip_emulated_instruction(&svm->vcpu);
- return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+ return kvm_emulate_pio(&svm->vcpu, in, size, port);
}
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
{
- KVMTRACE_0D(NMI, &svm->vcpu, handler);
return 1;
}
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.irq_exits;
- KVMTRACE_0D(INTR, &svm->vcpu, handler);
return 1;
}
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
{
return 1;
}
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_halt(&svm->vcpu);
}
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
@@ -1287,281 +1356,306 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code)
{
- if (is_nested(svm)) {
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
- if (nested_svm_exit_handled(svm, false)) {
- nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
-
- nested_svm_vmexit(svm);
- return 1;
- }
- }
+ if (!is_nested(svm))
+ return 0;
- return 0;
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+ return nested_svm_exit_handled(svm);
}
static inline int nested_svm_intr(struct vcpu_svm *svm)
{
- if (is_nested(svm)) {
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- return 0;
+ if (!is_nested(svm))
+ return 0;
- if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
- return 0;
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ return 0;
- svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+ return 0;
- if (nested_svm_exit_handled(svm, false)) {
- nsvm_printk("VMexit -> INTR\n");
- nested_svm_vmexit(svm);
- return 1;
- }
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+ if (svm->nested.intercept & 1ULL) {
+ /*
+ * The #vmexit can't be emulated here directly because this
+ * code path runs with irqs and preemtion disabled. A
+ * #vmexit emulation might sleep. Only signal request for
+ * the #vmexit here.
+ */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+ return 1;
}
return 0;
}
-static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
{
struct page *page;
- down_read(&current->mm->mmap_sem);
page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
- up_read(&current->mm->mmap_sem);
+ if (is_error_page(page))
+ goto error;
- if (is_error_page(page)) {
- printk(KERN_INFO "%s: could not find page at 0x%llx\n",
- __func__, gpa);
- kvm_release_page_clean(page);
- kvm_inject_gp(&svm->vcpu, 0);
- return NULL;
- }
- return page;
+ return kmap_atomic(page, idx);
+
+error:
+ kvm_release_page_clean(page);
+ kvm_inject_gp(&svm->vcpu, 0);
+
+ return NULL;
}
-static int nested_svm_do(struct vcpu_svm *svm,
- u64 arg1_gpa, u64 arg2_gpa, void *opaque,
- int (*handler)(struct vcpu_svm *svm,
- void *arg1,
- void *arg2,
- void *opaque))
+static void nested_svm_unmap(void *addr, enum km_type idx)
{
- struct page *arg1_page;
- struct page *arg2_page = NULL;
- void *arg1;
- void *arg2 = NULL;
- int retval;
+ struct page *page;
- arg1_page = nested_svm_get_page(svm, arg1_gpa);
- if(arg1_page == NULL)
- return 1;
+ if (!addr)
+ return;
- if (arg2_gpa) {
- arg2_page = nested_svm_get_page(svm, arg2_gpa);
- if(arg2_page == NULL) {
- kvm_release_page_clean(arg1_page);
- return 1;
- }
- }
+ page = kmap_atomic_to_page(addr);
+
+ kunmap_atomic(addr, idx);
+ kvm_release_page_dirty(page);
+}
- arg1 = kmap_atomic(arg1_page, KM_USER0);
- if (arg2_gpa)
- arg2 = kmap_atomic(arg2_page, KM_USER1);
+static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm)
+{
+ u32 param = svm->vmcb->control.exit_info_1 & 1;
+ u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ bool ret = false;
+ u32 t0, t1;
+ u8 *msrpm;
- retval = handler(svm, arg1, arg2, opaque);
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return false;
+
+ msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+
+ if (!msrpm)
+ goto out;
+
+ switch (msr) {
+ case 0 ... 0x1fff:
+ t0 = (msr * 2) % 8;
+ t1 = msr / 8;
+ break;
+ case 0xc0000000 ... 0xc0001fff:
+ t0 = (8192 + msr - 0xc0000000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ case 0xc0010000 ... 0xc0011fff:
+ t0 = (16384 + msr - 0xc0010000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ default:
+ ret = true;
+ goto out;
+ }
- kunmap_atomic(arg1, KM_USER0);
- if (arg2_gpa)
- kunmap_atomic(arg2, KM_USER1);
+ ret = msrpm[t1] & ((1 << param) << t0);
- kvm_release_page_dirty(arg1_page);
- if (arg2_gpa)
- kvm_release_page_dirty(arg2_page);
+out:
+ nested_svm_unmap(msrpm, KM_USER0);
- return retval;
+ return ret;
}
-static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
- void *arg1,
- void *arg2,
- void *opaque)
+static int nested_svm_exit_special(struct vcpu_svm *svm)
{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- bool kvm_overrides = *(bool *)opaque;
u32 exit_code = svm->vmcb->control.exit_code;
- if (kvm_overrides) {
- switch (exit_code) {
- case SVM_EXIT_INTR:
- case SVM_EXIT_NMI:
- return 0;
+ switch (exit_code) {
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ return NESTED_EXIT_HOST;
/* For now we are always handling NPFs when using them */
- case SVM_EXIT_NPF:
- if (npt_enabled)
- return 0;
- break;
- /* When we're shadowing, trap PFs */
- case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- if (!npt_enabled)
- return 0;
- break;
- default:
- break;
- }
+ case SVM_EXIT_NPF:
+ if (npt_enabled)
+ return NESTED_EXIT_HOST;
+ break;
+ /* When we're shadowing, trap PFs */
+ case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+ if (!npt_enabled)
+ return NESTED_EXIT_HOST;
+ break;
+ default:
+ break;
}
+ return NESTED_EXIT_CONTINUE;
+}
+
+/*
+ * If this function returns true, this #vmexit was already handled
+ */
+static int nested_svm_exit_handled(struct vcpu_svm *svm)
+{
+ u32 exit_code = svm->vmcb->control.exit_code;
+ int vmexit = NESTED_EXIT_HOST;
+
switch (exit_code) {
+ case SVM_EXIT_MSR:
+ vmexit = nested_svm_exit_handled_msr(svm);
+ break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
- if (nested_vmcb->control.intercept_cr_read & cr_bits)
- return 1;
+ if (svm->nested.intercept_cr_read & cr_bits)
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
- if (nested_vmcb->control.intercept_cr_write & cr_bits)
- return 1;
+ if (svm->nested.intercept_cr_write & cr_bits)
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
- if (nested_vmcb->control.intercept_dr_read & dr_bits)
- return 1;
+ if (svm->nested.intercept_dr_read & dr_bits)
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
- if (nested_vmcb->control.intercept_dr_write & dr_bits)
- return 1;
+ if (svm->nested.intercept_dr_write & dr_bits)
+ vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (nested_vmcb->control.intercept_exceptions & excp_bits)
- return 1;
+ if (svm->nested.intercept_exceptions & excp_bits)
+ vmexit = NESTED_EXIT_DONE;
break;
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
- if (nested_vmcb->control.intercept & exit_bits)
- return 1;
+ if (svm->nested.intercept & exit_bits)
+ vmexit = NESTED_EXIT_DONE;
}
}
- return 0;
+ if (vmexit == NESTED_EXIT_DONE) {
+ nested_svm_vmexit(svm);
+ }
+
+ return vmexit;
+}
+
+static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
+{
+ struct vmcb_control_area *dst = &dst_vmcb->control;
+ struct vmcb_control_area *from = &from_vmcb->control;
+
+ dst->intercept_cr_read = from->intercept_cr_read;
+ dst->intercept_cr_write = from->intercept_cr_write;
+ dst->intercept_dr_read = from->intercept_dr_read;
+ dst->intercept_dr_write = from->intercept_dr_write;
+ dst->intercept_exceptions = from->intercept_exceptions;
+ dst->intercept = from->intercept;
+ dst->iopm_base_pa = from->iopm_base_pa;
+ dst->msrpm_base_pa = from->msrpm_base_pa;
+ dst->tsc_offset = from->tsc_offset;
+ dst->asid = from->asid;
+ dst->tlb_ctl = from->tlb_ctl;
+ dst->int_ctl = from->int_ctl;
+ dst->int_vector = from->int_vector;
+ dst->int_state = from->int_state;
+ dst->exit_code = from->exit_code;
+ dst->exit_code_hi = from->exit_code_hi;
+ dst->exit_info_1 = from->exit_info_1;
+ dst->exit_info_2 = from->exit_info_2;
+ dst->exit_int_info = from->exit_int_info;
+ dst->exit_int_info_err = from->exit_int_info_err;
+ dst->nested_ctl = from->nested_ctl;
+ dst->event_inj = from->event_inj;
+ dst->event_inj_err = from->event_inj_err;
+ dst->nested_cr3 = from->nested_cr3;
+ dst->lbr_ctl = from->lbr_ctl;
}
-static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
- void *arg1, void *arg2,
- void *opaque)
+static int nested_svm_vmexit(struct vcpu_svm *svm)
{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- u8 *msrpm = (u8 *)arg2;
- u32 t0, t1;
- u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
- u32 param = svm->vmcb->control.exit_info_1 & 1;
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
- if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
- return 0;
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
- switch(msr) {
- case 0 ... 0x1fff:
- t0 = (msr * 2) % 8;
- t1 = msr / 8;
- break;
- case 0xc0000000 ... 0xc0001fff:
- t0 = (8192 + msr - 0xc0000000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- case 0xc0010000 ... 0xc0011fff:
- t0 = (16384 + msr - 0xc0010000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- default:
- return 1;
- break;
- }
- if (msrpm[t1] & ((1 << param) << t0))
+ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
+ if (!nested_vmcb)
return 1;
- return 0;
-}
+ /* Give the current vmcb to the guest */
+ disable_gif(svm);
+
+ nested_vmcb->save.es = vmcb->save.es;
+ nested_vmcb->save.cs = vmcb->save.cs;
+ nested_vmcb->save.ss = vmcb->save.ss;
+ nested_vmcb->save.ds = vmcb->save.ds;
+ nested_vmcb->save.gdtr = vmcb->save.gdtr;
+ nested_vmcb->save.idtr = vmcb->save.idtr;
+ if (npt_enabled)
+ nested_vmcb->save.cr3 = vmcb->save.cr3;
+ nested_vmcb->save.cr2 = vmcb->save.cr2;
+ nested_vmcb->save.rflags = vmcb->save.rflags;
+ nested_vmcb->save.rip = vmcb->save.rip;
+ nested_vmcb->save.rsp = vmcb->save.rsp;
+ nested_vmcb->save.rax = vmcb->save.rax;
+ nested_vmcb->save.dr7 = vmcb->save.dr7;
+ nested_vmcb->save.dr6 = vmcb->save.dr6;
+ nested_vmcb->save.cpl = vmcb->save.cpl;
+
+ nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
+ nested_vmcb->control.int_vector = vmcb->control.int_vector;
+ nested_vmcb->control.int_state = vmcb->control.int_state;
+ nested_vmcb->control.exit_code = vmcb->control.exit_code;
+ nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
+ nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
+ nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
+ nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
+ nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
-{
- bool k = kvm_override;
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
- switch (svm->vmcb->control.exit_code) {
- case SVM_EXIT_MSR:
- return nested_svm_do(svm, svm->nested_vmcb,
- svm->nested_vmcb_msrpm, NULL,
- nested_svm_exit_handled_msr);
- default: break;
- }
-
- return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
- nested_svm_exit_handled_real);
-}
-
-static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
-{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- struct vmcb *hsave = svm->hsave;
- u64 nested_save[] = { nested_vmcb->save.cr0,
- nested_vmcb->save.cr3,
- nested_vmcb->save.cr4,
- nested_vmcb->save.efer,
- nested_vmcb->control.intercept_cr_read,
- nested_vmcb->control.intercept_cr_write,
- nested_vmcb->control.intercept_dr_read,
- nested_vmcb->control.intercept_dr_write,
- nested_vmcb->control.intercept_exceptions,
- nested_vmcb->control.intercept,
- nested_vmcb->control.msrpm_base_pa,
- nested_vmcb->control.iopm_base_pa,
- nested_vmcb->control.tsc_offset };
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
- /* Give the current vmcb to the guest */
- memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
- nested_vmcb->save.cr0 = nested_save[0];
- if (!npt_enabled)
- nested_vmcb->save.cr3 = nested_save[1];
- nested_vmcb->save.cr4 = nested_save[2];
- nested_vmcb->save.efer = nested_save[3];
- nested_vmcb->control.intercept_cr_read = nested_save[4];
- nested_vmcb->control.intercept_cr_write = nested_save[5];
- nested_vmcb->control.intercept_dr_read = nested_save[6];
- nested_vmcb->control.intercept_dr_write = nested_save[7];
- nested_vmcb->control.intercept_exceptions = nested_save[8];
- nested_vmcb->control.intercept = nested_save[9];
- nested_vmcb->control.msrpm_base_pa = nested_save[10];
- nested_vmcb->control.iopm_base_pa = nested_save[11];
- nested_vmcb->control.tsc_offset = nested_save[12];
+ nested_vmcb->control.tlb_ctl = 0;
+ nested_vmcb->control.event_inj = 0;
+ nested_vmcb->control.event_inj_err = 0;
/* We always set V_INTR_MASKING and remember the old value in hflags */
if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
- if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
- (nested_vmcb->control.int_vector)) {
- nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
- nested_vmcb->control.int_vector);
- }
-
/* Restore the original control entries */
- svm->vmcb->control = hsave->control;
+ copy_vmcb_control_area(vmcb, hsave);
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
- svm->vcpu.arch.exception.pending = false;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
/* Restore selected save entries */
svm->vmcb->save.es = hsave->save.es;
@@ -1587,19 +1681,10 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
/* Exit nested SVM mode */
- svm->nested_vmcb = 0;
+ svm->nested.vmcb = 0;
- return 0;
-}
-
-static int nested_svm_vmexit(struct vcpu_svm *svm)
-{
- nsvm_printk("VMexit\n");
- if (nested_svm_do(svm, svm->nested_vmcb, 0,
- NULL, nested_svm_vmexit_real))
- return 1;
+ nested_svm_unmap(nested_vmcb, KM_USER0);
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
@@ -1607,38 +1692,69 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
return 0;
}
-static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
+static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
+ u32 *nested_msrpm;
int i;
- u32 *nested_msrpm = (u32*)arg1;
+
+ nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
+ if (!nested_msrpm)
+ return false;
+
for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
- svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
- svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+ svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
- return 0;
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+
+ nested_svm_unmap(nested_msrpm, KM_USER0);
+
+ return true;
}
-static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
+static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- struct vmcb *hsave = svm->hsave;
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
+
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+ if (!nested_vmcb)
+ return false;
/* nested_vmcb is our indicator if nested SVM is activated */
- svm->nested_vmcb = svm->vmcb->save.rax;
+ svm->nested.vmcb = svm->vmcb->save.rax;
+
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
/* Clear internal status */
- svm->vcpu.arch.exception.pending = false;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
/* Save the old vmcb, so we don't need to pick what we save, but
can restore everything when a VMEXIT occurs */
- memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
- /* We need to remember the original CR3 in the SPT case */
- if (!npt_enabled)
- hsave->save.cr3 = svm->vcpu.arch.cr3;
- hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rip = svm->next_rip;
+ hsave->save.es = vmcb->save.es;
+ hsave->save.cs = vmcb->save.cs;
+ hsave->save.ss = vmcb->save.ss;
+ hsave->save.ds = vmcb->save.ds;
+ hsave->save.gdtr = vmcb->save.gdtr;
+ hsave->save.idtr = vmcb->save.idtr;
+ hsave->save.efer = svm->vcpu.arch.shadow_efer;
+ hsave->save.cr0 = svm->vcpu.arch.cr0;
+ hsave->save.cr4 = svm->vcpu.arch.cr4;
+ hsave->save.rflags = vmcb->save.rflags;
+ hsave->save.rip = svm->next_rip;
+ hsave->save.rsp = vmcb->save.rsp;
+ hsave->save.rax = vmcb->save.rax;
+ if (npt_enabled)
+ hsave->save.cr3 = vmcb->save.cr3;
+ else
+ hsave->save.cr3 = svm->vcpu.arch.cr3;
+
+ copy_vmcb_control_area(hsave, vmcb);
if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -1663,7 +1779,7 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
kvm_mmu_reset_context(&svm->vcpu);
}
- svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+ svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
@@ -1690,40 +1806,37 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
- svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+ /* cache intercepts */
+ svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read;
+ svm->nested.intercept_cr_write = nested_vmcb->control.intercept_cr_write;
+ svm->nested.intercept_dr_read = nested_vmcb->control.intercept_dr_read;
+ svm->nested.intercept_dr_write = nested_vmcb->control.intercept_dr_write;
+ svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
+ svm->nested.intercept = nested_vmcb->control.intercept;
force_new_asid(&svm->vcpu);
- svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
- svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
+ nested_svm_unmap(nested_vmcb, KM_USER0);
- return 0;
+ enable_gif(svm);
+
+ return true;
}
-static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1737,69 +1850,77 @@ static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
-
- return 1;
-}
-
-static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque)
-{
- return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
}
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque)
+static int vmload_interception(struct vcpu_svm *svm)
{
- return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
-}
+ struct vmcb *nested_vmcb;
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+ if (!nested_vmcb)
+ return 1;
+
+ nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
+ nested_svm_unmap(nested_vmcb, KM_USER0);
return 1;
}
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
{
+ struct vmcb *nested_vmcb;
+
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
+ if (!nested_vmcb)
+ return 1;
+
+ nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
+ nested_svm_unmap(nested_vmcb, KM_USER0);
return 1;
}
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
{
- nsvm_printk("VMrun\n");
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
- NULL, nested_svm_vmrun))
+ if (!nested_svm_vmrun(svm))
return 1;
- if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
- NULL, nested_svm_vmrun_msrpm))
- return 1;
+ if (!nested_svm_vmrun_msrpm(svm))
+ goto failed;
+
+ return 1;
+
+failed:
+
+ svm->vmcb->control.exit_code = SVM_EXIT_ERR;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
return 1;
}
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
{
if (nested_svm_check_permissions(svm))
return 1;
@@ -1807,12 +1928,12 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
+ enable_gif(svm);
return 1;
}
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
{
if (nested_svm_check_permissions(svm))
return 1;
@@ -1820,7 +1941,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+ disable_gif(svm);
/* After a CLGI no interrupts should come */
svm_clear_vintr(svm);
@@ -1829,56 +1950,126 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int invalid_op_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ vcpu->arch.regs[VCPU_REGS_RAX]);
+
+ /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
+ kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+ return 1;
+}
+
+static int skinit_interception(struct vcpu_svm *svm)
{
+ trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
-static int task_switch_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
{
u16 tss_selector;
+ int reason;
+ int int_type = svm->vmcb->control.exit_int_info &
+ SVM_EXITINTINFO_TYPE_MASK;
+ int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
+ uint32_t type =
+ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
+ uint32_t idt_v =
+ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
tss_selector = (u16)svm->vmcb->control.exit_info_1;
+
if (svm->vmcb->control.exit_info_2 &
(1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
- return kvm_task_switch(&svm->vcpu, tss_selector,
- TASK_SWITCH_IRET);
- if (svm->vmcb->control.exit_info_2 &
- (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
- return kvm_task_switch(&svm->vcpu, tss_selector,
- TASK_SWITCH_JMP);
- return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
+ reason = TASK_SWITCH_IRET;
+ else if (svm->vmcb->control.exit_info_2 &
+ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+ reason = TASK_SWITCH_JMP;
+ else if (idt_v)
+ reason = TASK_SWITCH_GATE;
+ else
+ reason = TASK_SWITCH_CALL;
+
+ if (reason == TASK_SWITCH_GATE) {
+ switch (type) {
+ case SVM_EXITINTINFO_TYPE_NMI:
+ svm->vcpu.arch.nmi_injected = false;
+ break;
+ case SVM_EXITINTINFO_TYPE_EXEPT:
+ kvm_clear_exception_queue(&svm->vcpu);
+ break;
+ case SVM_EXITINTINFO_TYPE_INTR:
+ kvm_clear_interrupt_queue(&svm->vcpu);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (reason != TASK_SWITCH_GATE ||
+ int_type == SVM_EXITINTINFO_TYPE_SOFT ||
+ (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
+ (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+ skip_emulated_instruction(&svm->vcpu);
+
+ return kvm_task_switch(&svm->vcpu, tss_selector, reason);
}
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
kvm_emulate_cpuid(&svm->vcpu);
return 1;
}
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
{
- if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+ ++svm->vcpu.stat.nmi_window_exits;
+ svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+ svm->vcpu.arch.hflags |= HF_IRET_MASK;
+ return 1;
+}
+
+static int invlpg_interception(struct vcpu_svm *svm)
+{
+ if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
return 1;
}
-static int emulate_on_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
{
- if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+ if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
return 1;
}
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
{
- emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
- if (irqchip_in_kernel(svm->vcpu.kvm))
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
+ u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+ /* instruction emulation calls kvm_set_cr8() */
+ emulate_instruction(&svm->vcpu, 0, 0, 0);
+ if (irqchip_in_kernel(svm->vcpu.kvm)) {
+ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+ return 1;
+ }
+ if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return 1;
kvm_run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
@@ -1889,11 +2080,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
struct vcpu_svm *svm = to_svm(vcpu);
switch (ecx) {
- case MSR_IA32_TIME_STAMP_COUNTER: {
- u64 tsc;
+ case MSR_IA32_TSC: {
+ u64 tsc_offset;
+
+ if (is_nested(svm))
+ tsc_offset = svm->nested.hsave->control.tsc_offset;
+ else
+ tsc_offset = svm->vmcb->control.tsc_offset;
- rdtscll(tsc);
- *data = svm->vmcb->control.tsc_offset + tsc;
+ *data = tsc_offset + native_read_tsc();
break;
}
case MSR_K6_STAR:
@@ -1917,10 +2112,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
*data = svm->vmcb->save.sysenter_cs;
break;
case MSR_IA32_SYSENTER_EIP:
- *data = svm->vmcb->save.sysenter_eip;
+ *data = svm->sysenter_eip;
break;
case MSR_IA32_SYSENTER_ESP:
- *data = svm->vmcb->save.sysenter_esp;
+ *data = svm->sysenter_esp;
break;
/* Nobody will change the following 5 values in the VMCB so
we can safely return them on rdmsr. They will always be 0
@@ -1941,7 +2136,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
*data = svm->vmcb->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
- *data = svm->hsave_msr;
+ *data = svm->nested.hsave_msr;
break;
case MSR_VM_CR:
*data = 0;
@@ -1955,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
return 0;
}
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data;
@@ -1963,8 +2158,7 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
if (svm_get_msr(&svm->vcpu, ecx, &data))
kvm_inject_gp(&svm->vcpu, 0);
else {
- KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
- (u32)(data >> 32), handler);
+ trace_kvm_msr_read(ecx, data);
svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
@@ -1979,11 +2173,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
struct vcpu_svm *svm = to_svm(vcpu);
switch (ecx) {
- case MSR_IA32_TIME_STAMP_COUNTER: {
- u64 tsc;
+ case MSR_IA32_TSC: {
+ u64 tsc_offset = data - native_read_tsc();
+ u64 g_tsc_offset = 0;
+
+ if (is_nested(svm)) {
+ g_tsc_offset = svm->vmcb->control.tsc_offset -
+ svm->nested.hsave->control.tsc_offset;
+ svm->nested.hsave->control.tsc_offset = tsc_offset;
+ }
+
+ svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
- rdtscll(tsc);
- svm->vmcb->control.tsc_offset = data - tsc;
break;
}
case MSR_K6_STAR:
@@ -2007,9 +2208,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
svm->vmcb->save.sysenter_cs = data;
break;
case MSR_IA32_SYSENTER_EIP:
+ svm->sysenter_eip = data;
svm->vmcb->save.sysenter_eip = data;
break;
case MSR_IA32_SYSENTER_ESP:
+ svm->sysenter_esp = data;
svm->vmcb->save.sysenter_esp = data;
break;
case MSR_IA32_DEBUGCTLMSR:
@@ -2027,24 +2230,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
else
svm_disable_lbrv(svm);
break;
- case MSR_K7_EVNTSEL0:
- case MSR_K7_EVNTSEL1:
- case MSR_K7_EVNTSEL2:
- case MSR_K7_EVNTSEL3:
- case MSR_K7_PERFCTR0:
- case MSR_K7_PERFCTR1:
- case MSR_K7_PERFCTR2:
- case MSR_K7_PERFCTR3:
- /*
- * Just discard all writes to the performance counters; this
- * should keep both older linux and windows 64-bit guests
- * happy
- */
- pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
-
- break;
case MSR_VM_HSAVE_PA:
- svm->hsave_msr = data;
+ svm->nested.hsave_msr = data;
+ break;
+ case MSR_VM_CR:
+ case MSR_VM_IGNNE:
+ pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
@@ -2052,14 +2243,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
return 0;
}
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
- KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
- handler);
+ trace_kvm_msr_write(ecx, data);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (svm_set_msr(&svm->vcpu, ecx, data))
@@ -2069,18 +2259,17 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
{
if (svm->vmcb->control.exit_info_1)
- return wrmsr_interception(svm, kvm_run);
+ return wrmsr_interception(svm);
else
- return rdmsr_interception(svm, kvm_run);
+ return rdmsr_interception(svm);
}
-static int interrupt_window_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
{
- KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
+ struct kvm_run *kvm_run = svm->vcpu.run;
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
@@ -2088,8 +2277,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
* If the user space waits to inject interrupts, exit as soon as
* possible
*/
- if (kvm_run->request_interrupt_window &&
- !svm->vcpu.arch.irq_summary) {
+ if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+ kvm_run->request_interrupt_window &&
+ !kvm_cpu_has_interrupt(&svm->vcpu)) {
++svm->vcpu.stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
@@ -2098,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
return 1;
}
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
- struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+ kvm_vcpu_on_spin(&(svm->vcpu));
+ return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = emulate_on_interception,
[SVM_EXIT_READ_CR3] = emulate_on_interception,
[SVM_EXIT_READ_CR4] = emulate_on_interception,
@@ -2132,10 +2327,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_VINTR] = interrupt_window_interception,
/* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
[SVM_EXIT_CPUID] = cpuid_interception,
+ [SVM_EXIT_IRET] = iret_interception,
[SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
- [SVM_EXIT_INVLPGA] = invalid_op_interception,
+ [SVM_EXIT_INVLPGA] = invlpga_interception,
[SVM_EXIT_IOIO] = io_interception,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
@@ -2146,32 +2343,48 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
- [SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
[SVM_EXIT_NPF] = pf_interception,
};
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
- KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
- (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+ trace_kvm_exit(exit_code, svm->vmcb->save.rip);
+
+ if (unlikely(svm->nested.exit_required)) {
+ nested_svm_vmexit(svm);
+ svm->nested.exit_required = false;
+
+ return 1;
+ }
if (is_nested(svm)) {
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
- if (nested_svm_exit_handled(svm, true)) {
- nested_svm_vmexit(svm);
- nsvm_printk("-> #VMEXIT\n");
+ int vmexit;
+
+ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2,
+ svm->vmcb->control.exit_int_info,
+ svm->vmcb->control.exit_int_info_err);
+
+ vmexit = nested_svm_exit_special(svm);
+
+ if (vmexit == NESTED_EXIT_CONTINUE)
+ vmexit = nested_svm_exit_handled(svm);
+
+ if (vmexit == NESTED_EXIT_DONE)
return 1;
- }
}
+ svm_complete_interrupts(svm);
+
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -2180,19 +2393,12 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
vcpu->arch.cr0 = svm->vmcb->save.cr0;
vcpu->arch.cr3 = svm->vmcb->save.cr3;
- if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
- }
if (mmu_reload) {
kvm_mmu_reset_context(vcpu);
kvm_mmu_load(vcpu);
}
}
- kvm_reput_irq(svm);
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2203,7 +2409,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF)
+ exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
"exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info,
@@ -2216,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return 0;
}
- return svm_exit_handlers[exit_code](svm, kvm_run);
+ return svm_exit_handlers[exit_code](svm);
}
static void reload_tss(struct kvm_vcpu *vcpu)
@@ -2235,17 +2441,26 @@ static void pre_svm_run(struct vcpu_svm *svm)
struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
- if (svm->vcpu.cpu != cpu ||
- svm->asid_generation != svm_data->asid_generation)
+ /* FIXME: handle wraparound of asid_generation */
+ if (svm->asid_generation != svm_data->asid_generation)
new_asid(svm, svm_data);
}
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ vcpu->arch.hflags |= HF_NMI_MASK;
+ svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ ++vcpu->stat.nmi_injections;
+}
static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
{
struct vmcb_control_area *control;
- KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
+ trace_kvm_inj_virq(irq);
++svm->vcpu.stat.irq_injections;
control = &svm->vmcb->control;
@@ -2255,134 +2470,103 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
}
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nested_svm_intr(svm);
+ BUG_ON(!(gif_set(svm)));
- svm_inject_irq(svm, irq);
+ svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
+ SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
- int max_irr, tpr;
-
- if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
- return;
-
- vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
- max_irr = kvm_lapic_find_highest_irr(vcpu);
- if (max_irr == -1)
+ if (irr == -1)
return;
- tpr = kvm_lapic_get_cr8(vcpu) << 4;
-
- if (tpr >= (max_irr & 0xf0))
- vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+ if (tpr >= irr)
+ svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
}
-static void svm_intr_assist(struct kvm_vcpu *vcpu)
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- int intr_vector = -1;
-
- if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
- ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
- intr_vector = vmcb->control.exit_int_info &
- SVM_EVTINJ_VEC_MASK;
- vmcb->control.exit_int_info = 0;
- svm_inject_irq(svm, intr_vector);
- goto out;
- }
-
- if (vmcb->control.int_ctl & V_IRQ_MASK)
- goto out;
+ return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ !(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
- if (!kvm_cpu_has_interrupt(vcpu))
- goto out;
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (nested_svm_intr(svm))
- goto out;
+ return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
- if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
- goto out;
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
- if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
- (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
- (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
- /* unable to deliver irq, set pending irq */
- svm_set_vintr(svm);
- svm_inject_irq(svm, 0x0);
- goto out;
+ if (masked) {
+ svm->vcpu.arch.hflags |= HF_NMI_MASK;
+ svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ } else {
+ svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+ svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
}
- /* Okay, we can deliver the interrupt: grab it and update PIC state. */
- intr_vector = kvm_cpu_get_interrupt(vcpu);
- svm_inject_irq(svm, intr_vector);
-out:
- update_cr8_intercept(vcpu);
}
-static void kvm_reput_irq(struct vcpu_svm *svm)
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- struct vmcb_control_area *control = &svm->vmcb->control;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
+ int ret;
- if ((control->int_ctl & V_IRQ_MASK)
- && !irqchip_in_kernel(svm->vcpu.kvm)) {
- control->int_ctl &= ~V_IRQ_MASK;
- push_irq(&svm->vcpu, control->int_vector);
- }
+ if (!gif_set(svm) ||
+ (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+ return 0;
- svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vcpu.arch.hflags & HF_GIF_MASK);
-}
+ ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
-static void svm_do_inject_vector(struct vcpu_svm *svm)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
- int word_index = __ffs(vcpu->arch.irq_summary);
- int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
+ if (is_nested(svm))
+ return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
- clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
- if (!vcpu->arch.irq_pending[word_index])
- clear_bit(word_index, &vcpu->arch.irq_summary);
- svm_inject_irq(svm, irq);
+ return ret;
}
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb_control_area *control = &svm->vmcb->control;
- if (nested_svm_intr(svm))
- return;
+ nested_svm_intr(svm);
- svm->vcpu.arch.interrupt_window_open =
- (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
- (svm->vcpu.arch.hflags & HF_GIF_MASK));
+ /* In case GIF=0 we can't rely on the CPU to tell us when
+ * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+ * The next time we get that intercept, this function will be
+ * called again though and we'll get the vintr intercept. */
+ if (gif_set(svm)) {
+ svm_set_vintr(svm);
+ svm_inject_irq(svm, 0x0);
+ }
+}
- if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
- /*
- * If interrupts enabled, and not blocked by sti or mov ss. Good.
- */
- svm_do_inject_vector(svm);
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
- /*
- * Interrupts blocked. Wait for unblock.
- */
- if (!svm->vcpu.arch.interrupt_window_open &&
- (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- svm_set_vintr(svm);
- else
- svm_clear_vintr(svm);
+ if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+ == HF_NMI_MASK)
+ return; /* IRET will cause a vm exit */
+
+ /* Something prevents NMI from been injected. Single step over
+ possible problem (IRET or exception injection or interrupt
+ shadow) */
+ svm->nmi_singlestep = true;
+ svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+ update_db_intercept(vcpu);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2405,7 +2589,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
- kvm_lapic_set_tpr(vcpu, cr8);
+ kvm_set_cr8(vcpu, cr8);
}
}
@@ -2414,27 +2598,76 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8;
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
cr8 = kvm_get_cr8(vcpu);
svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
}
+static void svm_complete_interrupts(struct vcpu_svm *svm)
+{
+ u8 vector;
+ int type;
+ u32 exitintinfo = svm->vmcb->control.exit_int_info;
+
+ if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+ svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
+ svm->vcpu.arch.nmi_injected = false;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
+
+ if (!(exitintinfo & SVM_EXITINTINFO_VALID))
+ return;
+
+ vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
+ type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
+
+ switch (type) {
+ case SVM_EXITINTINFO_TYPE_NMI:
+ svm->vcpu.arch.nmi_injected = true;
+ break;
+ case SVM_EXITINTINFO_TYPE_EXEPT:
+ /* In case of software exception do not reinject an exception
+ vector, but re-execute and instruction instead */
+ if (is_nested(svm))
+ break;
+ if (kvm_exception_is_soft(vector))
+ break;
+ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
+ u32 err = svm->vmcb->control.exit_int_info_err;
+ kvm_queue_exception_e(&svm->vcpu, vector, err);
+
+ } else
+ kvm_queue_exception(&svm->vcpu, vector);
+ break;
+ case SVM_EXITINTINFO_TYPE_INTR:
+ kvm_queue_interrupt(&svm->vcpu, vector, false);
+ break;
+ default:
+ break;
+ }
+}
+
#ifdef CONFIG_X86_64
#define R "r"
#else
#define R "e"
#endif
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u16 fs_selector;
u16 gs_selector;
u16 ldt_selector;
+ /*
+ * A vmexit emulation is required before the vcpu can be executed
+ * again.
+ */
+ if (unlikely(svm->nested.exit_required))
+ return;
+
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2447,9 +2680,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
fs_selector = kvm_read_fs();
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
- svm->host_cr2 = kvm_read_cr2();
- if (!is_nested(svm))
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
@@ -2534,8 +2765,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- kvm_write_cr2(svm->host_cr2);
-
kvm_load_fs(fs_selector);
kvm_load_gs(gs_selector);
kvm_load_ldt(ldt_selector);
@@ -2550,6 +2779,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0;
+
+ if (npt_enabled) {
+ vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
+ vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
+ }
}
#undef R
@@ -2615,11 +2849,69 @@ static int get_npt_level(void)
#endif
}
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
}
+static const struct trace_print_flags svm_exit_reasons_str[] = {
+ { SVM_EXIT_READ_CR0, "read_cr0" },
+ { SVM_EXIT_READ_CR3, "read_cr3" },
+ { SVM_EXIT_READ_CR4, "read_cr4" },
+ { SVM_EXIT_READ_CR8, "read_cr8" },
+ { SVM_EXIT_WRITE_CR0, "write_cr0" },
+ { SVM_EXIT_WRITE_CR3, "write_cr3" },
+ { SVM_EXIT_WRITE_CR4, "write_cr4" },
+ { SVM_EXIT_WRITE_CR8, "write_cr8" },
+ { SVM_EXIT_READ_DR0, "read_dr0" },
+ { SVM_EXIT_READ_DR1, "read_dr1" },
+ { SVM_EXIT_READ_DR2, "read_dr2" },
+ { SVM_EXIT_READ_DR3, "read_dr3" },
+ { SVM_EXIT_WRITE_DR0, "write_dr0" },
+ { SVM_EXIT_WRITE_DR1, "write_dr1" },
+ { SVM_EXIT_WRITE_DR2, "write_dr2" },
+ { SVM_EXIT_WRITE_DR3, "write_dr3" },
+ { SVM_EXIT_WRITE_DR5, "write_dr5" },
+ { SVM_EXIT_WRITE_DR7, "write_dr7" },
+ { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
+ { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
+ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
+ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" },
+ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" },
+ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" },
+ { SVM_EXIT_INTR, "interrupt" },
+ { SVM_EXIT_NMI, "nmi" },
+ { SVM_EXIT_SMI, "smi" },
+ { SVM_EXIT_INIT, "init" },
+ { SVM_EXIT_VINTR, "vintr" },
+ { SVM_EXIT_CPUID, "cpuid" },
+ { SVM_EXIT_INVD, "invd" },
+ { SVM_EXIT_HLT, "hlt" },
+ { SVM_EXIT_INVLPG, "invlpg" },
+ { SVM_EXIT_INVLPGA, "invlpga" },
+ { SVM_EXIT_IOIO, "io" },
+ { SVM_EXIT_MSR, "msr" },
+ { SVM_EXIT_TASK_SWITCH, "task_switch" },
+ { SVM_EXIT_SHUTDOWN, "shutdown" },
+ { SVM_EXIT_VMRUN, "vmrun" },
+ { SVM_EXIT_VMMCALL, "hypercall" },
+ { SVM_EXIT_VMLOAD, "vmload" },
+ { SVM_EXIT_VMSAVE, "vmsave" },
+ { SVM_EXIT_STGI, "stgi" },
+ { SVM_EXIT_CLGI, "clgi" },
+ { SVM_EXIT_SKINIT, "skinit" },
+ { SVM_EXIT_WBINVD, "wbinvd" },
+ { SVM_EXIT_MONITOR, "monitor" },
+ { SVM_EXIT_MWAIT, "mwait" },
+ { SVM_EXIT_NPF, "npf" },
+ { -1, NULL }
+};
+
+static bool svm_gb_page_enable(void)
+{
+ return true;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -2657,6 +2949,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_gdt = svm_set_gdt,
.get_dr = svm_get_dr,
.set_dr = svm_set_dr,
+ .cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
@@ -2665,17 +2958,26 @@ static struct kvm_x86_ops svm_x86_ops = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+ .set_interrupt_shadow = svm_set_interrupt_shadow,
+ .get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
- .get_irq = svm_get_irq,
.set_irq = svm_set_irq,
+ .set_nmi = svm_inject_nmi,
.queue_exception = svm_queue_exception,
- .exception_injected = svm_exception_injected,
- .inject_pending_irq = svm_intr_assist,
- .inject_pending_vectors = do_interrupt_requests,
+ .interrupt_allowed = svm_interrupt_allowed,
+ .nmi_allowed = svm_nmi_allowed,
+ .get_nmi_mask = svm_get_nmi_mask,
+ .set_nmi_mask = svm_set_nmi_mask,
+ .enable_nmi_window = enable_nmi_window,
+ .enable_irq_window = enable_irq_window,
+ .update_cr8_intercept = update_cr8_intercept,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
- .get_mt_mask_shift = svm_get_mt_mask_shift,
+ .get_mt_mask = svm_get_mt_mask,
+
+ .exit_reasons_str = svm_exit_reasons_str,
+ .gb_page_enable = svm_gb_page_enable,
};
static int __init svm_init(void)