From 510043da8582ad49d22a1e9a6b211e6ede10cd2e Mon Sep 17 00:00:00 2001 From: Dor Laor Date: Mon, 19 Feb 2007 18:25:43 +0200 Subject: KVM: Use the generic skip_emulated_instruction() in hypercall code Instead of twiddling the rip registers directly, use the skip_emulated_instruction() function to do that for us. Signed-off-by: Dor Laor Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3d8ea7ac2ecc..6787f11738cf 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1078,7 +1078,8 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->vmcb->save.rip += 3; + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; + skip_emulated_instruction(vcpu); return kvm_hypercall(vcpu, kvm_run); } -- cgit v1.2.3 From 46fc1477887c41c8e900f2c95485e222b9a54822 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 22 Feb 2007 19:39:30 +0200 Subject: KVM: Do not communicate to userspace through cpu registers during PIO Currently when passing the a PIO emulation request to userspace, we rely on userspace updating %rax (on 'in' instructions) and %rsi/%rdi/%rcx (on string instructions). This (a) requires two extra ioctls for getting and setting the registers and (b) is unfriendly to non-x86 archs, when they get kvm ports. So fix by doing the register fixups in the kernel and passing to userspace only an abstract description of the PIO to be done. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- drivers/kvm/svm.c | 2 ++ drivers/kvm/vmx.c | 2 ++ include/linux/kvm.h | 6 +++--- 5 files changed, 53 insertions(+), 6 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 901b8d917b55..59cbc5b1d905 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -274,6 +274,7 @@ struct kvm_vcpu { int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; + int pio_pending; struct { int active; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 42be8a8f299d..ff8bcfee76e5 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1504,6 +1504,44 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); +static void complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_io *io = &vcpu->run->io; + long delta; + + kvm_arch_ops->cache_regs(vcpu); + + if (!io->string) { + if (io->direction == KVM_EXIT_IO_IN) + memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value, + io->size); + } else { + delta = 1; + if (io->rep) { + delta *= io->count; + /* + * The size of the register should really depend on + * current address size. 
+ */ + vcpu->regs[VCPU_REGS_RCX] -= delta; + } + if (io->string_down) + delta = -delta; + delta *= io->size; + if (io->direction == KVM_EXIT_IO_IN) + vcpu->regs[VCPU_REGS_RDI] += delta; + else + vcpu->regs[VCPU_REGS_RSI] += delta; + } + + vcpu->pio_pending = 0; + vcpu->run->io_completed = 0; + + kvm_arch_ops->decache_regs(vcpu); + + kvm_arch_ops->skip_emulated_instruction(vcpu); +} + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -1518,9 +1556,13 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->emulated = 0; } - if (kvm_run->mmio_completed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; + if (kvm_run->io_completed) { + if (vcpu->pio_pending) + complete_pio(vcpu); + else { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + } } vcpu->mmio_needed = 0; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 6787f11738cf..c35b8c83bf3f 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1037,6 +1037,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; + kvm_run->io.count = 1; if (kvm_run->io.string) { unsigned addr_mask; @@ -1056,6 +1057,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } else kvm_run->io.value = vcpu->svm->vmcb->save.rax; + vcpu->pio_pending = 1; return 0; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a721b60f7385..4d5f40fcb651 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1459,12 +1459,14 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; kvm_run->io.rep = (exit_qualification & 32) != 0; kvm_run->io.port = exit_qualification >> 16; + kvm_run->io.count = 1; if (kvm_run->io.string) { if (!get_io_count(vcpu, &kvm_run->io.count)) return 1; kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); } else kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ + vcpu->pio_pending = 1; return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d88e7508ee0a..19aeb3385188 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 5 +#define KVM_API_VERSION 6 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -53,7 +53,7 @@ enum kvm_exit_reason { struct kvm_run { /* in */ __u32 emulated; /* skip current instruction */ - __u32 mmio_completed; /* mmio request completed */ + __u32 io_completed; /* mmio/pio request completed */ __u8 request_interrupt_window; __u8 padding1[7]; @@ -80,7 +80,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct { + struct kvm_io { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; -- cgit v1.2.3 From 06465c5a3aa9948a7b00af49cd22ed8f235cdb0f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 28 Feb 2007 20:46:53 +0200 Subject: KVM: Handle cpuid in the kernel instead of punting to userspace KVM used to handle cpuid by letting userspace decide what values to return to the guest. We now handle cpuid completely in the kernel. 
We still let userspace decide which values the guest will see by having userspace set up the value table beforehand (this is necessary to allow management software to set the cpu features to the least common denominator, so that live migration can work). The motivation for the change is that kvm kernel code can be impacted by cpuid features, for example the x86 emulator. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 5 ++++ drivers/kvm/kvm_main.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/svm.c | 4 +-- drivers/kvm/vmx.c | 4 +-- include/linux/kvm.h | 18 ++++++++++++- 5 files changed, 95 insertions(+), 5 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 59cbc5b1d905..be3a0e7ecae4 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -55,6 +55,7 @@ #define KVM_NUM_MMU_PAGES 256 #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 +#define KVM_MAX_CPUID_ENTRIES 40 #define FX_IMAGE_SIZE 512 #define FX_IMAGE_ALIGN 16 @@ -286,6 +287,9 @@ struct kvm_vcpu { u32 ar; } tr, es, ds, fs, gs; } rmode; + + int cpuid_nent; + struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; }; struct kvm_memory_slot { @@ -446,6 +450,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, struct x86_emulate_ctxt; +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ff8bcfee76e5..caec54fbf07f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1504,6 +1504,43 @@ void save_msrs(struct vmx_msr_entry *e, int n) } EXPORT_SYMBOL_GPL(save_msrs); +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + int i; + u32 function; + struct kvm_cpuid_entry *e, *best; + + kvm_arch_ops->cache_regs(vcpu); + function = vcpu->regs[VCPU_REGS_RAX]; + vcpu->regs[VCPU_REGS_RAX] = 0; + vcpu->regs[VCPU_REGS_RBX] = 0; + vcpu->regs[VCPU_REGS_RCX] = 0; + vcpu->regs[VCPU_REGS_RDX] = 0; + best = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == function) { + best = e; + break; + } + /* + * Both basic or both extended? 
+ */ + if (((e->function ^ function) & 0x80000000) == 0) + if (!best || e->function > best->function) + best = e; + } + if (best) { + vcpu->regs[VCPU_REGS_RAX] = best->eax; + vcpu->regs[VCPU_REGS_RBX] = best->ebx; + vcpu->regs[VCPU_REGS_RCX] = best->ecx; + vcpu->regs[VCPU_REGS_RDX] = best->edx; + } + kvm_arch_ops->decache_regs(vcpu); + kvm_arch_ops->skip_emulated_instruction(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); + static void complete_pio(struct kvm_vcpu *vcpu) { struct kvm_io *io = &vcpu->run->io; @@ -2075,6 +2112,26 @@ out: return r; } +static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid *cpuid, + struct kvm_cpuid_entry __user *entries) +{ + int r; + + r = -E2BIG; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + goto out; + r = -EFAULT; + if (copy_from_user(&vcpu->cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out; + vcpu->cpuid_nent = cpuid->nent; + return 0; + +out: + return r; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2181,6 +2238,18 @@ static long kvm_vcpu_ioctl(struct file *filp, case KVM_SET_MSRS: r = msr_io(vcpu, argp, do_set_msr, 0); break; + case KVM_SET_CPUID: { + struct kvm_cpuid __user *cpuid_arg = argp; + struct kvm_cpuid cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + goto out; + break; + } default: ; } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index c35b8c83bf3f..d4b2936479d9 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1101,8 +1101,8 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 4d5f40fcb651..71410a65bb90 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1585,8 +1585,8 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - kvm_run->exit_reason = KVM_EXIT_CPUID; - return 0; + kvm_emulate_cpuid(vcpu); + return 1; } static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 19aeb3385188..15e23bc06e8b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -41,7 +41,6 @@ enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, KVM_EXIT_IO = 2, - KVM_EXIT_CPUID = 3, KVM_EXIT_DEBUG = 4, KVM_EXIT_HLT = 5, KVM_EXIT_MMIO = 6, @@ -210,6 +209,22 @@ struct kvm_dirty_log { }; }; +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + #define KVMIO 0xAE /* @@ -243,5 +258,6 @@ struct kvm_dirty_log { #define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 17, struct kvm_cpuid) #endif -- cgit v1.2.3 From 8eb7d334bd8e693340ee198280f7d45035cdab8c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 14:17:08 
+0200 Subject: KVM: Fold kvm_run::exit_type into kvm_run::exit_reason Currently, userspace is told about the nature of the last exit from the guest using two fields, exit_type and exit_reason, where exit_type has just two enumerations (and no need for more). So fold exit_type into exit_reason, reducing the complexity of determining what really happened. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 3 +-- drivers/kvm/svm.c | 7 +++---- drivers/kvm/vmx.c | 7 +++---- include/linux/kvm.h | 15 ++++++++------- 4 files changed, 15 insertions(+), 17 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index de93117f1e97..ac44df551aa8 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1608,8 +1608,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; - if (kvm_run->exit_type == KVM_EXIT_TYPE_VM_EXIT - && kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { kvm_arch_ops->cache_regs(vcpu); vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; kvm_arch_ops->decache_regs(vcpu); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index d4b2936479d9..b09928f14219 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1298,8 +1298,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 exit_code = vcpu->svm->vmcb->control.exit_code; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; - if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " @@ -1609,8 +1607,9 @@ again: vcpu->svm->next_rip = 0; if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vcpu->svm->vmcb->control.exit_code; post_kvm_run_save(vcpu, kvm_run); return 0; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 71410a65bb90..cf9568fbe8a5 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1922,10 +1922,10 @@ again: asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); - kvm_run->exit_type = 0; if (fail) { - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; - kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; } else { /* @@ -1935,7 +1935,6 @@ again: profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); vcpu->launched = 1; - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. 
*/ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 9151ebfa22e9..57f47ef93829 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 7 +#define KVM_API_VERSION 8 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -34,9 +34,6 @@ struct kvm_memory_region { #define KVM_MEM_LOG_DIRTY_PAGES 1UL -#define KVM_EXIT_TYPE_FAIL_ENTRY 1 -#define KVM_EXIT_TYPE_VM_EXIT 2 - enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, @@ -47,6 +44,7 @@ enum kvm_exit_reason { KVM_EXIT_MMIO = 6, KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_SHUTDOWN = 8, + KVM_EXIT_FAIL_ENTRY = 9, }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ @@ -57,12 +55,11 @@ struct kvm_run { __u8 padding1[3]; /* out */ - __u32 exit_type; __u32 exit_reason; __u32 instruction_length; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u16 padding2; + __u8 padding2[6]; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -71,8 +68,12 @@ struct kvm_run { union { /* KVM_EXIT_UNKNOWN */ struct { - __u32 hardware_exit_reason; + __u64 hardware_exit_reason; } hw; + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + } fail_entry; /* KVM_EXIT_EXCEPTION */ struct { __u32 exception; -- cgit v1.2.3 From 1b19f3e61d7e1edb395dd64bf7d63621a37af8ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Mar 2007 14:24:03 +0200 Subject: KVM: Add a special exit reason when exiting due to an interrupt This is redundant, as we also return -EINTR from the ioctl, but it allows us to examine the exit_reason field on resume without seeing old data. Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 2 ++ drivers/kvm/vmx.c | 2 ++ include/linux/kvm.h | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index b09928f14219..0311665e3c83 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1619,12 +1619,14 @@ again: if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } kvm_resched(vcpu); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index cf9568fbe8a5..e69bab6d811d 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1941,12 +1941,14 @@ again: if (signal_pending(current)) { ++kvm_stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { ++kvm_stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 57f47ef93829..b3af92e7bf5d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -11,7 +11,7 @@ #include #include -#define KVM_API_VERSION 8 +#define KVM_API_VERSION 9 /* * Architectural interrupt line count, and the size of the bitmap needed @@ -45,6 +45,7 @@ enum kvm_exit_reason { KVM_EXIT_IRQ_WINDOW_OPEN = 7, KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_FAIL_ENTRY = 9, + KVM_EXIT_INTR = 10, }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ -- cgit v1.2.3 From 6722c51c51518af9581ab6cd9b6aec93774334a6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Mar 2007 17:45:40 +0200 Subject: KVM: Initialize the apic_base msr on svm too Older 
userspace didn't care, but newer userspace (with the cpuid changes) does. Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 0311665e3c83..2396ada23777 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -582,6 +582,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) init_vmcb(vcpu->svm->vmcb); fx_init(vcpu); + vcpu->apic_base = 0xfee00000 | + /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | + MSR_IA32_APICBASE_ENABLE; return 0; -- cgit v1.2.3 From 039576c03c35e2f990ad9bb9c39e1bad3cd60d34 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Mar 2007 12:46:50 +0200 Subject: KVM: Avoid guest virtual addresses in string pio userspace interface The current string pio interface communicates using guest virtual addresses, relying on userspace to translate addresses and to check permissions. This interface cannot fully support guest smp, as the check needs to take into account two pages at one in case an unaligned string transfer straddles a page boundary. Change the interface not to communicate guest addresses at all; instead use a buffer page (mmaped by userspace) and do transfers there. The kernel manages the virtual to physical translation and can perform the checks atomically by taking the appropriate locks. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 21 +++++- drivers/kvm/kvm_main.c | 183 +++++++++++++++++++++++++++++++++++++++++++++---- drivers/kvm/mmu.c | 9 +++ drivers/kvm/svm.c | 40 +++++------ drivers/kvm/vmx.c | 40 +++++------ include/linux/kvm.h | 11 +-- 6 files changed, 238 insertions(+), 66 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 1c4a581938bf..7866b34b6c96 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -74,6 +74,8 @@ #define IOPL_SHIFT 12 +#define KVM_PIO_PAGE_OFFSET 1 + /* * Address types: * @@ -220,6 +222,18 @@ enum { VCPU_SREG_LDTR, }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + struct page *guest_pages[2]; + unsigned guest_page_offset; + int in; + int size; + int string; + int down; + int rep; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -275,7 +289,8 @@ struct kvm_vcpu { int mmio_size; unsigned char mmio_data[8]; gpa_t mmio_phys_addr; - int pio_pending; + struct kvm_pio_request pio; + void *pio_data; int sigset_active; sigset_t sigset; @@ -421,6 +436,7 @@ hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); void kvm_emulator_want_group7_invlpg(void); @@ -453,6 +469,9 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, struct x86_emulate_ctxt; +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_clts(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ba7f43a4459e..205998c141fb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_free_physmem_slot(&kvm->memslots[i], NULL); } +static void free_pio_guest_pages(struct kvm_vcpu *vcpu) +{ + int i; + + for (i 
= 0; i < 2; ++i) + if (vcpu->pio.guest_pages[i]) { + __free_page(vcpu->pio.guest_pages[i]); + vcpu->pio.guest_pages[i] = NULL; + } +} + static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { if (!vcpu->vmcs) @@ -357,6 +368,9 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) kvm_arch_ops->vcpu_free(vcpu); free_page((unsigned long)vcpu->run); vcpu->run = NULL; + free_page((unsigned long)vcpu->pio_data); + vcpu->pio_data = NULL; + free_pio_guest_pages(vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -1550,44 +1564,168 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); -static void complete_pio(struct kvm_vcpu *vcpu) +static int pio_copy_data(struct kvm_vcpu *vcpu) { - struct kvm_io *io = &vcpu->run->io; + void *p = vcpu->pio_data; + void *q; + unsigned bytes; + int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; + + kvm_arch_ops->vcpu_put(vcpu); + q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, + PAGE_KERNEL); + if (!q) { + kvm_arch_ops->vcpu_load(vcpu); + free_pio_guest_pages(vcpu); + return -ENOMEM; + } + q += vcpu->pio.guest_page_offset; + bytes = vcpu->pio.size * vcpu->pio.cur_count; + if (vcpu->pio.in) + memcpy(q, p, bytes); + else + memcpy(p, q, bytes); + q -= vcpu->pio.guest_page_offset; + vunmap(q); + kvm_arch_ops->vcpu_load(vcpu); + free_pio_guest_pages(vcpu); + return 0; +} + +static int complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; long delta; + int r; kvm_arch_ops->cache_regs(vcpu); if (!io->string) { - if (io->direction == KVM_EXIT_IO_IN) - memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value, + if (io->in) + memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, io->size); } else { + if (io->in) { + r = pio_copy_data(vcpu); + if (r) { + kvm_arch_ops->cache_regs(vcpu); + return r; + } + } + delta = 1; if (io->rep) { - delta *= io->count; + delta *= io->cur_count; /* * The size of the register should really depend on * current address size. */ vcpu->regs[VCPU_REGS_RCX] -= delta; } - if (io->string_down) + if (io->down) delta = -delta; delta *= io->size; - if (io->direction == KVM_EXIT_IO_IN) + if (io->in) vcpu->regs[VCPU_REGS_RDI] += delta; else vcpu->regs[VCPU_REGS_RSI] += delta; } - vcpu->pio_pending = 0; vcpu->run->io_completed = 0; kvm_arch_ops->decache_regs(vcpu); - kvm_arch_ops->skip_emulated_instruction(vcpu); + io->count -= io->cur_count; + io->cur_count = 0; + + if (!io->count) + kvm_arch_ops->skip_emulated_instruction(vcpu); + return 0; } +int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int string, int down, + gva_t address, int rep, unsigned port) +{ + unsigned now, in_page; + int i; + int nr_pages = 1; + struct page *page; + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = count; + vcpu->run->io.port = port; + vcpu->pio.count = count; + vcpu->pio.cur_count = count; + vcpu->pio.size = size; + vcpu->pio.in = in; + vcpu->pio.string = string; + vcpu->pio.down = down; + vcpu->pio.guest_page_offset = offset_in_page(address); + vcpu->pio.rep = rep; + + if (!string) { + kvm_arch_ops->cache_regs(vcpu); + memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); + kvm_arch_ops->decache_regs(vcpu); + return 0; + } + + if (!count) { + kvm_arch_ops->skip_emulated_instruction(vcpu); + return 1; + } + + now = min(count, PAGE_SIZE / size); + + if (!down) + in_page = PAGE_SIZE - offset_in_page(address); + else + in_page = offset_in_page(address) + size; + now = min(count, (unsigned long)in_page / size); + if (!now) { + /* + * String I/O straddles page boundary. Pin two guest pages + * so that we satisfy atomicity constraints. Do just one + * transaction to avoid complexity. + */ + nr_pages = 2; + now = 1; + } + if (down) { + /* + * String I/O in reverse. Yuck. Kill the guest, fix later. + */ + printk(KERN_ERR "kvm: guest string pio down\n"); + inject_gp(vcpu); + return 1; + } + vcpu->run->io.count = now; + vcpu->pio.cur_count = now; + + for (i = 0; i < nr_pages; ++i) { + spin_lock(&vcpu->kvm->lock); + page = gva_to_page(vcpu, address + i * PAGE_SIZE); + if (page) + get_page(page); + vcpu->pio.guest_pages[i] = page; + spin_unlock(&vcpu->kvm->lock); + if (!page) { + inject_gp(vcpu); + free_pio_guest_pages(vcpu); + return 1; + } + } + + if (!vcpu->pio.in) + return pio_copy_data(vcpu); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_setup_pio); + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -1602,9 +1740,11 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->cr8 = kvm_run->cr8; if (kvm_run->io_completed) { - if (vcpu->pio_pending) - complete_pio(vcpu); - else { + if (vcpu->pio.cur_count) { + r = complete_pio(vcpu); + if (r) + goto out; + } else { memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); vcpu->mmio_read_completed = 1; } @@ -1620,6 +1760,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = kvm_arch_ops->run(vcpu, kvm_run); +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1995,9 +2136,12 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - if (pgoff != 0) + if (pgoff == 0) + page = virt_to_page(vcpu->run); + else if (pgoff == KVM_PIO_PAGE_OFFSET) + page = virt_to_page(vcpu->pio_data); + else return NOPAGE_SIGBUS; - page = virt_to_page(vcpu->run); get_page(page); return page; } @@ -2094,6 +2238,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) goto out_unlock; vcpu->run = page_address(page); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + r = -ENOMEM; + if (!page) + goto out_free_run; + vcpu->pio_data = page_address(page); + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, FX_IMAGE_ALIGN); vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; @@ -2123,6 +2273,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) out_free_vcpus: kvm_free_vcpu(vcpu); +out_free_run: + free_page((unsigned long)vcpu->run); + vcpu->run = NULL; out_unlock: mutex_unlock(&vcpu->mutex); out: @@ -2491,7 +2644,7 @@ static long kvm_dev_ioctl(struct file *filp, r = -EINVAL; if (arg) goto 
out; - r = PAGE_SIZE; + r = 2 * PAGE_SIZE; break; default: ; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2d905770fd88..4843e95e54e1 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -735,6 +735,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva) return gpa_to_hpa(vcpu, gpa); } +struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) +{ + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); + + if (gpa == UNMAPPED_GVA) + return NULL; + return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); +} + static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 2396ada23777..64afc5cf890d 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -984,7 +984,7 @@ static int io_get_override(struct kvm_vcpu *vcpu, return 0; } -static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) +static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) { unsigned long addr_mask; unsigned long *reg; @@ -1028,40 +1028,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? - int _in = io_info & SVM_IOIO_TYPE_MASK; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address = 0; ++kvm_stat.io_exits; vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; - kvm_run->exit_reason = KVM_EXIT_IO; - kvm_run->io.port = io_info >> 16; - kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); - kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; - kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; - kvm_run->io.count = 1; + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; + port = io_info >> 16; + size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; + string = (io_info & SVM_IOIO_STR_MASK) != 0; + rep = (io_info & SVM_IOIO_REP_MASK) != 0; + count = 1; + down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - if (kvm_run->io.string) { + if (string) { unsigned addr_mask; - addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); + addr_mask = io_adress(vcpu, in, &address); if (!addr_mask) { printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); return 1; } - if (kvm_run->io.rep) { - kvm_run->io.count - = vcpu->regs[VCPU_REGS_RCX] & addr_mask; - kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags - & X86_EFLAGS_DF) != 0; - } - } else - kvm_run->io.value = vcpu->svm->vmcb->save.rax; - vcpu->pio_pending = 1; - return 0; + if (rep) + count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index e69bab6d811d..0d9bf0b36d37 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1394,7 +1394,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) +static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) { u64 inst; gva_t rip; @@ -1439,35 +1439,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) done: countr_size *= 8; *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); + //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); return 1; } static int 
handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; + int size, down, in, string, rep; + unsigned port; + unsigned long count; + gva_t address; ++kvm_stat.io_exits; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - kvm_run->exit_reason = KVM_EXIT_IO; - if (exit_qualification & 8) - kvm_run->io.direction = KVM_EXIT_IO_IN; - else - kvm_run->io.direction = KVM_EXIT_IO_OUT; - kvm_run->io.size = (exit_qualification & 7) + 1; - kvm_run->io.string = (exit_qualification & 16) != 0; - kvm_run->io.string_down - = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - kvm_run->io.rep = (exit_qualification & 32) != 0; - kvm_run->io.port = exit_qualification >> 16; - kvm_run->io.count = 1; - if (kvm_run->io.string) { - if (!get_io_count(vcpu, &kvm_run->io.count)) + in = (exit_qualification & 8) != 0; + size = (exit_qualification & 7) + 1; + string = (exit_qualification & 16) != 0; + down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; + count = 1; + rep = (exit_qualification & 32) != 0; + port = exit_qualification >> 16; + address = 0; + if (string) { + if (rep && !get_io_count(vcpu, &count)) return 1; - kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); - } else - kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ - vcpu->pio_pending = 1; - return 0; + address = vmcs_readl(GUEST_LINEAR_ADDRESS); + } + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, + address, rep, port); } static void diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dad90816cad8..728b24cf5d77 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -86,16 +86,9 @@ struct kvm_run { #define KVM_EXIT_IO_OUT 1 __u8 direction; __u8 size; /* bytes */ - __u8 string; - __u8 string_down; - __u8 rep; - __u8 pad; __u16 port; - __u64 count; - union { - __u64 address; - __u32 value; - }; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ } io; struct { } debug; -- cgit v1.2.3 From f6528b03f167785301908bf124db7be591e983ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 20 Mar 2007 18:44:51 +0200 Subject: KVM: Remove set_cr0_no_modeswitch() arch op set_cr0_no_modeswitch() was a hack to avoid corrupting segment registers. As we now cache the protected mode values on entry to real mode, this isn't an issue anymore, and it interferes with reboot (which usually _is_ a modeswitch). 
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 -- drivers/kvm/kvm_main.c | 2 +- drivers/kvm/svm.c | 1 - drivers/kvm/vmx.c | 17 ----------------- 4 files changed, 1 insertion(+), 21 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index a4331da816d0..7361c45d70c9 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -383,8 +383,6 @@ struct kvm_arch_ops { void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, - unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 205998c141fb..b998bc68e9f2 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1936,7 +1936,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->cr0 != sregs->cr0; - kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); + kvm_arch_ops->set_cr0(vcpu, sregs->cr0); mmu_reset_needed |= vcpu->cr4 != sregs->cr4; kvm_arch_ops->set_cr4(vcpu, sregs->cr4); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 64afc5cf890d..d3cc1157d23b 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1716,7 +1716,6 @@ static struct kvm_arch_ops svm_arch_ops = { .get_cs_db_l_bits = svm_get_cs_db_l_bits, .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, .set_cr0 = svm_set_cr0, - .set_cr0_no_modeswitch = svm_set_cr0, .set_cr3 = svm_set_cr3, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index aa7e2ba6fb5a..027a9625ef90 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -788,22 +788,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vcpu->cr0 = cr0; } -/* - * Used when restoring the VM to avoid corrupting segment registers - */ -static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) -{ - if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) - enter_rmode(vcpu); - - vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0); - update_exception_bitmap(vcpu); - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); - vcpu->cr0 = cr0; -} - static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { vmcs_writel(GUEST_CR3, cr3); @@ -2069,7 +2053,6 @@ static struct kvm_arch_ops vmx_arch_ops = { .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, .set_cr0 = vmx_set_cr0, - .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, .set_cr3 = vmx_set_cr3, .set_cr4 = vmx_set_cr4, #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 6da63cf95f6a19fe0a302232048452c96b178e45 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 21 Mar 2007 18:11:36 +0200 Subject: KVM: Don't allow the guest to turn off the cpu cache The cpu cache is a host resource; the guest should not be able to turn it off (even for itself). 
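A minimal standalone sketch of the effect, for illustration only (not part of the patch): the value the guest writes stays visible to the guest, while the value actually loaded into the VMCB keeps paging and write-protect forced on and the cache-disable bits forced off. The CR0_*_MASK names mirror the patch; the numeric values are the architectural CR0 bit positions.

#include <stdio.h>

#define CR0_PE_MASK (1UL << 0)
#define CR0_WP_MASK (1UL << 16)
#define CR0_NW_MASK (1UL << 29)
#define CR0_CD_MASK (1UL << 30)
#define CR0_PG_MASK (1UL << 31)

int main(void)
{
	/* guest tries to disable the cpu cache */
	unsigned long guest_cr0 = CR0_PE_MASK | CR0_CD_MASK | CR0_NW_MASK;
	unsigned long shadow_cr0 = guest_cr0;	/* what the guest reads back */
	unsigned long real_cr0 = guest_cr0;	/* what the hardware runs with */

	real_cr0 |= CR0_PG_MASK | CR0_WP_MASK;
	real_cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK);

	printf("guest-visible cr0 %#lx, vmcb cr0 %#lx\n", shadow_cr0, real_cr0);
	return 0;
}
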
Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index d3cc1157d23b..191bc45b83e0 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -737,8 +737,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif vcpu->svm->cr0 = cr0; - vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK; vcpu->cr0 = cr0; + cr0 |= CR0_PG_MASK | CR0_WP_MASK; + cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); + vcpu->svm->vmcb->save.cr0 = cr0; } static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -- cgit v1.2.3 From fcd3410870049cb74bb1a3a2458cb3ec21185cd1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 21 Mar 2007 18:14:42 +0200 Subject: KVM: Remove unused and write-only variables Trivial cleanup. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_svm.h | 2 -- drivers/kvm/svm.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index 624f1ca48657..a1a9eba50d47 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -28,8 +28,6 @@ struct vcpu_svm { struct svm_cpu_data *svm_data; uint64_t asid_generation; - unsigned long cr0; - unsigned long cr4; unsigned long db_regs[NUM_DB_REGS]; u64 next_rip; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 191bc45b83e0..ddc0505c3374 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -576,7 +576,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) vcpu->svm->vmcb = page_address(page); memset(vcpu->svm->vmcb, 0, PAGE_SIZE); vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; - vcpu->svm->cr0 = 0x00000010; vcpu->svm->asid_generation = 0; memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); init_vmcb(vcpu->svm->vmcb); @@ -736,7 +735,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } } #endif - vcpu->svm->cr0 = cr0; vcpu->cr0 = cr0; cr0 |= CR0_PG_MASK | CR0_WP_MASK; cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); -- cgit v1.2.3 From 916ce2360fadc71d924e02403b31280112a31280 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Mar 2007 19:47:00 +0100 Subject: KVM: SVM: forbid guest to execute monitor/mwait This patch forbids the guest to execute monitor/mwait instructions on SVM. This is necessary because the guest can execute these instructions if they are available even if the kvm cpuid doesn't report its existence. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 6 +++++- drivers/kvm/svm.h | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index ddc0505c3374..0542d3357ce1 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -511,7 +511,9 @@ static void init_vmcb(struct vmcb *vmcb) (1ULL << INTERCEPT_VMSAVE) | (1ULL << INTERCEPT_STGI) | (1ULL << INTERCEPT_CLGI) | - (1ULL << INTERCEPT_SKINIT); + (1ULL << INTERCEPT_SKINIT) | + (1ULL << INTERCEPT_MONITOR) | + (1ULL << INTERCEPT_MWAIT); control->iopm_base_pa = iopm_base; control->msrpm_base_pa = msrpm_base; @@ -1292,6 +1294,8 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, [SVM_EXIT_STGI] = invalid_op_interception, [SVM_EXIT_CLGI] = invalid_op_interception, [SVM_EXIT_SKINIT] = invalid_op_interception, + [SVM_EXIT_MONITOR] = invalid_op_interception, + [SVM_EXIT_MWAIT] = invalid_op_interception, }; diff --git a/drivers/kvm/svm.h b/drivers/kvm/svm.h index df731c3fb588..5e93814400ce 100644 --- a/drivers/kvm/svm.h +++ b/drivers/kvm/svm.h @@ -44,6 +44,9 @@ enum { INTERCEPT_RDTSCP, INTERCEPT_ICEBP, INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, }; @@ -298,6 +301,9 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXIT_RDTSCP 0x087 #define SVM_EXIT_ICEBP 0x088 #define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 -- cgit v1.2.3 From 0cc5064d335543a72c5ef904a3f528966fa3f2d2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 25 Mar 2007 12:07:27 +0200 Subject: KVM: SVM: Ensure timestamp counter monotonicity When a vcpu is migrated from one cpu to another, its timestamp counter may lose its monotonic property if the host has unsynced timestamp counters. This can confuse the guest, sometimes to the point of refusing to boot. As the rdtsc instruction is rather fast on AMD processors (7-10 cycles), we can simply record the last host tsc when we drop the cpu, and adjust the vcpu tsc offset when we detect that we've migrated to a different cpu. 
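A standalone sketch of the arithmetic, for illustration only (the TSC values below are made up): the guest observes roughly host_tsc + tsc_offset, so folding the old-cpu/new-cpu difference into the offset at vcpu load time keeps the guest view from ever jumping backwards, even when the destination cpu's TSC is behind. Unsigned wraparound makes the same expression work when the delta is "negative".

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tsc_offset = 0;		/* vmcb control.tsc_offset */
	uint64_t tsc_at_put = 5000000;		/* host TSC when the vcpu left cpu A */
	uint64_t tsc_at_load = 1200000;		/* host TSC on cpu B, unsynced and behind */
	uint64_t guest_before, guest_after;

	guest_before = tsc_at_put + tsc_offset;

	/* adjustment done when we notice vcpu->cpu has changed */
	tsc_offset += tsc_at_put - tsc_at_load;

	guest_after = tsc_at_load + tsc_offset;

	printf("guest tsc before migration %llu, after %llu\n",
	       (unsigned long long)guest_before,
	       (unsigned long long)guest_after);
	return 0;
}
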
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/svm.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index f5e343cb06b0..6d0bd7aab92e 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -244,6 +244,7 @@ struct kvm_vcpu { struct mutex mutex; int cpu; int launched; + u64 host_tsc; struct kvm_run *run; int interrupt_window_open; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 0542d3357ce1..ca2642fc091f 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -459,7 +459,6 @@ static void init_vmcb(struct vmcb *vmcb) { struct vmcb_control_area *control = &vmcb->control; struct vmcb_save_area *save = &vmcb->save; - u64 tsc; control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | @@ -517,8 +516,7 @@ static void init_vmcb(struct vmcb *vmcb) control->iopm_base_pa = iopm_base; control->msrpm_base_pa = msrpm_base; - rdtscll(tsc); - control->tsc_offset = -tsc; + control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; init_seg(&save->es); @@ -606,11 +604,26 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu) { - get_cpu(); + int cpu; + + cpu = get_cpu(); + if (unlikely(cpu != vcpu->cpu)) { + u64 tsc_this, delta; + + /* + * Make sure that the guest sees a monotonically + * increasing TSC. + */ + rdtscll(tsc_this); + delta = vcpu->host_tsc - tsc_this; + vcpu->svm->vmcb->control.tsc_offset += delta; + vcpu->cpu = cpu; + } } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { + rdtscll(vcpu->host_tsc); put_cpu(); } -- cgit v1.2.3 From 55bf4028342d96b21fe5dc0721b481b0bc1e81f6 Mon Sep 17 00:00:00 2001 From: Michal Piotrowski Date: Sun, 25 Mar 2007 17:59:32 +0200 Subject: KVM: Remove unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused function CC drivers/kvm/svm.o drivers/kvm/svm.c:207: warning: ‘inject_db’ defined but not used Signed-off-by: Michal Piotrowski Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index ca2642fc091f..303e95957e7b 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -203,13 +203,6 @@ static void inject_ud(struct kvm_vcpu *vcpu) UD_VECTOR; } -static void inject_db(struct kvm_vcpu *vcpu) -{ - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | - SVM_EVTINJ_TYPE_EXEPT | - DB_VECTOR; -} - static int is_page_fault(uint32_t info) { info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; -- cgit v1.2.3 From 80b7706e4cbaa51d65bd6fea83bd0e59856f50e9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 30 Mar 2007 17:02:14 +0300 Subject: KVM: SVM: enable LBRV virtualization if available This patch enables the virtualization of the last branch record MSRs on SVM if this feature is available in hardware. It also introduces a small and simple check feature for specific SVM extensions. 
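A minimal sketch of the feature check being introduced, for illustration only: on real hardware the feature word comes from EDX of the SVM cpuid leaf (cpuid_edx(SVM_CPUID_FUNC), leaf 0x8000000a); here that read is stubbed out so the snippet is self-contained.

#include <stdint.h>
#include <stdio.h>

#define SVM_FEATURE_NPT		(1 << 0)
#define SVM_FEATURE_LBRV	(1 << 1)

static uint32_t svm_features;

static inline uint32_t svm_has(uint32_t feat)
{
	return svm_features & feat;
}

/* stand-in for cpuid_edx(SVM_CPUID_FUNC) on real hardware */
static uint32_t fake_cpuid_edx(void)
{
	return SVM_FEATURE_LBRV;
}

int main(void)
{
	svm_features = fake_cpuid_edx();

	if (svm_has(SVM_FEATURE_LBRV))
		printf("LBR virtualization present: enable control->lbr_ctl\n");
	if (!svm_has(SVM_FEATURE_NPT))
		printf("nested paging not present\n");
	return 0;
}
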
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 303e95957e7b..b7e1410e4007 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -44,6 +44,10 @@ MODULE_LICENSE("GPL"); #define KVM_EFER_LMA (1 << 10) #define KVM_EFER_LME (1 << 8) +#define SVM_FEATURE_NPT (1 << 0) +#define SVM_FEATURE_LBRV (1 << 1) +#define SVM_DEATURE_SVML (1 << 2) + unsigned long iopm_base; unsigned long msrpm_base; @@ -68,6 +72,7 @@ struct svm_cpu_data { }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +static uint32_t svm_features; struct svm_init_data { int cpu; @@ -82,6 +87,11 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define MAX_INST_SIZE 15 +static inline u32 svm_has(u32 feat) +{ + return svm_features & feat; +} + static unsigned get_addr_size(struct kvm_vcpu *vcpu) { struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; @@ -302,6 +312,7 @@ static void svm_hardware_enable(void *garbage) svm_data->asid_generation = 1; svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; svm_data->next_asid = svm_data->max_asid + 1; + svm_features = cpuid_edx(SVM_CPUID_FUNC); asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); gdt = (struct desc_struct *)gdt_descr.address; @@ -511,6 +522,8 @@ static void init_vmcb(struct vmcb *vmcb) control->msrpm_base_pa = msrpm_base; control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; + if (svm_has(SVM_FEATURE_LBRV)) + control->lbr_ctl = 1ULL; init_seg(&save->es); init_seg(&save->ss); -- cgit v1.2.3 From 5008fdf5b6a31240da060c0867d8f16f08ce2384 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 2 Apr 2007 13:05:50 +0300 Subject: KVM: Use kernel-standard types Noted by Joerg Roedel. Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index b7e1410e4007..c9b700d87801 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -63,9 +63,9 @@ struct kvm_ldttss_desc { struct svm_cpu_data { int cpu; - uint64_t asid_generation; - uint32_t max_asid; - uint32_t next_asid; + u64 asid_generation; + u32 max_asid; + u32 next_asid; struct kvm_ldttss_desc *tss_desc; struct page *save_area; -- cgit v1.2.3 From 364b625b561b1dd74e6fa696949ae3de28999a66 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 16 Apr 2007 14:28:40 +0300 Subject: KVM: SVM: Report hardware exit reason to userspace instead of dmesg Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index c9b700d87801..61ed7352e506 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1332,12 +1332,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || svm_exit_handlers[exit_code] == 0) { kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", - __FUNCTION__, - exit_code, - vcpu->svm->vmcb->save.rip, - vcpu->cr0, - vcpu->svm->vmcb->save.rflags); + kvm_run->hw.hardware_exit_reason = exit_code; return 0; } -- cgit v1.2.3 From 1165f5fec18c077bdba88e7125fd41f8e3617cb4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 19 Apr 2007 17:27:43 +0300 Subject: KVM: Per-vcpu statistics Make the exit statistics per-vcpu instead of global. 
This gives a 3.5% boost when running one virtual machine per core on my two socket dual core (4 cores total) machine. Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 35 ++++++++++++++------------- drivers/kvm/kvm_main.c | 61 +++++++++++++++++++++++++++++++++-------------- drivers/kvm/mmu.c | 2 +- drivers/kvm/paging_tmpl.h | 2 +- drivers/kvm/svm.c | 14 +++++------ drivers/kvm/vmx.c | 18 +++++++------- 6 files changed, 79 insertions(+), 53 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index b9c318a9e334..d1a90c5d76ce 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -236,6 +236,22 @@ struct kvm_pio_request { int rep; }; +struct kvm_stat { + u32 pf_fixed; + u32 pf_guest; + u32 tlb_flush; + u32 invlpg; + + u32 exits; + u32 io_exits; + u32 mmio_exits; + u32 signal_exits; + u32 irq_window_exits; + u32 halt_exits; + u32 request_irq_exits; + u32 irq_exits; +}; + struct kvm_vcpu { struct kvm *kvm; union { @@ -298,6 +314,8 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; + struct kvm_stat stat; + struct { int active; u8 save_iopl; @@ -347,22 +365,6 @@ struct kvm { struct file *filp; }; -struct kvm_stat { - u32 pf_fixed; - u32 pf_guest; - u32 tlb_flush; - u32 invlpg; - - u32 exits; - u32 io_exits; - u32 mmio_exits; - u32 signal_exits; - u32 irq_window_exits; - u32 halt_exits; - u32 request_irq_exits; - u32 irq_exits; -}; - struct descriptor_table { u16 limit; unsigned long base; @@ -424,7 +426,6 @@ struct kvm_arch_ops { unsigned char *hypercall_addr); }; -extern struct kvm_stat kvm_stat; extern struct kvm_arch_ops *kvm_arch_ops; #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index f5356358acff..911c8175cc08 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock); static LIST_HEAD(vm_list); struct kvm_arch_ops *kvm_arch_ops; -struct kvm_stat kvm_stat; -EXPORT_SYMBOL_GPL(kvm_stat); + +#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { const char *name; - u32 *data; + int offset; struct dentry *dentry; } debugfs_entries[] = { - { "pf_fixed", &kvm_stat.pf_fixed }, - { "pf_guest", &kvm_stat.pf_guest }, - { "tlb_flush", &kvm_stat.tlb_flush }, - { "invlpg", &kvm_stat.invlpg }, - { "exits", &kvm_stat.exits }, - { "io_exits", &kvm_stat.io_exits }, - { "mmio_exits", &kvm_stat.mmio_exits }, - { "signal_exits", &kvm_stat.signal_exits }, - { "irq_window", &kvm_stat.irq_window_exits }, - { "halt_exits", &kvm_stat.halt_exits }, - { "request_irq", &kvm_stat.request_irq_exits }, - { "irq_exits", &kvm_stat.irq_exits }, - { NULL, NULL } + { "pf_fixed", STAT_OFFSET(pf_fixed) }, + { "pf_guest", STAT_OFFSET(pf_guest) }, + { "tlb_flush", STAT_OFFSET(tlb_flush) }, + { "invlpg", STAT_OFFSET(invlpg) }, + { "exits", STAT_OFFSET(exits) }, + { "io_exits", STAT_OFFSET(io_exits) }, + { "mmio_exits", STAT_OFFSET(mmio_exits) }, + { "signal_exits", STAT_OFFSET(signal_exits) }, + { "irq_window", STAT_OFFSET(irq_window_exits) }, + { "halt_exits", STAT_OFFSET(halt_exits) }, + { "request_irq", STAT_OFFSET(request_irq_exits) }, + { "irq_exits", STAT_OFFSET(irq_exits) }, + { NULL } }; static struct dentry *debugfs_dir; @@ -2930,14 +2930,39 @@ static struct notifier_block kvm_cpu_notifier = { .priority = 20, /* must be > scheduler priority */ }; +static u64 stat_get(void *_offset) +{ + unsigned offset = (long)_offset; + u64 total = 0; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i; + + 
spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = &kvm->vcpus[i]; + total += *(u32 *)((void *)vcpu + offset); + } + spin_unlock(&kvm_lock); + return total; +} + +static void stat_set(void *offset, u64 val) +{ +} + +DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); + static __init void kvm_init_debug(void) { struct kvm_stats_debugfs_item *p; debugfs_dir = debugfs_create_dir("kvm", NULL); for (p = debugfs_entries; p->name; ++p) - p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, - p->data); + p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, + (void *)(long)p->offset, + &stat_fops); } static void kvm_exit_debug(void) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 8ccf84e3fda8..32c64f682081 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -936,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - ++kvm_stat.tlb_flush; + ++vcpu->stat.tlb_flush; kvm_arch_ops->tlb_flush(vcpu); } diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index b94010dad809..73ffbffb1097 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -448,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (is_io_pte(*shadow_pte)) return 1; - ++kvm_stat.pf_fixed; + ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); return write_pt; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 61ed7352e506..644efc5381ad 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -914,7 +914,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case EMULATE_DONE: return 1; case EMULATE_DO_MMIO: - ++kvm_stat.mmio_exits; + ++vcpu->stat.mmio_exits; kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; case EMULATE_FAIL: @@ -1054,7 +1054,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long count; gva_t address = 0; - ++kvm_stat.io_exits; + ++vcpu->stat.io_exits; vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; @@ -1096,7 +1096,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; - ++kvm_stat.halt_exits; + ++vcpu->stat.halt_exits; return 0; } @@ -1264,7 +1264,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, */ if (kvm_run->request_interrupt_window && !vcpu->irq_summary) { - ++kvm_stat.irq_window_exits; + ++vcpu->stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } @@ -1636,14 +1636,14 @@ again: r = handle_exit(vcpu, kvm_run); if (r > 0) { if (signal_pending(current)) { - ++kvm_stat.signal_exits; + ++vcpu->stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { - ++kvm_stat.request_irq_exits; + ++vcpu->stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; @@ -1672,7 +1672,7 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, { uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; - ++kvm_stat.pf_guest; + ++vcpu->stat.pf_guest; if (is_page_fault(exit_int_info)) { diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 37537af126d1..10845b7ff297 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1396,7 +1396,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case 
EMULATE_DONE: return 1; case EMULATE_DO_MMIO: - ++kvm_stat.mmio_exits; + ++vcpu->stat.mmio_exits; kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; case EMULATE_FAIL: @@ -1425,7 +1425,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_external_interrupt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - ++kvm_stat.irq_exits; + ++vcpu->stat.irq_exits; return 1; } @@ -1492,7 +1492,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long count; gva_t address; - ++kvm_stat.io_exits; + ++vcpu->stat.io_exits; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); in = (exit_qualification & 8) != 0; size = (exit_qualification & 7) + 1; @@ -1682,7 +1682,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - ++kvm_stat.irq_window_exits; + ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -1695,7 +1695,7 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; kvm_run->exit_reason = KVM_EXIT_HLT; - ++kvm_stat.halt_exits; + ++vcpu->stat.halt_exits; return 0; } @@ -1956,7 +1956,7 @@ again: reload_tss(); } - ++kvm_stat.exits; + ++vcpu->stat.exits; #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { @@ -1988,14 +1988,14 @@ again: if (r > 0) { /* Give scheduler a change to reschedule. */ if (signal_pending(current)) { - ++kvm_stat.signal_exits; + ++vcpu->stat.signal_exits; post_kvm_run_save(vcpu, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { - ++kvm_stat.request_irq_exits; + ++vcpu->stat.request_irq_exits; post_kvm_run_save(vcpu, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; @@ -2021,7 +2021,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, { u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - ++kvm_stat.pf_guest; + ++vcpu->stat.pf_guest; if (is_page_fault(vect_info)) { printk(KERN_DEBUG "inject_page_fault: " -- cgit v1.2.3 From 7807fa6ca5af2e5660a0eb3cd90276ca0c5bdfc8 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 23 Apr 2007 09:17:21 -0500 Subject: KVM: Lazy FPU support for SVM Avoid saving and restoring the guest fpu state on every exit. This shaves ~100 cycles off the guest/host switch. 
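A standalone toy model of the lazy-FPU flow, for illustration only: the real patch arms the #NM intercept and CR0.TS where this sketch uses plain flags, but the sequencing is the same -- skip the FPU swap on exits where the guest never touched the FPU, hand the FPU over on the first use, and re-arm the trap on an address-space switch.

#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	bool fpu_active;	/* guest currently owns the FPU */
	bool nm_trap_armed;	/* first FPU use will exit (#NM) */
};

/* cr3 switch: lazily take the FPU away again */
static void toy_set_cr3(struct toy_vcpu *v)
{
	v->fpu_active = false;
	v->nm_trap_armed = true;
}

/* what the #NM handler does: hand the FPU to the guest on first use */
static void toy_nm_exit(struct toy_vcpu *v)
{
	v->nm_trap_armed = false;
	v->fpu_active = true;
	printf("first FPU use: restore guest FPU state\n");
}

/* guest entry/exit path: only swap FPU state when the guest owns it */
static void toy_run(struct toy_vcpu *v)
{
	if (v->fpu_active)
		printf("fx_save(host), fx_restore(guest) ... fx_save(guest), fx_restore(host)\n");
	else
		printf("skip FPU save/restore on this exit\n");
}

int main(void)
{
	struct toy_vcpu v = { .fpu_active = false, .nm_trap_armed = true };

	toy_run(&v);		/* cheap exit: guest never touched the FPU */
	toy_nm_exit(&v);	/* guest touches the FPU -> #NM exit */
	toy_run(&v);		/* now the state has to be swapped */
	toy_set_cr3(&v);	/* address-space switch re-arms the trap */
	toy_run(&v);
	return 0;
}
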
Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 ++ drivers/kvm/svm.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index d1a90c5d76ce..61ff085df7e6 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -63,6 +63,7 @@ #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) #define DE_VECTOR 0 +#define NM_VECTOR 7 #define DF_VECTOR 8 #define TS_VECTOR 10 #define NP_VECTOR 11 @@ -301,6 +302,7 @@ struct kvm_vcpu { char fx_buf[FX_BUF_SIZE]; char *host_fx_image; char *guest_fx_image; + int fpu_active; int mmio_needed; int mmio_read_completed; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 644efc5381ad..2a7a0390bfb1 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -587,6 +587,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) init_vmcb(vcpu->svm->vmcb); fx_init(vcpu); + vcpu->fpu_active = 1; vcpu->apic_base = 0xfee00000 | /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE; @@ -756,6 +757,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } } #endif + if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + vcpu->fpu_active = 1; + } + vcpu->cr0 = cr0; cr0 |= CR0_PG_MASK | CR0_WP_MASK; cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); @@ -928,6 +934,16 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } +static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + if (!(vcpu->cr0 & CR0_TS_MASK)) + vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; + vcpu->fpu_active = 1; + + return 1; +} + static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { /* @@ -1292,6 +1308,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, [SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_INTR] = nop_on_interception, [SVM_EXIT_NMI] = nop_on_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -1481,8 +1498,10 @@ again: load_db_regs(vcpu->svm->db_regs); } - fx_save(vcpu->host_fx_image); - fx_restore(vcpu->guest_fx_image); + if (vcpu->fpu_active) { + fx_save(vcpu->host_fx_image); + fx_restore(vcpu->guest_fx_image); + } asm volatile ( #ifdef CONFIG_X86_64 @@ -1593,8 +1612,10 @@ again: #endif : "cc", "memory" ); - fx_save(vcpu->guest_fx_image); - fx_restore(vcpu->host_fx_image); + if (vcpu->fpu_active) { + fx_save(vcpu->guest_fx_image); + fx_restore(vcpu->host_fx_image); + } if ((vcpu->svm->vmcb->save.dr7 & 0xff)) load_db_regs(vcpu->svm->host_db_regs); @@ -1664,6 +1685,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { vcpu->svm->vmcb->save.cr3 = root; force_new_asid(vcpu); + + if (vcpu->fpu_active) { + vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; + vcpu->fpu_active = 0; + } } static void svm_inject_page_fault(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 25c4c2762e31a75403eca0dd59f2cab85e3a2532 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Fri, 27 Apr 2007 09:29:21 +0300 Subject: KVM: VMX: Properly shadow the CR0 register in the vcpu struct Set all of the host mask bits for CR0 so that we can maintain a proper shadow of CR0. 
This exposes CR0.TS, paving the way for lazy fpu handling. Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/kvm_main.c | 8 +++----- drivers/kvm/svm.c | 4 ++-- drivers/kvm/vmx.c | 14 ++++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 61ff085df7e6..f99e89e185b2 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -397,7 +397,7 @@ struct kvm_arch_ops { void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 67554034d001..cdf0b176851d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -510,7 +510,6 @@ EXPORT_SYMBOL_GPL(set_cr0); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) { - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); } EXPORT_SYMBOL_GPL(lmsw); @@ -1117,7 +1116,6 @@ int emulate_clts(struct kvm_vcpu *vcpu) { unsigned long cr0; - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); cr0 = vcpu->cr0 & ~CR0_TS_MASK; kvm_arch_ops->set_cr0(vcpu, cr0); return X86EMUL_CONTINUE; @@ -1318,7 +1316,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) { - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); + kvm_arch_ops->decache_cr4_guest_bits(vcpu); switch (cr) { case 0: return vcpu->cr0; @@ -1934,7 +1932,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->gdt.limit = dt.limit; sregs->gdt.base = dt.base; - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); + kvm_arch_ops->decache_cr4_guest_bits(vcpu); sregs->cr0 = vcpu->cr0; sregs->cr2 = vcpu->cr2; sregs->cr3 = vcpu->cr3; @@ -1985,7 +1983,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, #endif vcpu->apic_base = sregs->apic_base; - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); + kvm_arch_ops->decache_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->cr0 != sregs->cr0; kvm_arch_ops->set_cr0(vcpu, sregs->cr0); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 2a7a0390bfb1..bddd0238869d 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -738,7 +738,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) vcpu->svm->vmcb->save.gdtr.base = dt->base ; } -static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) +static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { } @@ -1759,7 +1759,7 @@ static struct kvm_arch_ops svm_arch_ops = { .get_segment = svm_get_segment, .set_segment = svm_set_segment, .get_cs_db_l_bits = svm_get_cs_db_l_bits, - .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, + .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, .set_cr4 = svm_set_cr4, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d28c848138ce..09608114e29a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -810,11 +810,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) +static void 
vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) { - vcpu->cr0 &= KVM_GUEST_CR0_MASK; - vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK; - vcpu->cr4 &= KVM_GUEST_CR4_MASK; vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; } @@ -1205,7 +1202,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_writel(TPR_THRESHOLD, 0); #endif - vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK); + vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); vcpu->cr0 = 0x60000010; @@ -1557,6 +1554,11 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; }; break; + case 2: /* clts */ + vcpu_load_rsp_rip(vcpu); + set_cr0(vcpu, vcpu->cr0 & ~CR0_TS_MASK); + skip_emulated_instruction(vcpu); + return 1; case 1: /*mov from cr*/ switch (cr) { case 3: @@ -2112,7 +2114,7 @@ static struct kvm_arch_ops vmx_arch_ops = { .get_segment = vmx_get_segment, .set_segment = vmx_set_segment, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, + .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr3 = vmx_set_cr3, .set_cr4 = vmx_set_cr4, -- cgit v1.2.3 From 94dfbdb3894eda2f673b70e20da2743c4a8d3968 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Sun, 29 Apr 2007 11:56:06 +0300 Subject: KVM: SVM: Only save/restore MSRs when needed We only have to save/restore MSR_GS_BASE on every VMEXIT. The rest can be saved/restored when we leave the VCPU. Since we don't emulate the DEBUGCTL MSRs and the guest cannot write to them, we don't have to worry about saving/restoring them at all. This shaves a whopping 40% off raw vmexit costs on AMD. Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- drivers/kvm/kvm_svm.h | 11 +++++------ drivers/kvm/svm.c | 26 +++++++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/kvm/svm.c') diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index a1a9eba50d47..a869983d683d 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -9,17 +9,15 @@ #include "svm.h" #include "kvm.h" -static const u32 host_save_msrs[] = { +static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, - MSR_FS_BASE, MSR_GS_BASE, + MSR_FS_BASE, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP, - MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/ }; -#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs) +#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define NUM_DB_REGS 4 struct vcpu_svm { @@ -32,7 +30,8 @@ struct vcpu_svm { u64 next_rip; - u64 host_msrs[NR_HOST_SAVE_MSRS]; + u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; + u64 host_gs_base; unsigned long host_cr2; unsigned long host_db_regs[NUM_DB_REGS]; unsigned long host_dr6; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index bddd0238869d..9c15f32eea18 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -522,8 +522,6 @@ static void init_vmcb(struct vmcb *vmcb) control->msrpm_base_pa = msrpm_base; control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; - if (svm_has(SVM_FEATURE_LBRV)) - control->lbr_ctl = 1ULL; init_seg(&save->es); init_seg(&save->ss); @@ -611,7 +609,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu) { - int cpu; + int cpu, i; cpu = get_cpu(); if (unlikely(cpu != vcpu->cpu)) { 
@@ -626,10 +624,18 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu) vcpu->svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; } + + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { + int i; + + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) + wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); + rdtscll(vcpu->host_tsc); put_cpu(); } @@ -815,18 +821,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) static void load_host_msrs(struct kvm_vcpu *vcpu) { - int i; - - for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) - wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); +#endif } static void save_host_msrs(struct kvm_vcpu *vcpu) { - int i; - - for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) - rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); +#ifdef CONFIG_X86_64 + rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); +#endif } static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) -- cgit v1.2.3
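
For reference, the split introduced by this patch can be modelled with a self-contained userspace sketch. The names below (toy_vcpu_load, cpu_msrs, cpu_gs_base, ...) are invented stand-ins for the real rdmsrl()/wrmsrl() calls and VMCB state, not KVM code; the point is only that the expensive loop now runs once per scheduling-in/out while each exit touches a single value:

#include <stdio.h>

enum { TOY_STAR, TOY_LSTAR, TOY_SYSENTER_EIP, TOY_NR_USER_MSRS };

static unsigned long long cpu_msrs[TOY_NR_USER_MSRS]; /* stands in for hardware MSRs */
static unsigned long long cpu_gs_base;

struct toy_vcpu {
	unsigned long long host_user_msrs[TOY_NR_USER_MSRS];
	unsigned long long host_gs_base;
};

/* Runs once when the vcpu is scheduled onto a cpu (cf. svm_vcpu_load). */
static void toy_vcpu_load(struct toy_vcpu *v)
{
	int i;

	for (i = 0; i < TOY_NR_USER_MSRS; i++)
		v->host_user_msrs[i] = cpu_msrs[i];   /* ~rdmsrl() */
}

/* Runs once when the vcpu is scheduled away (cf. svm_vcpu_put). */
static void toy_vcpu_put(struct toy_vcpu *v)
{
	int i;

	for (i = 0; i < TOY_NR_USER_MSRS; i++)
		cpu_msrs[i] = v->host_user_msrs[i];   /* ~wrmsrl() */
}

/* Runs on every guest entry/exit: only one value is saved and restored. */
static void toy_vcpu_run(struct toy_vcpu *v)
{
	v->host_gs_base = cpu_gs_base;                /* save_host_msrs() */
	/* ... VMRUN, guest runs, #VMEXIT ... */
	cpu_gs_base = v->host_gs_base;                /* load_host_msrs() */
}

int main(void)
{
	struct toy_vcpu v;
	int i;

	toy_vcpu_load(&v);
	for (i = 0; i < 3; i++)
		toy_vcpu_run(&v);    /* three exits, three cheap MSR touches */
	toy_vcpu_put(&v);
	printf("3 exits handled with a single full MSR save/restore pair\n");
	return 0;
}

The sketch mirrors the patch's choice of keeping only the GS base on the per-exit hot path and moving the remaining user MSRs out to vcpu_load()/vcpu_put().
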