diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/reboot.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/virtext.h | 16 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypervisor.h | 4 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/kvm.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/crash.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 88 | ||||
-rw-r--r-- | arch/x86/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 31 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 55 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 45 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu_internal.h | 14 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 13 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 16 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_iter.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 9 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 34 | ||||
-rw-r--r-- | arch/x86/kvm/xen.c | 26 | ||||
-rw-r--r-- | arch/x86/kvm/xen.h | 7 |
24 files changed, 276 insertions, 156 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 61012476d66e..cdb7e1492311 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -312,6 +312,9 @@ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index adb92fc4d7c9..37983871ed61 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -679,6 +679,11 @@ struct kvm_vcpu_hv { } nested; }; +struct kvm_hypervisor_cpuid { + u32 base; + u32 limit; +}; + /* Xen HVM per vcpu emulation context */ struct kvm_vcpu_xen { u64 hypercall_rip; @@ -699,6 +704,7 @@ struct kvm_vcpu_xen { struct hrtimer timer; int poll_evtchn; struct timer_list poll_timer; + struct kvm_hypervisor_cpuid cpuid; }; struct kvm_queued_exception { @@ -827,7 +833,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; - u32 kvm_cpuid_base; + struct kvm_hypervisor_cpuid kvm_cpuid; u64 reserved_gpa_bits; int maxphyaddr; @@ -1340,7 +1346,6 @@ struct kvm_arch { u32 bsp_vcpu_id; u64 disabled_quirks; - int cpu_dirty_logging_count; enum kvm_irqchip_mode irqchip_mode; u8 nr_reserved_ioapic_pins; diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..bc5b4d788c08 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +void cpu_emergency_disable_virtualization(void); + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 8757078d4442..3b12e6b99412 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -126,7 +126,21 @@ static inline void cpu_svm_disable(void) wrmsrl(MSR_VM_HSAVE_PA, 0); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~EFER_SVME); + if (efer & EFER_SVME) { + /* + * Force GIF=1 prior to disabling SVM to ensure INIT and NMI + * aren't blocked, e.g. if a fatal error occurred between CLGI + * and STGI. Note, STGI may #UD if SVM is disabled from NMI + * context between reading EFER and executing STGI. In that + * case, GIF must already be set, otherwise the NMI would have + * been blocked, so just eat the fault. + */ + asm_volatile_goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); +fault: + wrmsrl(MSR_EFER, efer & ~EFER_SVME); + } } /** Makes sure SVM is disabled, if it is supported on the CPU diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 16f548a661cf..5fc35f889cd1 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -38,9 +38,11 @@ extern struct start_info *xen_start_info; #include <asm/processor.h> +#define XEN_SIGNATURE "XenVMMXenVMM" + static inline uint32_t xen_cpuid_base(void) { - return hypervisor_cpuid_base("XenVMMXenVMM", 2); + return hypervisor_cpuid_base(XEN_SIGNATURE, 2); } struct pci_dev; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f142f3ebf4e4..7f467fe05d42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/ioctl.h> +#include <linux/stddef.h> #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 @@ -507,8 +508,8 @@ struct kvm_nested_state { * KVM_{GET,PUT}_NESTED_STATE ioctl values. */ union { - struct kvm_vmx_nested_state_data vmx[0]; - struct kvm_svm_nested_state_data svm[0]; + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); } data; }; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 305514431f26..cdd92ab43cda 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -37,7 +37,6 @@ #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> -#include <asm/virtext.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> @@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Disable VMX or SVM if needed. - * - * We need to disable virtualization on all CPUs. - * Having VMX or SVM enabled on any CPU may break rebooting - * after the kdump kernel has finished its task. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - /* * Disable Intel PT to stop its logging */ @@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Booting kdump kernel with VMX or SVM enabled won't work, - * because (among other limitations) we can't disable paging - * with the virt flags. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); + cpu_emergency_disable_virtualization(); /* * Disable Intel PT to stop its logging diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c3636ea4aa71..d03c551defcc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -528,33 +528,29 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) -{ - cpu_emergency_vmxoff(); -} +static inline void nmi_shootdown_cpus_on_restart(void); -/* Use NMIs as IPIs to tell all CPUs to disable virtualization */ -static void emergency_vmx_disable_all(void) +static void emergency_reboot_disable_virtualization(void) { /* Just make sure we won't change CPUs while doing this */ local_irq_disable(); /* - * Disable VMX on all CPUs before rebooting, otherwise we risk hanging - * the machine, because the CPU blocks INIT when it's in VMX root. + * Disable virtualization on all CPUs before rebooting to avoid hanging + * the system, as VMX and SVM block INIT when running in the host. * * We can't take any locks and we may be on an inconsistent state, so - * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt. + * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt. * - * Do the NMI shootdown even if VMX if off on _this_ CPU, as that - * doesn't prevent a different CPU from being in VMX root operation. + * Do the NMI shootdown even if virtualization is off on _this_ CPU, as + * other CPUs may have virtualization enabled. */ - if (cpu_has_vmx()) { - /* Safely force _this_ CPU out of VMX root operation. */ - __cpu_emergency_vmxoff(); + if (cpu_has_vmx() || cpu_has_svm(NULL)) { + /* Safely force _this_ CPU out of VMX/SVM operation. */ + cpu_emergency_disable_virtualization(); - /* Halt and exit VMX root operation on the other CPUs. */ - nmi_shootdown_cpus(vmxoff_nmi); + /* Disable VMX/SVM and halt on other CPUs. */ + nmi_shootdown_cpus_on_restart(); } } @@ -590,7 +586,7 @@ static void native_machine_emergency_restart(void) unsigned short mode; if (reboot_emergency) - emergency_vmx_disable_all(); + emergency_reboot_disable_virtualization(); tboot_shutdown(TB_SHUTDOWN_REBOOT); @@ -795,6 +791,17 @@ void machine_crash_shutdown(struct pt_regs *regs) /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); +} + #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; @@ -817,7 +824,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, regs); + if (shootdown_callback) + shootdown_callback(cpu, regs); + + /* + * Prepare the CPU for reboot _after_ invoking the callback so that the + * callback can safely use virtualization instructions, e.g. VMCLEAR. + */ + cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -828,18 +842,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -/* - * Halt all other CPUs, calling the specified function on each of them +/** + * nmi_shootdown_cpus - Stop other CPUs via NMI + * @callback: Optional callback to be invoked from the NMI handler + * + * The NMI handler on the remote CPUs invokes @callback, if not + * NULL, first and then disables virtualization to ensure that + * INIT is recognized during reboot. * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. + * nmi_shootdown_cpus() can only be invoked once. After the first + * invocation all other CPUs are stuck in crash_nmi_callback() and + * cannot respond to a second NMI. */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { unsigned long msecs; + local_irq_disable(); + /* + * Avoid certain doom if a shootdown already occurred; re-registering + * the NMI handler will cause list corruption, modifying the callback + * will do who knows what, etc... + */ + if (WARN_ON_ONCE(crash_ipi_issued)) + return; + /* Make a note of crashing cpu. Will be used in NMI callback. */ crashing_cpu = safe_smp_processor_id(); @@ -867,7 +895,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) msecs--; } - /* Leave the nmi callback set */ + /* + * Leave the nmi callback set, shootdown is a one-time thing. Clearing + * the callback could result in a NULL pointer dereference if a CPU + * (finally) responds after the timeout expires. + */ +} + +static inline void nmi_shootdown_cpus_on_restart(void) +{ + if (!crash_ipi_issued) + nmi_shootdown_cpus(NULL); } /* @@ -897,6 +935,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* No other CPUs to shoot down */ } +static inline void nmi_shootdown_cpus_on_restart(void) { } + void run_crash_ipi_callback(struct pt_regs *regs) { } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 06db901fabe8..375b33ecafa2 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -32,7 +32,7 @@ #include <asm/mce.h> #include <asm/trace/irq_vectors.h> #include <asm/kexec.h> -#include <asm/virtext.h> +#include <asm/reboot.h> /* * Some notes on x86 processor bugs affecting SMP operation: @@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) return NMI_HANDLED; - cpu_emergency_vmxoff(); + cpu_emergency_disable_virtualization(); stop_this_cpu(NULL); return NMI_HANDLED; @@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { ack_APIC_irq(); - cpu_emergency_vmxoff(); + cpu_emergency_disable_virtualization(); stop_this_cpu(NULL); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2a9f1e200dbc..8f8edeaf8177 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -26,6 +26,7 @@ #include "mmu.h" #include "trace.h" #include "pmu.h" +#include "xen.h" /* * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be @@ -181,15 +182,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) { - u32 function; + struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; + u32 base; - vcpu->arch.kvm_cpuid_base = 0; - - for_each_possible_hypervisor_cpuid_base(function) { - entry = kvm_find_cpuid_entry(vcpu, function); + for_each_possible_hypervisor_cpuid_base(base) { + entry = kvm_find_cpuid_entry(vcpu, base); if (entry) { u32 signature[3]; @@ -198,19 +199,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) signature[1] = entry->ecx; signature[2] = entry->edx; - BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE)); - if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) { - vcpu->arch.kvm_cpuid_base = function; + if (!memcmp(signature, sig, sizeof(signature))) { + cpuid.base = base; + cpuid.limit = entry->eax; break; } } } + + return cpuid; } static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, int nent) { - u32 base = vcpu->arch.kvm_cpuid_base; + u32 base = vcpu->arch.kvm_cpuid.base; if (!base) return NULL; @@ -440,7 +443,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, vcpu->arch.cpuid_entries = e2; vcpu->arch.cpuid_nent = nent; - kvm_update_kvm_cpuid_base(vcpu); + vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); + vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); kvm_vcpu_after_set_cpuid(vcpu); return 0; @@ -664,8 +668,9 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) | - F(AVX_IFMA) + F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | + F(FZRM) | F(FSRS) | F(FSRC) | + F(AMX_FP16) | F(AVX_IFMA) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c3443045cd93..a630c5db971c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1634,7 +1634,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_SS: /* * segment is not a writable data segment or segment - * selector's RPL != CPL or segment selector's RPL != CPL + * selector's RPL != CPL or DPL != CPL */ if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) goto exception; @@ -1696,11 +1696,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, /* * segment is not a data or readable code segment or * ((segment is a data or nonconforming code segment) - * and (both RPL and CPL > DPL)) + * and ((RPL > DPL) or (CPL > DPL))) */ if ((seg_desc.type & 0xa) == 0x8 || (((seg_desc.type & 0xc) != 0xc) && - (rpl > dpl && cpl > dpl))) + (rpl > dpl || cpl > dpl))) goto exception; break; } diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 3eb8caf87ee4..b28fd020066f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -44,6 +44,24 @@ #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK) +/* + * As per Hyper-V TLFS, extended hypercalls start from 0x8001 + * (HvExtCallQueryCapabilities). Response of this hypercalls is a 64 bit value + * where each bit tells which extended hypercall is available besides + * HvExtCallQueryCapabilities. + * + * 0x8001 - First extended hypercall, HvExtCallQueryCapabilities, no bit + * assigned. + * + * 0x8002 - Bit 0 + * 0x8003 - Bit 1 + * .. + * 0x8041 - Bit 63 + * + * Therefore, HV_EXT_CALL_MAX = 0x8001 + 64 + */ +#define HV_EXT_CALL_MAX (HV_EXT_CALL_QUERY_CAPABILITIES + 64) + static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, bool vcpu_kick); @@ -2437,6 +2455,9 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) case HVCALL_SEND_IPI: return hv_vcpu->cpuid_cache.enlightenments_eax & HV_X64_CLUSTER_IPI_RECOMMENDED; + case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX: + return hv_vcpu->cpuid_cache.features_ebx & + HV_ENABLE_EXTENDED_HYPERCALLS; default: break; } @@ -2529,14 +2550,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - vcpu->run->exit_reason = KVM_EXIT_HYPERV; - vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; - vcpu->run->hyperv.u.hcall.input = hc.param; - vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa; - vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa; - vcpu->arch.complete_userspace_io = - kvm_hv_hypercall_complete_userspace; - return 0; + goto hypercall_userspace_exit; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: if (unlikely(hc.var_cnt)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; @@ -2595,15 +2609,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = HV_STATUS_OPERATION_DENIED; break; } - vcpu->run->exit_reason = KVM_EXIT_HYPERV; - vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; - vcpu->run->hyperv.u.hcall.input = hc.param; - vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa; - vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa; - vcpu->arch.complete_userspace_io = - kvm_hv_hypercall_complete_userspace; - return 0; + goto hypercall_userspace_exit; } + case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX: + if (unlikely(hc.fast)) { + ret = HV_STATUS_INVALID_PARAMETER; + break; + } + goto hypercall_userspace_exit; default: ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -2611,6 +2624,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hypercall_complete: return kvm_hv_hypercall_complete(vcpu, ret); + +hypercall_userspace_exit: + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; + vcpu->run->hyperv.u.hcall.input = hc.param; + vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa; + vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa; + vcpu->arch.complete_userspace_io = kvm_hv_hypercall_complete_userspace; + return 0; } void kvm_hv_init_vm(struct kvm *kvm) @@ -2754,6 +2776,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->ebx |= HV_POST_MESSAGES; ent->ebx |= HV_SIGNAL_EVENTS; + ent->ebx |= HV_ENABLE_EXTENDED_HYPERCALLS; ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index aeb240b339f5..c91ee2927dd7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -44,6 +44,7 @@ #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> +#include <linux/kstrtox.h> #include <linux/kthread.h> #include <asm/page.h> @@ -269,6 +270,17 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } +static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); + +/* Flush the range of guest memory mapped by the given SPTE. */ +static void kvm_flush_remote_tlbs_sptep(struct kvm *kvm, u64 *sptep) +{ + struct kvm_mmu_page *sp = sptep_to_sp(sptep); + gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep)); + + kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); +} + static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access) { @@ -813,7 +825,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); + kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K); } void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1187,8 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) drop_spte(kvm, sptep); if (flush) - kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(kvm, sptep); } /* @@ -1469,7 +1480,7 @@ restart: } if (need_flush && kvm_available_flush_tlb_with_range()) { - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); + kvm_flush_remote_tlbs_gfn(kvm, gfn, level); return false; } @@ -1639,8 +1650,7 @@ static void __rmap_add(struct kvm *kvm, kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) { kvm_zap_all_rmap_sptes(kvm, rmap_head); - kvm_flush_remote_tlbs_with_address( - kvm, sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } } @@ -2405,7 +2415,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, return; drop_parent_pte(child, sptep); - kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1); + kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); } } @@ -2889,8 +2899,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, } if (flush) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, - KVM_PAGES_PER_HPAGE(level)); + kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); pgprintk("%s: setting spte %llx\n", __func__, *sptep); @@ -3169,7 +3178,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); - base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; @@ -4440,7 +4449,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) { for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) { int page_num = KVM_PAGES_PER_HPAGE(fault->max_level); - gfn_t base = fault->gfn & ~(page_num - 1); + gfn_t base = gfn_round_for_level(fault->gfn, + fault->max_level); if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) break; @@ -4556,10 +4566,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) struct kvm_mmu *mmu = vcpu->arch.mmu; union kvm_mmu_page_role new_role = mmu->root_role; - if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) { - /* kvm_mmu_ensure_valid_pgd will set up a new root. */ + /* + * Return immediately if no usable root was found, kvm_mmu_reload() + * will establish a valid root prior to the next VM-Enter. + */ + if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) return; - } /* * It's possible that the cached previous root page is obsolete because @@ -6518,8 +6530,7 @@ restart: kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) - kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(kvm, sptep); else need_tlb_flush = 1; @@ -6752,7 +6763,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) new_val = 1; else if (sysfs_streq(val, "auto")) new_val = get_nx_auto_mode(); - else if (strtobool(val, &new_val) < 0) + else if (kstrtobool(val, &new_val) < 0) return -EINVAL; __set_nx_huge_pages(new_val); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index ac00bfbf32f6..cc58631e2336 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -156,6 +156,11 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp) return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode; } +static inline gfn_t gfn_round_for_level(gfn_t gfn, int level) +{ + return gfn & -KVM_PAGES_PER_HPAGE(level); +} + int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, bool can_unsync, bool prefetch); @@ -164,8 +169,17 @@ void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); + void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn, u64 pages); + +/* Flush the given page (huge or not) of guest memory. */ +static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level) +{ + kvm_flush_remote_tlbs_with_address(kvm, gfn_round_for_level(gfn, level), + KVM_PAGES_PER_HPAGE(level)); +} + unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); extern int nx_huge_pages; diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index e5662dbd519c..57f0b75c80f9 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -642,12 +642,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) goto out_gpte_changed; - for (shadow_walk_init(&it, vcpu, fault->addr); - shadow_walk_okay(&it) && it.level > gw->level; - shadow_walk_next(&it)) { + for_each_shadow_entry(vcpu, fault->addr, it) { gfn_t table_gfn; clear_sp_write_flooding_count(it.sptep); + if (it.level == gw->level) + break; table_gfn = gw->table_gfn[it.level - 2]; access = gw->pt_access[it.level - 2]; @@ -692,8 +692,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, trace_kvm_mmu_spte_requested(fault); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { - clear_sp_write_flooding_count(it.sptep); - /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. @@ -701,7 +699,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); - base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; @@ -929,8 +927,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL); if (is_shadow_present_pte(old_spte)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, - sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); if (!rmap_can_add(vcpu)) break; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index fce6f047399f..c15bfca3ed15 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -147,9 +147,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, WARN_ON_ONCE(!pte_access && !shadow_present_mask); if (sp->role.ad_disabled) - spte |= SPTE_TDP_AD_DISABLED_MASK; + spte |= SPTE_TDP_AD_DISABLED; else if (kvm_mmu_page_ad_need_write_protect(sp)) - spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK; + spte |= SPTE_TDP_AD_WRPROT_ONLY; /* * For the EPT case, shadow_present_mask is 0 if hardware @@ -317,7 +317,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) shadow_user_mask | shadow_x_mask | shadow_me_value; if (ad_disabled) - spte |= SPTE_TDP_AD_DISABLED_MASK; + spte |= SPTE_TDP_AD_DISABLED; else spte |= shadow_accessed_mask; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 0d8deefee66c..1279db2eab44 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -28,10 +28,10 @@ */ #define SPTE_TDP_AD_SHIFT 52 #define SPTE_TDP_AD_MASK (3ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_ENABLED_MASK (0ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_DISABLED_MASK (1ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_WRPROT_ONLY_MASK (2ULL << SPTE_TDP_AD_SHIFT) -static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); +#define SPTE_TDP_AD_ENABLED (0ULL << SPTE_TDP_AD_SHIFT) +#define SPTE_TDP_AD_DISABLED (1ULL << SPTE_TDP_AD_SHIFT) +#define SPTE_TDP_AD_WRPROT_ONLY (2ULL << SPTE_TDP_AD_SHIFT) +static_assert(SPTE_TDP_AD_ENABLED == 0); #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK #define SPTE_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) @@ -164,7 +164,7 @@ extern u64 __read_mostly shadow_me_value; extern u64 __read_mostly shadow_me_mask; /* - * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED_MASK; + * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED; * shadow_acc_track_mask is the set of bits to be cleared in non-accessed * pages. */ @@ -266,18 +266,18 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(!is_shadow_present_pte(spte)); - return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK; + return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED; } static inline bool spte_ad_need_write_protect(u64 spte) { MMU_WARN_ON(!is_shadow_present_pte(spte)); /* - * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED_MASK is '0', + * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0', * and non-TDP SPTEs will never set these bits. Optimize for 64-bit * TDP and do the A/D type check unconditionally. */ - return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED_MASK; + return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED; } static inline u64 spte_shadow_accessed_mask(u64 spte) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index e26e744df1d1..d2eb0d4f8710 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -16,11 +16,6 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter) iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep); } -static gfn_t round_gfn_for_level(gfn_t gfn, int level) -{ - return gfn & -KVM_PAGES_PER_HPAGE(level); -} - /* * Return the TDP iterator to the root PT and allow it to continue its * traversal over the paging structure from there. @@ -31,7 +26,7 @@ void tdp_iter_restart(struct tdp_iter *iter) iter->yielded_gfn = iter->next_last_level_gfn; iter->level = iter->root_level; - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level); tdp_iter_refresh_sptep(iter); iter->valid = true; @@ -98,7 +93,7 @@ static bool try_step_down(struct tdp_iter *iter) iter->level--; iter->pt_path[iter->level - 1] = child_pt; - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level); tdp_iter_refresh_sptep(iter); return true; @@ -140,7 +135,7 @@ static bool try_step_up(struct tdp_iter *iter) return false; iter->level++; - iter->gfn = round_gfn_for_level(iter->gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->gfn, iter->level); tdp_iter_refresh_sptep(iter); return true; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index bba33aea0fb0..7c25dbf32ecc 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -680,8 +680,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, if (ret) return ret; - kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, - KVM_PAGES_PER_HPAGE(iter->level)); + kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level); /* * No other thread can overwrite the removed SPTE as they must either @@ -1080,8 +1079,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, return RET_PF_RETRY; else if (is_shadow_present_pte(iter->old_spte) && !is_last_spte(iter->old_spte, iter->level)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(iter->level + 1)); + kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level); /* * If the page fault was caused by a write but the page is write diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 7b6c3ba2c8e1..612e6c70ce2e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -820,6 +820,7 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) mutex_lock(&kvm->lock); filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, mutex_is_locked(&kvm->lock)); + mutex_unlock(&kvm->lock); synchronize_srcu_expedited(&kvm->srcu); BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) > @@ -830,8 +831,6 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) kvm_make_all_cpus_request(kvm, KVM_REQ_PMU); - mutex_unlock(&kvm->lock); - r = 0; cleanup: kfree(filter); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8f0f67c75f35..2690d018da11 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4604,7 +4604,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) * it needs to be set here when dirty logging is already active, e.g. * if this vCPU was created after dirty logging was enabled. */ - if (!vcpu->kvm->arch.cpu_dirty_logging_count) + if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging)) exec_control &= ~SECONDARY_EXEC_ENABLE_PML; if (cpu_has_vmx_xsaves()) { @@ -7986,17 +7986,20 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (WARN_ON_ONCE(!enable_pml)) + return; + if (is_guest_mode(vcpu)) { vmx->nested.update_vmcs01_cpu_dirty_logging = true; return; } /* - * Note, cpu_dirty_logging_count can be changed concurrent with this + * Note, nr_memslots_dirty_logging can be changed concurrent with this * code, but in that case another update request will be made and so * the guest will never run with a stale PML value. */ - if (vcpu->kvm->arch.cpu_dirty_logging_count) + if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging)) secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML); else secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64c567a1b32b..199a9ff0cd4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3164,6 +3164,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = tgt_tsc_khz; + kvm_xen_update_tsc_info(v); } vcpu->hv_clock.tsc_timestamp = tsc_timestamp; @@ -4292,8 +4293,8 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, { struct kvm_msrs msrs; struct kvm_msr_entry *entries; - int r, n; unsigned size; + int r; r = -EFAULT; if (copy_from_user(&msrs, user_msrs, sizeof(msrs))) @@ -4310,17 +4311,11 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, goto out; } - r = n = __msr_io(vcpu, &msrs, entries, do_msr); - if (r < 0) - goto out_free; + r = __msr_io(vcpu, &msrs, entries, do_msr); - r = -EFAULT; if (writeback && copy_to_user(user_msrs->entries, entries, size)) - goto out_free; - - r = n; + r = -EFAULT; -out_free: kfree(entries); out: return r; @@ -6468,7 +6463,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, struct kvm_x86_msr_filter *new_filter, *old_filter; bool default_allow; bool empty = true; - int r = 0; + int r; u32 i; if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK) @@ -6494,17 +6489,14 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, } mutex_lock(&kvm->lock); - - /* The per-VM filter is protected by kvm->lock... */ - old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1); - - rcu_assign_pointer(kvm->arch.msr_filter, new_filter); + old_filter = rcu_replace_pointer(kvm->arch.msr_filter, new_filter, + mutex_is_locked(&kvm->lock)); + mutex_unlock(&kvm->lock); synchronize_srcu(&kvm->srcu); kvm_free_msr_filter(old_filter); kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); - mutex_unlock(&kvm->lock); return 0; } @@ -12291,7 +12283,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, */ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); - if (IS_ERR((void *)hva)) + if (IS_ERR_VALUE(hva)) return (void __user *)hva; } else { if (!slot || !slot->npages) @@ -12506,16 +12498,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable) { - struct kvm_arch *ka = &kvm->arch; + int nr_slots; if (!kvm_x86_ops.cpu_dirty_log_size) return; - if ((enable && ++ka->cpu_dirty_logging_count == 1) || - (!enable && --ka->cpu_dirty_logging_count == 0)) + nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging); + if ((enable && nr_slots == 1) || !nr_slots) kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING); - - WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0); } static void kvm_mmu_slot_apply_flags(struct kvm *kvm, diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2681e2007e39..40edf4d1974c 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -23,6 +23,9 @@ #include <xen/interface/event_channel.h> #include <xen/interface/sched.h> +#include <asm/xen/cpuid.h> + +#include "cpuid.h" #include "trace.h" static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm); @@ -2077,6 +2080,29 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) del_timer_sync(&vcpu->arch.xen.poll_timer); } +void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + u32 function; + + if (!vcpu->arch.xen.cpuid.base) + return; + + function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3); + if (function > vcpu->arch.xen.cpuid.limit) + return; + + entry = kvm_find_cpuid_entry_index(vcpu, function, 1); + if (entry) { + entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul; + entry->edx = vcpu->arch.hv_clock.tsc_shift; + } + + entry = kvm_find_cpuid_entry_index(vcpu, function, 2); + if (entry) + entry->eax = vcpu->arch.hw_tsc_khz; +} + void kvm_xen_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.xen.xen_lock); diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index ea33d80a0c51..f8f1fe22d090 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -9,6 +9,8 @@ #ifndef __ARCH_X86_KVM_XEN_H__ #define __ARCH_X86_KVM_XEN_H__ +#include <asm/xen/hypervisor.h> + #ifdef CONFIG_KVM_XEN #include <linux/jump_label_ratelimit.h> @@ -32,6 +34,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); +void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu); static inline bool kvm_xen_msr_enabled(struct kvm *kvm) { @@ -135,6 +138,10 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu) { return false; } + +static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) +{ +} #endif int kvm_xen_hypercall(struct kvm_vcpu *vcpu); |