From 8ffdaa7f491961efb4d02d3c8d806e9a60105adb Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 21 May 2019 14:06:52 +0800 Subject: KVM: Documentation: Add disable pause exits to KVM_CAP_X86_DISABLE_EXITS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b31c114b (KVM: X86: Provide a capability to disable PAUSE intercepts) forgot to add the KVM_X86_DISABLE_EXITS_PAUSE into api doc. This patch adds it. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ba6c42c576dd..33cd92dd6aa5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4893,6 +4893,7 @@ Valid bits in args[0] are #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) Enabling this capability on a VM provides userspace with a way to no longer intercept some instructions for improved latency in some -- cgit v1.2.3 From b51700632e0e53254733ff706e5bdca22d19dbe5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 21 May 2019 14:06:53 +0800 Subject: KVM: X86: Provide a capability to disable cstate msr read intercepts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow guest reads CORE cstate when exposing host CPU power management capabilities to the guest. PKG cstate is restricted to avoid a guest to get the whole package information in multi-tenant scenario. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Sean Christopherson Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 5 ++++- arch/x86/kvm/x86.h | 5 +++++ include/uapi/linux/kvm.h | 4 +++- tools/include/uapi/linux/kvm.h | 4 +++- 7 files changed, 23 insertions(+), 3 deletions(-) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 33cd92dd6aa5..91fd86fcc49f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4894,6 +4894,7 @@ Valid bits in args[0] are #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) Enabling this capability on a VM provides userspace with a way to no longer intercept some instructions for improved latency in some diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15e973d9b840..aeadbc770eb2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -882,6 +882,7 @@ struct kvm_arch { bool mwait_in_guest; bool hlt_in_guest; bool pause_in_guest; + bool cstate_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0861c71a4379..da24f1858acc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6637,6 +6637,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + if (kvm_cstate_in_guest(kvm)) { + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); + } vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8aa6b5a75e7a..17e9533f51eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3098,7 +3098,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_TSC_STABLE; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; + r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | + KVM_X86_DISABLE_EXITS_CSTATE; if(kvm_can_mwait_in_guest()) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; @@ -4615,6 +4616,8 @@ split_irqchip_unlock: kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a470ff0868c5..275b3b646023 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -333,6 +333,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm) return kvm->arch.pause_in_guest; } +static inline bool kvm_cstate_in_guest(struct kvm *kvm) +{ + return kvm->arch.cstate_in_guest; +} + DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2fe12b40d503..c2152f3dd02d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -696,9 +696,11 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE) + KVM_X86_DISABLE_EXITS_PAUSE | \ + KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6d4ea4b6c922..ef3303f72c46 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -696,9 +696,11 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE) + KVM_X86_DISABLE_EXITS_PAUSE | \ + KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { -- cgit v1.2.3 From 0d9ce162cf46c99628cc5da9510b959c7976735b Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 3 Jan 2019 17:14:28 -0800 Subject: kvm: Convert kvm_lock to a mutex It doesn't seem as if there is any particular need for kvm_lock to be a spinlock, so convert the lock to a mutex so that sleepable functions (in particular cond_resched()) can be called while holding it. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 4 +--- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/x86.c | 14 +++++++------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 30 +++++++++++++++--------------- 6 files changed, 28 insertions(+), 30 deletions(-) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7936af0a971f..0fef9192f6ac 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2423,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95ac393e2959..3384c539d150 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5956,7 +5956,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5998,7 +5998,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10feed6a01eb..6200d5a51f13 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6719,7 +6719,7 @@ static void kvm_hyperv_tsc_notifier(void) struct kvm_vcpu *vcpu; int cpu; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); @@ -6745,7 +6745,7 @@ static void kvm_hyperv_tsc_notifier(void) spin_unlock(&ka->pvclock_gtod_sync_lock); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } #endif @@ -6796,17 +6796,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) smp_call_function_single(cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6929,12 +6929,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e9fd7ad8018..abafddb9fe2c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,7 +162,7 @@ static inline bool is_error_page(struct page *page) extern struct kmem_cache *kvm_vcpu_cache; -extern spinlock_t kvm_lock; +extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b2579841263f..9613987ef4c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_SPINLOCK(kvm_lock); +DEFINE_MUTEX(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -683,9 +683,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); preempt_notifier_inc(); @@ -731,9 +731,9 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_del(&kvm->vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@ -4034,13 +4034,13 @@ static int vm_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4053,12 +4053,12 @@ static int vm_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4073,13 +4073,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4092,12 +4092,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -4118,7 +4118,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) if (!kvm_dev.this_device || !kvm) return; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; @@ -4127,7 +4127,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } created = kvm_createvm_count; active = kvm_active_vms; - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT); if (!env) -- cgit v1.2.3 From 2d5ba19bdfef4dd06add144eb04287ee98409f75 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 3 Jun 2019 19:52:44 -0300 Subject: kvm: x86: add host poll control msrs Add an MSRs which allows the guest to disable host polling (specifically the cpuidle-haltpoll, when performing polling in the guest, disables host side polling). Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/msr.txt | 9 +++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm_para.h | 2 ++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/cpuid.c | 3 ++- arch/x86/kvm/x86.c | 23 +++++++++++++++++++++++ 6 files changed, 39 insertions(+), 1 deletion(-) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index f3f0d57ced8e..df1f4338b3ca 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04 guest must both read the least significant bit in the memory area and clear it using a single CPU instruction, such as test and clear, or compare and exchange. + +MSR_KVM_POLL_CONTROL: 0x4b564d05 + Control host-side polling. + + data: Bit 0 enables (1) or disables (0) host-side HLT polling logic. + + KVM guests can request the host not to poll on HLT, for example if + they are performing polling themselves. + diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aeadbc770eb2..a86026969b19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -755,6 +755,8 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; } pv_eoi; + u64 msr_kvm_poll_control; + /* * Indicate whether the access faults on its page table in guest * which is set when fix page fault and used to detect unhandeable diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..21d5f0240595 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 #define KVM_HINTS_REALTIME 0 @@ -47,6 +48,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 struct kvm_steal_time { __u64 steal; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index fc042419e670..840e12583b85 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -41,6 +41,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_NO_POLL select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select SRCU diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9872f86fdbe9..68c74e948e28 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -658,7 +658,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | - (1 << KVM_FEATURE_PV_SEND_IPI); + (1 << KVM_FEATURE_PV_SEND_IPI) | + (1 << KVM_FEATURE_POLL_CONTROL); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eadd987ae350..eb87d71ec14a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1177,6 +1177,7 @@ static u32 emulated_msrs[] = { MSR_IA32_POWER_CTL, MSR_K7_HWCR, + MSR_KVM_POLL_CONTROL, }; static unsigned num_emulated_msrs; @@ -2636,6 +2637,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; + case MSR_KVM_POLL_CONTROL: + /* only enable bit supported */ + if (data & (-1ULL << 1)) + return 1; + + vcpu->arch.msr_kvm_poll_control = data; + break; + case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: @@ -2885,6 +2894,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_PV_EOI_EN: msr_info->data = vcpu->arch.pv_eoi.msr_val; break; + case MSR_KVM_POLL_CONTROL: + msr_info->data = vcpu->arch.msr_kvm_poll_control; + break; case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: case MSR_IA32_MCG_CAP: @@ -8861,6 +8873,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) msr.host_initiated = true; kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + + /* poll control enabled by default */ + vcpu->arch.msr_kvm_poll_control = 1; + mutex_unlock(&vcpu->mutex); if (!kvmclock_periodic_sync) @@ -9972,6 +9988,13 @@ bool kvm_vector_hashing_enabled(void) } EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.msr_kvm_poll_control & 1) == 0; +} +EXPORT_SYMBOL_GPL(kvm_arch_no_poll); + + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); -- cgit v1.2.3 From f85f6e7bc9682a6d8b342c010cd6aa58521fdeec Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 11 Jun 2019 20:23:48 +0800 Subject: KVM: X86: Yield to IPI target if necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When sending a call-function IPI-many to vCPUs, yield if any of the IPI target vCPUs was preempted, we just select the first preempted target vCPU which we found since the state of target vCPUs can change underneath and to avoid race conditions. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/hypercalls.txt | 11 +++++++++++ arch/x86/include/uapi/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 21 +++++++++++++++++++++ include/uapi/linux/kvm_para.h | 1 + 4 files changed, 34 insertions(+) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index da24c138c8d1..da210651f714 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1 corresponds to the APIC ID a2+1, and so on. Returns the number of CPUs to which the IPIs were delivered successfully. + +7. KVM_HC_SCHED_YIELD +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to yield if the IPI target vCPU is preempted + +a0: destination APIC ID + +Usage example: When sending a call-function IPI-many to vCPUs, yield if +any of the IPI target vCPUs was preempted. diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 21d5f0240595..2a8e0b6b9805 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -30,6 +30,7 @@ #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5169b8cc35bb..82caf01b63dd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void) pr_info("KVM setup pv IPIs\n"); } +static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) +{ + int cpu; + + native_send_call_func_ipi(mask); + + /* Make sure other vCPUs get a chance to run if they need to. */ + for_each_cpu(cpu, mask) { + if (vcpu_is_preempted(cpu)) { + kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); + break; + } + } +} + static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); @@ -638,6 +653,12 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; + if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; + pr_info("KVM setup pv sched yield\n"); + } if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index 6c0ce49931e5..8b86609849b9 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -28,6 +28,7 @@ #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 #define KVM_HC_CLOCK_PAIRING 9 #define KVM_HC_SEND_IPI 10 +#define KVM_HC_SCHED_YIELD 11 /* * hypercalls use architecture specific -- cgit v1.2.3 From 32b72ecc83b651fb8633ac4bd44957c54367699d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 11 Jun 2019 20:23:50 +0800 Subject: KVM: X86: Expose PV_SCHED_YIELD CPUID feature bit to guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose PV_SCHED_YIELD feature bit to guest, the guest can check this feature bit before using paravirtualized sched yield. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.txt | 4 ++++ arch/x86/kvm/cpuid.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 97ca1940a0dc..979a77ba5377 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit || || before using paravirtualized || || send IPIs. ------------------------------------------------------------------------------ +KVM_FEATURE_PV_SCHED_YIELD || 13 || guest checks this feature bit + || || before using paravirtualized + || || sched yield. +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 68c74e948e28..004cbd84c351 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -659,7 +659,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI) | - (1 << KVM_FEATURE_POLL_CONTROL); + (1 << KVM_FEATURE_POLL_CONTROL) | + (1 << KVM_FEATURE_PV_SCHED_YIELD); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); -- cgit v1.2.3 From 9824c83f92bc8351dfb5c387436cc2816616fb4a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Jul 2019 18:57:29 +0200 Subject: Documentation: kvm: document CPUID bit for MSR_KVM_POLL_CONTROL Cc: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 979a77ba5377..2bdac528e4a2 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit || || before using paravirtualized || || send IPIs. ------------------------------------------------------------------------------ +KVM_FEATURE_PV_POLL_CONTROL || 12 || host-side polling on HLT can + || || be disabled by writing + || || to msr 0x4b564d05. +------------------------------------------------------------------------------ KVM_FEATURE_PV_SCHED_YIELD || 13 || guest checks this feature bit || || before using paravirtualized || || sched yield. -- cgit v1.2.3 From 49caebe9b3e2a83161f4374ac347eb14e11c3b54 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 3 May 2019 15:27:50 +0100 Subject: KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS register Add documentation for the newly defined firmware registers to save and restore any vulnerability mitigation status. Signed-off-by: Andre Przywara Reviewed-by: Steven Price Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/arm/psci.txt | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt index aafdab887b04..559586fc9d37 100644 --- a/Documentation/virtual/kvm/arm/psci.txt +++ b/Documentation/virtual/kvm/arm/psci.txt @@ -28,3 +28,34 @@ The following register is defined: - Allows any PSCI version implemented by KVM and compatible with v0.2 to be set with SET_ONE_REG - Affects the whole VM (even if the register view is per-vcpu) + +* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + Holds the state of the firmware support to mitigate CVE-2017-5715, as + offered by KVM to the guest via a HVC call. The workaround is described + under SMCCC_ARCH_WORKAROUND_1 in [1]. + Accepted values are: + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer + firmware support for the workaround. The mitigation status for the + guest is unknown. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is + available to the guest and required for the mitigation. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call + is available to the guest, but it is not needed on this VCPU. + +* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + Holds the state of the firmware support to mitigate CVE-2018-3639, as + offered by KVM to the guest via a HVC call. The workaround is described + under SMCCC_ARCH_WORKAROUND_2 in [1]. + Accepted values are: + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not + available. KVM does not offer firmware support for the workaround. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is + unknown. KVM does not offer firmware support for the workaround. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available, + and can be disabled by a vCPU. If + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for + this vCPU. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is + always active on this vCPU or it is not needed. + +[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf -- cgit v1.2.3 From 66bb8a065f5aedd4551d8d3fbce582972f65c2e1 Mon Sep 17 00:00:00 2001 From: Eric Hankland Date: Wed, 10 Jul 2019 18:25:15 -0700 Subject: KVM: x86: PMU Event Filter Some events can provide a guest with information about other guests or the host (e.g. L3 cache stats); providing the capability to restrict access to a "safe" set of events would limit the potential for the PMU to be used in any side channel attacks. This change introduces a new VM ioctl that sets an event filter. If the guest attempts to program a counter for any blacklisted or non-whitelisted event, the kernel counter won't be created, so any RDPMC/RDMSR will show 0 instances of that event. Signed-off-by: Eric Hankland [Lots of changes. All remaining bugs are probably mine. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 26 ++++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm.h | 10 +++++++ arch/x86/kvm/pmu.c | 63 +++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/pmu.h | 1 + arch/x86/kvm/x86.c | 5 ++++ include/uapi/linux/kvm.h | 3 ++ 7 files changed, 110 insertions(+) (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 91fd86fcc49f..38b0d4451a24 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4065,6 +4065,32 @@ KVM_ARM_VCPU_FINALIZE call. See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization using this ioctl. +4.120 KVM_SET_PMU_EVENT_FILTER + +Capability: KVM_CAP_PMU_EVENT_FILTER +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pmu_event_filter (in) +Returns: 0 on success, -1 on error + +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u64 events[0]; +}; + +This ioctl restricts the set of PMU events that the guest can program. +The argument holds a list of events which will be allowed or denied. +The eventsel+umask of each event the guest attempts to program is compared +against the events field to determine whether the guest should have access. +This only affects general purpose counters; fixed purpose counters can +be disabled by changing the perfmon CPUID leaf. + +Valid values for 'action': +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + 5. The kvm_run structure ------------------------ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f46a12a5cf2e..34d017bd1d1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -933,6 +933,8 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + + struct kvm_pmu_event_filter *pmu_event_filter; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f9b021e16ebc..46588f5d6283 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -422,4 +422,14 @@ struct kvm_nested_state { __u8 data[0]; }; +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u64 events[0]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index dd745b58ffd8..9d92c4d3cd44 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -22,6 +22,9 @@ #include "lapic.h" #include "pmu.h" +/* This keeps the total size of the filter under 4k. */ +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 + /* NOTE: * - Each perf counter is defined as "struct kvm_pmc"; * - There are two types of perf counters: general purpose (gp) and fixed. @@ -144,6 +147,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; u8 event_select, unit_mask; + struct kvm *kvm = pmc->vcpu->kvm; + struct kvm_pmu_event_filter *filter; + int i; + bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -155,6 +162,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) return; + filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); + if (filter) { + for (i = 0; i < filter->nevents; i++) + if (filter->events[i] == + (eventsel & AMD64_RAW_EVENT_MASK_NB)) + break; + if (filter->action == KVM_PMU_EVENT_ALLOW && + i == filter->nevents) + allow_event = false; + if (filter->action == KVM_PMU_EVENT_DENY && + i < filter->nevents) + allow_event = false; + } + if (!allow_event) + return; + event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; @@ -351,3 +374,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) { kvm_pmu_reset(vcpu); } + +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) +{ + struct kvm_pmu_event_filter tmp, *filter; + size_t size; + int r; + + if (copy_from_user(&tmp, argp, sizeof(tmp))) + return -EFAULT; + + if (tmp.action != KVM_PMU_EVENT_ALLOW && + tmp.action != KVM_PMU_EVENT_DENY) + return -EINVAL; + + if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) + return -E2BIG; + + size = struct_size(filter, events, tmp.nevents); + filter = kmalloc(size, GFP_KERNEL_ACCOUNT); + if (!filter) + return -ENOMEM; + + r = -EFAULT; + if (copy_from_user(filter, argp, size)) + goto cleanup; + + /* Ensure nevents can't be changed between the user copies. */ + *filter = tmp; + + mutex_lock(&kvm->lock); + rcu_swap_protected(kvm->arch.pmu_event_filter, filter, + mutex_is_locked(&kvm->lock)); + mutex_unlock(&kvm->lock); + + synchronize_srcu_expedited(&kvm->srcu); + r = 0; +cleanup: + kfree(filter); + return r; +} diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 22dff661145a..58265f761c3b 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); bool is_vmware_backdoor_pmc(u32 pmc_idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2e302e977dac..81faceba8cec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3132,6 +3132,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_PMU_EVENT_FILTER: case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: @@ -4978,6 +4979,9 @@ set_identity_unlock: r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); break; } + case KVM_SET_PMU_EVENT_FILTER: + r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); + break; default: r = -ENOTTY; } @@ -9428,6 +9432,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); kvm_page_track_cleanup(kvm); kvm_hv_destroy_vm(kvm); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c2152f3dd02d..a7c19540ce21 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -995,6 +995,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SVE 170 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_PMU_EVENT_FILTER 173 #ifdef KVM_CAP_IRQ_ROUTING @@ -1329,6 +1330,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* Available with KVM_CAP_PPC_GET_CPU_CHAR */ #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) +/* Available with KVM_CAP_PMU_EVENT_FILTER */ +#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From e287d6de62f745e223d2f7d9b621c892d4b2b45a Mon Sep 17 00:00:00 2001 From: Luke Nowakowski-Krijger Date: Wed, 10 Jul 2019 08:30:53 -0700 Subject: Documentation: kvm: Convert cpuid.txt to .rst Convert cpuid.txt to .rst format to be parsable by sphinx. Change format and spacing to make function definitions and return values much more clear. Also added a table that is parsable by sphinx and makes the information much more clean. Updated Author email to their new active email address. Added license identifier with the consent of the author. Signed-off-by: Luke Nowakowski-Krijger Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.rst | 107 ++++++++++++++++++++++++++++++++++++ Documentation/virtual/kvm/cpuid.txt | 91 ------------------------------ 2 files changed, 107 insertions(+), 91 deletions(-) create mode 100644 Documentation/virtual/kvm/cpuid.rst delete mode 100644 Documentation/virtual/kvm/cpuid.txt (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virtual/kvm/cpuid.rst new file mode 100644 index 000000000000..01b081f6e7ea --- /dev/null +++ b/Documentation/virtual/kvm/cpuid.rst @@ -0,0 +1,107 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +KVM CPUID bits +============== + +:Author: Glauber Costa + +A guest running on a kvm host, can check some of its features using +cpuid. This is not always guaranteed to work, since userspace can +mask-out some, or even all KVM-related cpuid features before launching +a guest. + +KVM cpuid functions are: + +function: KVM_CPUID_SIGNATURE (0x40000000) + +returns:: + + eax = 0x40000001 + ebx = 0x4b4d564b + ecx = 0x564b4d56 + edx = 0x4d + +Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". +The value in eax corresponds to the maximum cpuid function present in this leaf, +and will be updated if more functions are added in the future. +Note also that old hosts set eax value to 0x0. This should +be interpreted as if the value was 0x40000001. +This function queries the presence of KVM cpuid leafs. + +function: define KVM_CPUID_FEATURES (0x40000001) + +returns:: + + ebx, ecx + eax = an OR'ed group of (1 << flag) + +where ``flag`` is defined as below: + +================================= =========== ================================ +flag value meaning +================================= =========== ================================ +KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs + 0x11 and 0x12 + +KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays + on PIO operations + +KVM_FEATURE_MMU_OP 2 deprecated + +KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs + + 0x4b564d00 and 0x4b564d01 +KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by + writing to msr 0x4b564d02 + +KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by + writing to msr 0x4b564d03 + +KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt + handler can be enabled by + writing to msr 0x4b564d04 + +KVM_FEATURE_PV_UNHAULT 7 guest checks this feature bit + before enabling paravirtualized + spinlock support + +KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit + before enabling paravirtualized + tlb flush + +KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT + can be enabled by setting bit 2 + when writing to msr 0x4b564d02 + +KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit + before enabling paravirtualized + sebd IPIs + +KVM_FEATURE_PV_POLL_CONTROL 12 host-side polling on HLT can + be disabled by writing + to msr 0x4b564d05. + +KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit + before using paravirtualized + sched yield. + +KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side + per-cpu warps are expeced in + kvmclock +================================= =========== ================================ + +:: + + edx = an OR'ed group of (1 << flag) + +Where ``flag`` here is defined as below: + +================== ============ ================================= +flag value meaning +================== ============ ================================= +KVM_HINTS_REALTIME 0 guest checks this feature bit to + determine that vCPUs are never + preempted for an unlimited time + allowing optimizations +================== ============ ================================= diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt deleted file mode 100644 index 2bdac528e4a2..000000000000 --- a/Documentation/virtual/kvm/cpuid.txt +++ /dev/null @@ -1,91 +0,0 @@ -KVM CPUID bits -Glauber Costa , Red Hat Inc, 2010 -===================================================== - -A guest running on a kvm host, can check some of its features using -cpuid. This is not always guaranteed to work, since userspace can -mask-out some, or even all KVM-related cpuid features before launching -a guest. - -KVM cpuid functions are: - -function: KVM_CPUID_SIGNATURE (0x40000000) -returns : eax = 0x40000001, - ebx = 0x4b4d564b, - ecx = 0x564b4d56, - edx = 0x4d. -Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". -The value in eax corresponds to the maximum cpuid function present in this leaf, -and will be updated if more functions are added in the future. -Note also that old hosts set eax value to 0x0. This should -be interpreted as if the value was 0x40000001. -This function queries the presence of KVM cpuid leafs. - - -function: define KVM_CPUID_FEATURES (0x40000001) -returns : ebx, ecx - eax = an OR'ed group of (1 << flag), where each flags is: - - -flag || value || meaning -============================================================================= -KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs - || || 0x11 and 0x12. ------------------------------------------------------------------------------- -KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays - || || on PIO operations. ------------------------------------------------------------------------------- -KVM_FEATURE_MMU_OP || 2 || deprecated. ------------------------------------------------------------------------------- -KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs - || || 0x4b564d00 and 0x4b564d01 ------------------------------------------------------------------------------- -KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by - || || writing to msr 0x4b564d02 ------------------------------------------------------------------------------- -KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by - || || writing to msr 0x4b564d03. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt - || || handler can be enabled by writing - || || to msr 0x4b564d04. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit - || || before enabling paravirtualized - || || spinlock support. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit - || || before enabling paravirtualized - || || tlb flush. ------------------------------------------------------------------------------- -KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit - || || can be enabled by setting bit 2 - || || when writing to msr 0x4b564d02 ------------------------------------------------------------------------------- -KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit - || || before using paravirtualized - || || send IPIs. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_POLL_CONTROL || 12 || host-side polling on HLT can - || || be disabled by writing - || || to msr 0x4b564d05. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_SCHED_YIELD || 13 || guest checks this feature bit - || || before using paravirtualized - || || sched yield. ------------------------------------------------------------------------------- -KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side - || || per-cpu warps are expected in - || || kvmclock. ------------------------------------------------------------------------------- - - edx = an OR'ed group of (1 << flag), where each flags is: - - -flag || value || meaning -================================================================================== -KVM_HINTS_REALTIME || 0 || guest checks this feature bit to - || || determine that vCPUs are never - || || preempted for an unlimited time, - || || allowing optimizations ----------------------------------------------------------------------------------- -- cgit v1.2.3 From 429bb83af8bcea0115eb34fd7ed94a35166d8384 Mon Sep 17 00:00:00 2001 From: Luke Nowakowski-Krijger Date: Wed, 10 Jul 2019 08:30:54 -0700 Subject: Documentation: virtual: Add toctree hooks Added toctree hooks for indexing. Hooks added only for newly added files. The hook for the top of the tree will be added in a later patch series when a few more substantial changes have been added. Signed-off-by: Luke Nowakowski-Krijger Signed-off-by: Paolo Bonzini --- Documentation/virtual/index.rst | 18 ++++++++++++++++++ Documentation/virtual/kvm/index.rst | 11 +++++++++++ 2 files changed, 29 insertions(+) create mode 100644 Documentation/virtual/index.rst create mode 100644 Documentation/virtual/kvm/index.rst (limited to 'Documentation/virtual/kvm') diff --git a/Documentation/virtual/index.rst b/Documentation/virtual/index.rst new file mode 100644 index 000000000000..062ffb527043 --- /dev/null +++ b/Documentation/virtual/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Linux Virtualization Support +============================ + +.. toctree:: + :maxdepth: 2 + + kvm/index + paravirt_ops + +.. only:: html and subproject + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virtual/kvm/index.rst new file mode 100644 index 000000000000..0b206a06f5be --- /dev/null +++ b/Documentation/virtual/kvm/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=== +KVM +=== + +.. toctree:: + :maxdepth: 2 + + amd-memory-encryption + cpuid -- cgit v1.2.3