diff options
Diffstat (limited to 'arch/x86/include/asm/kvm_host.h')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 274 |
1 files changed, 179 insertions, 95 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e159e44a6a1b..f19a76d3ca0e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -24,20 +24,24 @@ #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> #include <linux/irqbypass.h> -#include <linux/hyperv.h> #include <linux/kfifo.h> #include <linux/sched/vhost_task.h> +#include <linux/call_once.h> +#include <linux/atomic.h> #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> +#include <asm/msr.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> -#include <asm/hyperv-tlfs.h> #include <asm/reboot.h> +#include <hyperv/hvhdk.h> #define __KVM_HAVE_ARCH_VCPU_DEBUGFS @@ -123,7 +127,8 @@ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_TLB_FLUSH \ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(34) +#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \ + KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -245,7 +250,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -293,6 +297,7 @@ enum x86_intercept_stage; */ #define KVM_APIC_PV_EOI_PENDING 1 +struct kvm_kernel_irqfd; struct kvm_kernel_irq_routing_entry; /* @@ -313,10 +318,11 @@ struct kvm_kernel_irq_routing_entry; * the number of unique SPs that can theoretically be created is 2^n, where n * is the number of bits that are used to compute the role. * - * But, even though there are 19 bits in the mask below, not all combinations + * But, even though there are 20 bits in the mask below, not all combinations * of modes and flags are possible: * - * - invalid shadow pages are not accounted, so the bits are effectively 18 + * - invalid shadow pages are not accounted, mirror pages are not shadowed, + * so the bits are effectively 18. * * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); * execonly and ad_disabled are only used for nested EPT which has @@ -349,7 +355,8 @@ union kvm_mmu_page_role { unsigned ad_disabled:1; unsigned guest_mode:1; unsigned passthrough:1; - unsigned :5; + unsigned is_mirror:1; + unsigned :4; /* * This is left at the top of the word so that @@ -403,11 +410,10 @@ union kvm_cpu_role { }; struct kvm_rmap_head { - unsigned long val; + atomic_long_t val; }; struct kvm_pio_request { - unsigned long linear_rip; unsigned long count; int in; int port; @@ -457,6 +463,7 @@ struct kvm_mmu { int (*sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i); struct kvm_mmu_root_info root; + hpa_t mirror_root_hpa; union kvm_cpu_role cpu_role; union kvm_mmu_page_role root_role; @@ -603,8 +610,15 @@ struct kvm_pmu { struct kvm_pmu_ops; enum { - KVM_DEBUGREG_BP_ENABLED = 1, - KVM_DEBUGREG_WONT_EXIT = 2, + KVM_DEBUGREG_BP_ENABLED = BIT(0), + KVM_DEBUGREG_WONT_EXIT = BIT(1), + /* + * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by + * hardware on exit from or enter to guest. KVM needn't switch them. + * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM + * exit, host values need to be restored. + */ + KVM_DEBUGREG_AUTO_SWITCH = BIT(2), }; struct kvm_mtrr { @@ -687,8 +701,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -739,6 +758,24 @@ struct kvm_queued_exception { bool has_payload; }; +/* + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. + */ +enum kvm_only_cpuid_leafs { + CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, + CPUID_8000_0007_EDX, + CPUID_8000_0022_EAX, + CPUID_7_2_EDX, + CPUID_24_0_EBX, + CPUID_8000_0021_ECX, + NR_KVM_CPU_CAPS, + + NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -760,6 +797,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; @@ -813,6 +851,11 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_shadow_page_cache; struct kvm_mmu_memory_cache mmu_shadowed_info_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * This cache is to allocate external page table. E.g. private EPT used + * by the TDX module. + */ + struct kvm_mmu_memory_cache mmu_external_spt_cache; /* * QEMU userspace and the guest each have their own FPU state. @@ -854,27 +897,24 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; - struct kvm_hypervisor_cpuid kvm_cpuid; + bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; /* - * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly - * when "struct kvm_vcpu_arch" is no longer defined in an - * arch/x86/include/asm header. The max is mostly arbitrary, i.e. - * can be increased as necessary. + * cpu_caps holds the effective guest capabilities, i.e. the features + * the vCPU is allowed to use. Typically, but not always, features can + * be used by the guest if and only if both KVM and userspace want to + * expose the feature to the guest. + * + * A common exception is for virtualization holes, i.e. when KVM can't + * prevent the guest from using a feature, in which case the vCPU "has" + * the feature regardless of what KVM or userspace desires. + * + * Note, features that don't require KVM involvement in any way are + * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the + * guest CPUID provided by userspace. */ -#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG - - /* - * Track whether or not the guest is allowed to use features that are - * governed by KVM, where "governed" means KVM needs to manage state - * and/or explicitly enable the feature in hardware. Typically, but - * not always, governed features can be used by the guest if and only - * if both KVM and userspace want to expose the feature to the guest. - */ - struct { - DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES); - } governed_features; + u32 cpu_caps[NR_KVM_CPU_CAPS]; u64 reserved_gpa_bits; int maxphyaddr; @@ -885,9 +925,11 @@ struct kvm_vcpu_arch { bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + unsigned long cui_linear_rip; gpa_t time; - struct pvclock_vcpu_time_info hv_clock; + s8 pvclock_tsc_shift; + u32 pvclock_tsc_mul; unsigned int hw_tsc_khz; struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ @@ -975,8 +1017,8 @@ struct kvm_vcpu_arch { u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ u16 vec; u32 id; - bool send_user_only; u32 host_apf_flags; + bool send_always; bool delivery_as_pf_vmexit; bool pageready_pending; } apf; @@ -1001,6 +1043,7 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; + int highest_stale_pending_ioapic_eoi; /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; @@ -1031,6 +1074,7 @@ struct kvm_vcpu_arch { /* Protected Guests */ bool guest_state_protected; + bool guest_tsc_protected; /* * Set when PDPTS were loaded directly by the userspace without @@ -1167,6 +1211,8 @@ struct kvm_xen { struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + + struct kvm_xen_hvm_config hvm_config; }; #endif @@ -1275,6 +1321,12 @@ enum kvm_apicv_inhibit { */ APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, + /* + * AVIC is disabled because the vCPU's APIC ID is beyond the max + * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable. + */ + APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG, + NR_APICV_INHIBIT_REASONS, }; @@ -1293,7 +1345,8 @@ enum kvm_apicv_inhibit { __APICV_INHIBIT_REASON(IRQWIN), \ __APICV_INHIBIT_REASON(PIT_REINJ), \ __APICV_INHIBIT_REASON(SEV), \ - __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED) + __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \ + __APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG) struct kvm_arch { unsigned long n_used_mmu_pages; @@ -1305,7 +1358,7 @@ struct kvm_arch { bool has_private_mem; bool has_protected_state; bool pre_fault_allowed; - struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; + struct hlist_head *mmu_page_hash; struct list_head active_mmu_pages; /* * A list of kvm_mmu_page structs that, if zapped, could possibly be @@ -1332,15 +1385,15 @@ struct kvm_arch { u64 shadow_mmio_value; - struct iommu_domain *iommu_domain; - bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; -#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE - atomic_t assigned_device_count; + unsigned long nr_possible_bypass_irqs; + +#ifdef CONFIG_KVM_IOAPIC struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; +#endif atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map __rcu *apic_map; @@ -1355,12 +1408,8 @@ struct kvm_arch { gpa_t wall_clock; - bool mwait_in_guest; - bool hlt_in_guest; - bool pause_in_guest; - bool cstate_in_guest; + u64 disabled_exits; - unsigned long irq_sources_bitmap; s64 kvmclock_offset; /* @@ -1389,11 +1438,6 @@ struct kvm_arch { struct delayed_work kvmclock_update_work; struct delayed_work kvmclock_sync_work; - struct kvm_xen_hvm_config xen_hvm_config; - - /* reads protected by irq_srcu, writes by irq_lock */ - struct hlist_head mask_notifier_list; - #ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; #endif @@ -1416,6 +1460,7 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; bool exception_payload_enabled; @@ -1445,10 +1490,16 @@ struct kvm_arch { struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; struct vhost_task *nx_huge_page_recovery_thread; u64 nx_huge_page_last; + struct once nx_once; #ifdef CONFIG_X86_64 - /* The number of TDP MMU pages across all roots. */ +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ atomic64_t tdp_mmu_pages; +#endif /* * List of struct kvm_mmu_pages being used as roots. @@ -1456,6 +1507,7 @@ struct kvm_arch { * tdp_mmu_page set. * * For reads, this list is protected by: + * RCU alone or * the MMU lock in read mode + RCU or * the MMU lock in write mode * @@ -1536,6 +1588,15 @@ struct kvm_arch { */ #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) struct kvm_mmu_memory_cache split_desc_cache; + + gfn_t gfn_direct_bits; + + /* + * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A Zero + * value indicates CPU dirty logging is unsupported or disabled in + * current VM. + */ + int cpu_dirty_log_size; }; struct kvm_vm_stat { @@ -1623,6 +1684,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; } +enum kvm_x86_run_flags { + KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0), + KVM_RUN_LOAD_GUEST_DR6 = BIT(1), + KVM_RUN_LOAD_DEBUGCTL = BIT(2), +}; + struct kvm_x86_ops { const char *name; @@ -1639,6 +1706,7 @@ struct kvm_x86_ops { unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); + void (*vm_pre_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ int (*vcpu_precreate)(struct kvm *kvm); @@ -1650,6 +1718,12 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); + /* + * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to + * match the host's value even while the guest is active. + */ + const u64 HOST_OWNED_DEBUGCTL; + void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -1702,7 +1776,7 @@ struct kvm_x86_ops { int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, - bool force_immediate_exit); + u64 run_flags); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); @@ -1734,8 +1808,7 @@ struct kvm_x86_ops { const unsigned long required_apicv_inhibits; bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); - void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); - void (*hwapic_isr_update)(int isr); + void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); @@ -1749,6 +1822,21 @@ struct kvm_x86_ops { void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); + /* Update external mapping with page table link. */ + int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); + /* Update the external page table from spte getting set. */ + int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + kvm_pfn_t pfn_for_gfn); + + /* Update external page tables for page table about to be freed. */ + int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *external_spt); + + /* Update external page table from spte getting removed, and flush TLB. */ + int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, + kvm_pfn_t pfn_for_gfn); + bool (*has_wbinvd_exit)(void); u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); @@ -1757,12 +1845,15 @@ struct kvm_x86_ops { void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); /* - * Retrieve somewhat arbitrary exit information. Intended to + * Retrieve somewhat arbitrary exit/entry information. Intended to * be used only from within tracepoints or error paths. */ void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1, u64 *info2, - u32 *exit_int_info, u32 *exit_int_info_err_code); + u32 *intr_info, u32 *error_code); + + void (*get_entry_info)(struct kvm_vcpu *vcpu, + u32 *intr_info, u32 *error_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, @@ -1770,11 +1861,6 @@ struct kvm_x86_ops { struct x86_exception *exception); void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - /* - * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero - * value indicates CPU dirty logging is unsupported or disabled. - */ - int cpu_dirty_log_size; void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); const struct kvm_x86_nested_ops *nested_ops; @@ -1782,12 +1868,14 @@ struct kvm_x86_ops { void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); - int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); - void (*pi_start_assignment)(struct kvm *kvm); + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_vcpu *vcpu, u32 vector); + void (*pi_start_bypass)(struct kvm *kvm); void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); + bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); @@ -1804,6 +1892,7 @@ struct kvm_x86_ops { int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); + int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); @@ -1819,7 +1908,7 @@ struct kvm_x86_ops { int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); - void (*msr_filter_changed)(struct kvm_vcpu *vcpu); + void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); @@ -1877,6 +1966,8 @@ struct kvm_arch_async_pf { extern u32 __read_mostly kvm_nr_uret_msrs; extern bool __read_mostly allow_smaller_maxphyaddr; extern bool __read_mostly enable_apicv; +extern bool __read_mostly enable_ipiv; +extern bool __read_mostly enable_device_posted_irqs; extern struct kvm_x86_ops kvm_x86_ops; #define kvm_x86_call(func) static_call(kvm_x86_##func) @@ -1894,7 +1985,7 @@ void kvm_x86_vendor_exit(void); #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + return kvzalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT); } #define __KVM_HAVE_ARCH_VM_FREE @@ -1939,7 +2030,7 @@ void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_init_vm(struct kvm *kvm); +int kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, @@ -1970,19 +2061,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); -struct kvm_irq_mask_notifier { - void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); - int irq; - struct hlist_node link; -}; - -void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, - struct kvm_irq_mask_notifier *kimn); -void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, - bool mask); - extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -2019,8 +2097,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); * VMware backdoor emulation handles select instructions * and reinjects the #GP for all other cases. * - * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which - * case the CR2/GPA value pass on the stack is valid. + * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case + * the CR2/GPA value pass on the stack is valid. * * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility * state and inject single-step #DBs after skipping @@ -2055,6 +2133,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) +static inline bool kvm_can_emulate_event_vectoring(int emul_type) +{ + return !(emul_type & EMULTYPE_PF); +} + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); @@ -2062,6 +2145,8 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, u8 ndata); void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); +void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); + void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); @@ -2114,8 +2199,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); -void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); -void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, + bool has_error_code, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); @@ -2134,9 +2219,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } -int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); -void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); - void kvm_inject_nmi(struct kvm_vcpu *vcpu); int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); @@ -2181,12 +2263,6 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, - unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, - int op_64_bit, int cpl); -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); @@ -2225,7 +2301,7 @@ static inline unsigned long read_msr(unsigned long msr) { u64 value; - rdmsrl(msr, value); + rdmsrq(msr, value); return value; } #endif @@ -2279,6 +2355,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +void kvm_user_return_msr_update_cache(unsigned int index, u64 val); static inline bool kvm_is_supported_user_return_msr(u32 msr) { @@ -2318,9 +2395,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); -void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq); - static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ @@ -2362,7 +2436,12 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ KVM_X86_QUIRK_SLOT_ZAP_ALL | \ - KVM_X86_QUIRK_STUFF_FEATURE_MSRS) + KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) + +#define KVM_X86_CONDITIONAL_QUIRKS \ + (KVM_X86_QUIRK_CD_NW_CLEARED | \ + KVM_X86_QUIRK_IGNORE_GUEST_PAT) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was @@ -2371,4 +2450,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_device_posted_irqs; +} + #endif /* _ASM_X86_KVM_HOST_H */ |