diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-03 01:13:15 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-03 01:13:15 +0300 |
commit | 8c1b724ddb218f221612d4c649bc9c7819d8d7a6 (patch) | |
tree | 0e226f4156b554eec2690adb8f30ba54b15b68cc /arch/x86/include | |
parent | f14a9532ee30c68a56ff502c382860f674cc180c (diff) | |
parent | 514ccc194971d0649e4e7ec8a9b3a6e33561d7bf (diff) | |
download | linux-8c1b724ddb218f221612d4c649bc9c7819d8d7a6.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM:
- GICv4.1 support
- 32bit host removal
PPC:
- secure (encrypted) guests running under the Protected Execution
Framework ultravisor
s390:
- allow disabling GISA (hardware interrupt injection), and add
protected VMs/ultravisor support.
x86:
- New dirty bitmap flag that sets all bits in the bitmap when dirty
page logging is enabled; this is faster because it doesn't require
bulk modification of the page tables.
- Initial work on making nested SVM event injection more similar to
VMX, and less buggy.
- Various cleanups to MMU code (though the big ones and related
optimizations were delayed to 5.8). Instead of using cr3 in
function names which occasionally means eptp, KVM too has
standardized on "pgd".
- A large refactoring of CPUID features, which now use an array that
parallels the core x86_features.
- Some removal of pointer chasing from kvm_x86_ops, which will also
be switched to static calls as soon as they are available.
- New Tigerlake CPUID features.
- More bugfixes, optimizations and cleanups.
Generic:
- selftests: cleanups, new MMU notifier stress test, steal-time test
- CSV output for kvm_stat"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (277 commits)
x86/kvm: fix a missing-prototypes "vmread_error"
KVM: x86: Fix BUILD_BUG() in __cpuid_entry_get_reg() w/ CONFIG_UBSAN=y
KVM: VMX: Add a trampoline to fix VMREAD error handling
KVM: SVM: Annotate svm_x86_ops as __initdata
KVM: VMX: Annotate vmx_x86_ops as __initdata
KVM: x86: Drop __exit from kvm_x86_ops' hardware_unsetup()
KVM: x86: Copy kvm_x86_ops by value to eliminate layer of indirection
KVM: x86: Set kvm_x86_ops only after ->hardware_setup() completes
KVM: VMX: Configure runtime hooks using vmx_x86_ops
KVM: VMX: Move hardware_setup() definition below vmx_x86_ops
KVM: x86: Move init-only kvm_x86_ops to separate struct
KVM: Pass kvm_init()'s opaque param to additional arch funcs
s390/gmap: return proper error code on ksm unsharing
KVM: selftests: Fix cosmetic copy-paste error in vm_mem_region_move()
KVM: Fix out of range accesses to memslots
KVM: X86: Micro-optimize IPI fastpath delay
KVM: X86: Delay read msr data iff writes ICR MSR
KVM: PPC: Book3S HV: Add a capability for enabling secure guests
KVM: arm64: GICv4.1: Expose HW-based SGIs in debugfs
KVM: arm64: GICv4.1: Allow non-trapping WFI when using HW SGIs
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 474 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 105 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_page_track.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/vmx.h | 12 |
4 files changed, 65 insertions, 529 deletions
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h deleted file mode 100644 index c06e8353efd3..000000000000 --- a/arch/x86/include/asm/kvm_emulate.h +++ /dev/null @@ -1,474 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/****************************************************************************** - * x86_emulate.h - * - * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. - * - * Copyright (c) 2005 Keir Fraser - * - * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 - */ - -#ifndef _ASM_X86_KVM_X86_EMULATE_H -#define _ASM_X86_KVM_X86_EMULATE_H - -#include <asm/desc_defs.h> - -struct x86_emulate_ctxt; -enum x86_intercept; -enum x86_intercept_stage; - -struct x86_exception { - u8 vector; - bool error_code_valid; - u16 error_code; - bool nested_page_fault; - u64 address; /* cr2 or nested page fault gpa */ - u8 async_page_fault; -}; - -/* - * This struct is used to carry enough information from the instruction - * decoder to main KVM so that a decision can be made whether the - * instruction needs to be intercepted or not. - */ -struct x86_instruction_info { - u8 intercept; /* which intercept */ - u8 rep_prefix; /* rep prefix? */ - u8 modrm_mod; /* mod part of modrm */ - u8 modrm_reg; /* index of register used */ - u8 modrm_rm; /* rm part of modrm */ - u64 src_val; /* value of source operand */ - u64 dst_val; /* value of destination operand */ - u8 src_bytes; /* size of source operand */ - u8 dst_bytes; /* size of destination operand */ - u8 ad_bytes; /* size of src/dst address */ - u64 next_rip; /* rip following the instruction */ -}; - -/* - * x86_emulate_ops: - * - * These operations represent the instruction emulator's interface to memory. - * There are two categories of operation: those that act on ordinary memory - * regions (*_std), and those that act on memory regions known to require - * special treatment or emulation (*_emulated). 
- * - * The emulator assumes that an instruction accesses only one 'emulated memory' - * location, that this location is the given linear faulting address (cr2), and - * that this is one of the instruction's data operands. Instruction fetches and - * stack operations are assumed never to access emulated memory. The emulator - * automatically deduces which operand of a string-move operation is accessing - * emulated memory, and assumes that the other operand accesses normal memory. - * - * NOTES: - * 1. The emulator isn't very smart about emulated vs. standard memory. - * 'Emulated memory' access addresses should be checked for sanity. - * 'Normal memory' accesses may fault, and the caller must arrange to - * detect and handle reentrancy into the emulator via recursive faults. - * Accesses may be unaligned and may cross page boundaries. - * 2. If the access fails (cannot emulate, or a standard access faults) then - * it is up to the memop to propagate the fault to the guest VM via - * some out-of-band mechanism, unknown to the emulator. The memop signals - * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will - * then immediately bail. - * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only - * cmpxchg8b_emulated need support 8-byte accesses. - * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. - */ -/* Access completed successfully: continue emulation as normal. */ -#define X86EMUL_CONTINUE 0 -/* Access is unhandleable: bail from emulation and return error to caller. */ -#define X86EMUL_UNHANDLEABLE 1 -/* Terminate emulation but return success to the caller. 
*/ -#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ -#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ -#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ - -struct x86_emulate_ops { - /* - * read_gpr: read a general purpose register (rax - r15) - * - * @reg: gpr number. - */ - ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); - /* - * write_gpr: write a general purpose register (rax - r15) - * - * @reg: gpr number. - * @val: value to write. - */ - void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); - /* - * read_std: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*read_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, - unsigned int bytes, - struct x86_exception *fault, bool system); - - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - - /* - * write_std: Write bytes of standard (non-emulated/special) memory. - * Used for descriptor writing. - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. - * @system:[IN ] Whether the access is forced to be at CPL0. 
- */ - int (*write_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault, bool system); - /* - * fetch: Read bytes of standard (non-emulated/special) memory. - * Used for instruction fetch. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*fetch)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * read_emulated: Read bytes from emulated/special memory area. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * write_emulated: Write bytes to emulated/special memory area. - * @addr: [IN ] Linear address to which to write. - * @val: [IN ] Value to write to memory (low-order bytes used as - * required). - * @bytes: [IN ] Number of bytes to write to memory. - */ - int (*write_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, const void *val, - unsigned int bytes, - struct x86_exception *fault); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an - * emulated/special memory area. - * @addr: [IN ] Linear address to access. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. - * @bytes: [IN ] Number of bytes to access using CMPXCHG. 
- */ - int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, - const void *old, - const void *new, - unsigned int bytes, - struct x86_exception *fault); - void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); - - int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, void *val, - unsigned int count); - - int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, const void *val, - unsigned int count); - - bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, - struct desc_struct *desc, u32 *base3, int seg); - void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, - struct desc_struct *desc, u32 base3, int seg); - unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, - int seg); - void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); - int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); - int (*cpl)(struct x86_emulate_ctxt *ctxt); - int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); - int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); - int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); - int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); - void (*halt)(struct x86_emulate_ctxt *ctxt); - void (*wbinvd)(struct x86_emulate_ctxt *ctxt); - int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - int (*intercept)(struct x86_emulate_ctxt 
*ctxt, - struct x86_instruction_info *info, - enum x86_intercept_stage stage); - - bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, - u32 *ecx, u32 *edx, bool check_limit); - bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); - bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); - bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); - - void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); - - unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, - const char *smstate); - void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); - int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); -}; - -typedef u32 __attribute__((vector_size(16))) sse128_t; - -/* Type, address-of, and value of an instruction's operand. */ -struct operand { - enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; - unsigned int bytes; - unsigned int count; - union { - unsigned long orig_val; - u64 orig_val64; - }; - union { - unsigned long *reg; - struct segmented_address { - ulong ea; - unsigned seg; - } mem; - unsigned xmm; - unsigned mm; - } addr; - union { - unsigned long val; - u64 val64; - char valptr[sizeof(sse128_t)]; - sse128_t vec_val; - u64 mm_val; - void *data; - }; -}; - -struct fetch_cache { - u8 data[15]; - u8 *ptr; - u8 *end; -}; - -struct read_cache { - u8 data[1024]; - unsigned long pos; - unsigned long end; -}; - -/* Execution mode, passed to the emulator. */ -enum x86emul_mode { - X86EMUL_MODE_REAL, /* Real mode. */ - X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ - X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ - X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ - X86EMUL_MODE_PROT64, /* 64-bit (long) mode. 
*/ -}; - -/* These match some of the HF_* flags defined in kvm_host.h */ -#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) - -/* - * fastop functions are declared as taking a never-defined fastop parameter, - * so they can't be called from C directly. - */ -struct fastop; - -typedef void (*fastop_t)(struct fastop *); - -struct x86_emulate_ctxt { - const struct x86_emulate_ops *ops; - - /* Register state before/after emulation. */ - unsigned long eflags; - unsigned long eip; /* eip before instruction emulation */ - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - enum x86emul_mode mode; - - /* interruptibility state, as a result of execution of STI or MOV SS */ - int interruptibility; - - bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ - bool tf; /* TF value before instruction (after for syscall/sysret) */ - - bool have_exception; - struct x86_exception exception; - - /* - * decode cache - */ - - /* current opcode length in bytes */ - u8 opcode_len; - u8 b; - u8 intercept; - u8 op_bytes; - u8 ad_bytes; - struct operand src; - struct operand src2; - struct operand dst; - union { - int (*execute)(struct x86_emulate_ctxt *ctxt); - fastop_t fop; - }; - int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ - bool rip_relative; - u8 rex_prefix; - u8 lock_prefix; - u8 rep_prefix; - /* bitmaps of registers in _regs[] that can be read */ - u32 regs_valid; - /* bitmaps of registers in _regs[] that have been written */ - u32 regs_dirty; - /* modrm */ - u8 modrm; - u8 modrm_mod; - u8 modrm_reg; - u8 modrm_rm; - u8 modrm_seg; - u8 seg_override; - u64 d; - unsigned long _eip; - struct operand memop; - unsigned long _regs[NR_VCPU_REGS]; - struct operand *memopp; - 
struct fetch_cache fetch; - struct read_cache io_read; - struct read_cache mem_read; -}; - -/* Repeat String Operation Prefix */ -#define REPE_PREFIX 0xf3 -#define REPNE_PREFIX 0xf2 - -/* CPUID vendors */ -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 - -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e - -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e -#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 - -enum x86_intercept_stage { - X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ - X86_ICPT_PRE_EXCEPT, - X86_ICPT_POST_EXCEPT, - X86_ICPT_POST_MEMACCESS, -}; - -enum x86_intercept { - x86_intercept_none, - x86_intercept_cr_read, - x86_intercept_cr_write, - x86_intercept_clts, - x86_intercept_lmsw, - x86_intercept_smsw, - x86_intercept_dr_read, - x86_intercept_dr_write, - x86_intercept_lidt, - x86_intercept_sidt, - x86_intercept_lgdt, - x86_intercept_sgdt, - x86_intercept_lldt, - x86_intercept_sldt, - x86_intercept_ltr, - x86_intercept_str, - x86_intercept_rdtsc, - x86_intercept_rdpmc, - x86_intercept_pushf, - x86_intercept_popf, - x86_intercept_cpuid, - x86_intercept_rsm, - x86_intercept_iret, - x86_intercept_intn, - x86_intercept_invd, - x86_intercept_pause, - x86_intercept_hlt, - x86_intercept_invlpg, - x86_intercept_invlpga, - x86_intercept_vmrun, - x86_intercept_vmload, - x86_intercept_vmsave, - x86_intercept_vmmcall, - x86_intercept_stgi, - x86_intercept_clgi, - x86_intercept_skinit, - x86_intercept_rdtscp, - x86_intercept_icebp, - x86_intercept_wbinvd, - 
x86_intercept_monitor, - x86_intercept_mwait, - x86_intercept_rdmsr, - x86_intercept_wrmsr, - x86_intercept_in, - x86_intercept_ins, - x86_intercept_out, - x86_intercept_outs, - x86_intercept_xsetbv, - - nr_x86_intercepts -}; - -/* Host execution mode. */ -#if defined(CONFIG_X86_32) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(CONFIG_X86_64) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); -bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); -#define EMULATION_FAILED -1 -#define EMULATION_OK 0 -#define EMULATION_RESTART 1 -#define EMULATION_INTERCEPTED 2 -void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int idt_index, int reason, - bool has_error_code, u32 error_code); -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); -void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); -void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); -bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); - -#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 98959e8cd448..42a2d0d3984a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -49,13 +49,16 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) -#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) +#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) #define KVM_REQ_EVENT 
KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) @@ -182,7 +185,10 @@ enum exit_fastpath_completion { EXIT_FASTPATH_SKIP_EMUL_INS, }; -#include <asm/kvm_emulate.h> +struct x86_emulate_ctxt; +struct x86_exception; +enum x86_intercept; +enum x86_intercept_stage; #define KVM_NR_MEM_OBJS 40 @@ -297,7 +303,6 @@ union kvm_mmu_extended_role { unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; - unsigned int cr4_la57:1; unsigned int maxphyaddr:6; }; }; @@ -382,8 +387,7 @@ struct kvm_mmu_root_info { * current mmu mode. */ struct kvm_mmu { - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); - unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); @@ -678,7 +682,7 @@ struct kvm_vcpu_arch { /* emulate context */ - struct x86_emulate_ctxt emulate_ctxt; + struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); @@ -808,10 +812,6 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available */ - bool gpa_available; - gpa_t gpa_val; - /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; @@ -890,6 +890,7 @@ enum kvm_irqchip_mode { #define APICV_INHIBIT_REASON_NESTED 2 #define APICV_INHIBIT_REASON_IRQWIN 3 #define APICV_INHIBIT_REASON_PIT_REINJ 4 +#define APICV_INHIBIT_REASON_X2APIC 5 struct kvm_arch { unsigned long n_used_mmu_pages; @@ -920,6 +921,7 @@ struct kvm_arch { atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; + bool apic_map_dirty; bool apic_access_page_done; unsigned long apicv_inhibit_reasons; @@ -1052,19 +1054,14 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) } struct 
kvm_x86_ops { - int (*cpu_has_kvm_support)(void); /* __init */ - int (*disabled_by_bios)(void); /* __init */ int (*hardware_enable)(void); void (*hardware_disable)(void); - int (*check_processor_compatibility)(void);/* __init */ - int (*hardware_setup)(void); /* __init */ - void (*hardware_unsetup)(void); /* __exit */ + void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); - struct kvm *(*vm_alloc)(void); - void (*vm_free)(struct kvm *); + unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); @@ -1090,7 +1087,6 @@ struct kvm_x86_ops { void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); @@ -1153,13 +1149,8 @@ struct kvm_x86_ops { int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); - int (*get_lpage_level)(void); - bool (*rdtscp_supported)(void); - bool (*invpcid_supported)(void); - - void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); - void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3); bool (*has_wbinvd_exit)(void); @@ -1171,16 +1162,12 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, - enum x86_intercept_stage stage); + enum x86_intercept_stage stage, + struct x86_exception *exception); void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion *exit_fastpath); - bool (*mpx_supported)(void); - 
bool (*xsaves_supported)(void); - bool (*umip_emulated)(void); - bool (*pt_supported)(void); - bool (*pku_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1269,6 +1256,15 @@ struct kvm_x86_ops { int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; +struct kvm_x86_init_ops { + int (*cpu_has_kvm_support)(void); + int (*disabled_by_bios)(void); + int (*check_processor_compatibility)(void); + int (*hardware_setup)(void); + + struct kvm_x86_ops *runtime_ops; +}; + struct kvm_arch_async_pf { u32 token; gfn_t gfn; @@ -1276,25 +1272,24 @@ struct kvm_arch_async_pf { bool direct_map; }; -extern struct kvm_x86_ops *kvm_x86_ops; +extern u64 __read_mostly host_efer; + +extern struct kvm_x86_ops kvm_x86_ops; extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return kvm_x86_ops->vm_alloc(); -} - -static inline void kvm_arch_free_vm(struct kvm *kvm) -{ - return kvm_x86_ops->vm_free(kvm); + return __vmalloc(kvm_x86_ops.vm_size, + GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); } +void kvm_arch_free_vm(struct kvm *kvm); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) { - if (kvm_x86_ops->tlb_remote_flush && - !kvm_x86_ops->tlb_remote_flush(kvm)) + if (kvm_x86_ops.tlb_remote_flush && + !kvm_x86_ops.tlb_remote_flush(kvm)) return 0; else return -ENOTSUPP; @@ -1313,7 +1308,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, + int start_level); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *memslot); void 
kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, @@ -1379,10 +1375,11 @@ extern u64 kvm_mce_cap_supported; * * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to * decode the instruction length. For use *only* by - * kvm_x86_ops->skip_emulated_instruction() implementations. + * kvm_x86_ops.skip_emulated_instruction() implementations. * - * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to - * retry native execution under certain conditions. + * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to + * retry native execution under certain conditions, + * Can only be set in conjunction with EMULTYPE_PF. * * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was * triggered by KVM's magic "force emulation" prefix, @@ -1395,13 +1392,18 @@ extern u64 kvm_mce_cap_supported; * backdoor emulation, which is opt in via module param. * VMware backoor emulation handles select instructions * and reinjects the #GP for all other cases. + * + * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which + * case the CR2/GPA value pass on the stack is valid. 
*/ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_ALLOW_RETRY_PF (1 << 3) #define EMULTYPE_TRAP_UD_FORCED (1 << 4) #define EMULTYPE_VMWARE_GP (1 << 5) +#define EMULTYPE_PF (1 << 6) + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); @@ -1414,8 +1416,6 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); -struct x86_emulate_ctxt; - int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); @@ -1512,8 +1512,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); -void kvm_enable_tdp(void); -void kvm_disable_tdp(void); +void kvm_configure_mmu(bool enable_tdp, int tdp_page_level); static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception) @@ -1670,14 +1669,14 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_blocking) - kvm_x86_ops->vcpu_blocking(vcpu); + if (kvm_x86_ops.vcpu_blocking) + kvm_x86_ops.vcpu_blocking(vcpu); } static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_unblocking) - kvm_x86_ops->vcpu_unblocking(vcpu); + if (kvm_x86_ops.vcpu_unblocking) + kvm_x86_ops.vcpu_unblocking(vcpu); } static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 
172f9749dbb2..87bd6025d91d 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -49,8 +49,7 @@ struct kvm_page_track_notifier_node { void kvm_page_track_init(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm); -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8521af3fef27..5e090d1f03f8 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -500,6 +500,18 @@ enum vmcs_field { VMX_EPT_EXECUTABLE_MASK) #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) +static inline u8 vmx_eptp_page_walk_level(u64 eptp) +{ + u64 encoded_level = eptp & VMX_EPTP_PWL_MASK; + + if (encoded_level == VMX_EPTP_PWL_5) + return 5; + + /* @eptp must be pre-validated by the caller. */ + WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4); + return 4; +} + /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) |