diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-05 00:59:54 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-05 00:59:54 +0300 |
commit | 7c5c3a6177fa9646884114fc7f2e970b0bc50dc9 (patch) | |
tree | 956857522574ae7cb07d2227dc16e53d7e9e00e7 /arch/arm64/include/asm/kvm_host.h | |
parent | f0a892f599c46af673e47418c47c15e69a7b67f4 (diff) | |
parent | 281106f938d3daaea6f8b6723a8217a2a1ef6936 (diff) | |
download | linux-7c5c3a6177fa9646884114fc7f2e970b0bc50dc9.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"Quite a large pull request due to a selftest API overhaul and some
patches that had come in too late for 5.19.
ARM:
- Unwinder implementations for both nVHE modes (classic and
protected), complete with an overflow stack
- Rework of the sysreg access from userspace, with a complete rewrite
of the vgic-v3 view to allign with the rest of the infrastructure
- Disagregation of the vcpu flags in separate sets to better track
their use model.
- A fix for the GICv2-on-v3 selftest
- A small set of cosmetic fixes
RISC-V:
- Track ISA extensions used by Guest using bitmap
- Added system instruction emulation framework
- Added CSR emulation framework
- Added gfp_custom flag in struct kvm_mmu_memory_cache
- Added G-stage ioremap() and iounmap() functions
- Added support for Svpbmt inside Guest
s390:
- add an interface to provide a hypervisor dump for secure guests
- improve selftests to use TAP interface
- enable interpretive execution of zPCI instructions (for PCI
passthrough)
- First part of deferred teardown
- CPU Topology
- PV attestation
- Minor fixes
x86:
- Permit guests to ignore single-bit ECC errors
- Intel IPI virtualization
- Allow getting/setting pending triple fault with
KVM_GET/SET_VCPU_EVENTS
- PEBS virtualization
- Simplify PMU emulation by just using PERF_TYPE_RAW events
- More accurate event reinjection on SVM (avoid retrying
instructions)
- Allow getting/setting the state of the speaker port data bit
- Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls
are inconsistent
- "Notify" VM exit (detect microarchitectural hangs) for Intel
- Use try_cmpxchg64 instead of cmpxchg64
- Ignore benign host accesses to PMU MSRs when PMU is disabled
- Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior
- Allow NX huge page mitigation to be disabled on a per-vm basis
- Port eager page splitting to shadow MMU as well
- Enable CMCI capability by default and handle injected UCNA errors
- Expose pid of vcpu threads in debugfs
- x2AVIC support for AMD
- cleanup PIO emulation
- Fixes for LLDT/LTR emulation
- Don't require refcounted "struct page" to create huge SPTEs
- Miscellaneous cleanups:
- MCE MSR emulation
- Use separate namespaces for guest PTEs and shadow PTEs bitmasks
- PIO emulation
- Reorganize rmap API, mostly around rmap destruction
- Do not workaround very old KVM bugs for L0 that runs with nesting enabled
- new selftests API for CPUID
Generic:
- Fix races in gfn->pfn cache refresh; do not pin pages tracked by
the cache
- new selftests API using struct kvm_vcpu instead of a (vm, id)
tuple"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits)
selftests: kvm: set rax before vmcall
selftests: KVM: Add exponent check for boolean stats
selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test
selftests: KVM: Check stat name before other fields
KVM: x86/mmu: remove unused variable
RISC-V: KVM: Add support for Svpbmt inside Guest/VM
RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap()
RISC-V: KVM: Add G-stage ioremap() and iounmap() functions
KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache
RISC-V: KVM: Add extensible CSR emulation framework
RISC-V: KVM: Add extensible system instruction emulation framework
RISC-V: KVM: Factor-out instruction emulation into separate sources
RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run
RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function
RISC-V: KVM: Fix variable spelling mistake
RISC-V: KVM: Improve ISA extension by using a bitmap
KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs()
KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog
KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
KVM: x86: Do not block APIC write for non ICR registers
...
Diffstat (limited to 'arch/arm64/include/asm/kvm_host.h')
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 205 |
1 files changed, 148 insertions, 57 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index de32152cea04..f38ef299f13b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -325,8 +325,30 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Miscellaneous vcpu state flags */ - u64 flags; + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_state; + + /* Configuration flags, set once and for all before the vcpu can run */ + u8 cflags; + + /* Input flags to the hypervisor code, potentially cleared after use */ + u8 iflags; + + /* State flags for kernel bookkeeping, unused by the hypervisor code */ + u8 sflags; + + /* + * Don't run the guest (internal implementation need). + * + * Contrary to the flags above, this is set/cleared outside of + * a vcpu context, and thus cannot be mixed with the flags + * themselves (or the flag accesses need to be made atomic). + */ + bool pause; /* * We maintain more than a single set of debug registers to support @@ -376,9 +398,6 @@ struct kvm_vcpu_arch { /* vcpu power state */ struct kvm_mp_state mp_state; - /* Don't run the guest (internal implementation need) */ - bool pause; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -392,10 +411,6 @@ struct kvm_vcpu_arch { /* Additional reset state */ struct vcpu_reset_state reset_state; - /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ - bool sysregs_loaded_on_cpu; - /* Guest PV state */ struct { u64 last_steal; @@ -403,6 +418,124 @@ struct kvm_vcpu_arch { } steal; }; +/* + * Each 'flag' is composed of a comma-separated triplet: + * + * - the flag-set it belongs to in the vcpu->arch structure + * - the value for that flag + * - the mask for that flag + * + * __vcpu_single_flag() builds such a triplet for a single-bit flag. + * unpack_vcpu_flag() extract the flag value from the triplet for + * direct use outside of the flag accessors. + */ +#define __vcpu_single_flag(_set, _f) _set, (_f), (_f) + +#define __unpack_flag(_set, _f, _m) _f +#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__) + +#define __build_check_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *_fset; \ + \ + /* Check that the flags fit in the mask */ \ + BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \ + /* Check that the flags fit in the type */ \ + BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \ + } while (0) + +#define __vcpu_get_flag(v, flagset, f, m) \ + ({ \ + __build_check_flag(v, flagset, f, m); \ + \ + v->arch.flagset & (m); \ + }) + +#define __vcpu_set_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *fset; \ + \ + __build_check_flag(v, flagset, f, m); \ + \ + fset = &v->arch.flagset; \ + if (HWEIGHT(m) > 1) \ + *fset &= ~(m); \ + *fset |= (f); \ + } while (0) + +#define __vcpu_clear_flag(v, flagset, f, m) \ + do { \ + typeof(v->arch.flagset) *fset; \ + \ + __build_check_flag(v, flagset, f, m); \ + \ + fset = &v->arch.flagset; \ + *fset &= ~(m); \ + } while (0) + +#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__) +#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__) +#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__) + +/* SVE exposed to guest */ +#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0)) +/* SVE config completed */ +#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1)) +/* PTRAUTH exposed to guest */ +#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2)) + +/* Exception pending */ +#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0)) +/* + * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't + * be set together with an exception... + */ +#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1)) +/* Target EL/MODE (not a single flag, but let's abuse the macro) */ +#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1)) + +/* Helpers to encode exceptions with minimum fuss */ +#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK) +#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL) +#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL + +/* + * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following + * values: + * + * For AArch32 EL1: + */ +#define EXCEPT_AA32_UND __vcpu_except_flags(0) +#define EXCEPT_AA32_IABT __vcpu_except_flags(1) +#define EXCEPT_AA32_DABT __vcpu_except_flags(2) +/* For AArch64: */ +#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0) +#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1) +#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2) +#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3) +/* For AArch64 with NV (one day): */ +#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4) +#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5) +#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6) +#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7) +/* Guest debug is live */ +#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4)) +/* Save SPE context if active */ +#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) +/* Save TRBE context if active */ +#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) + +/* SVE enabled for host EL0 */ +#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) +/* SME enabled for EL0 */ +#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) +/* Physical CPU not in supported_cpus */ +#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) +/* WFIT instruction trapped */ +#define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) +/* vcpu system registers loaded on physical CPU */ +#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) + /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ sve_ffr_offset((vcpu)->arch.sve_max_vl)) @@ -423,70 +556,31 @@ struct kvm_vcpu_arch { __size_ret; \ }) -/* vcpu_arch flags field values: */ -#define KVM_ARM64_DEBUG_DIRTY (1 << 0) -#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ -#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ -#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */ -#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ -#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ -#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ -#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ -/* - * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be - * set together with an exception... - */ -#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ -#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -/* - * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can - * take the following values: - * - * For AArch32 EL1: - */ -#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) -#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) -#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) -/* For AArch64: */ -#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) -#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) -#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) -#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) - -#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ -#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ -#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) -#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ -#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */ -#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */ - #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_USE_HW | \ KVM_GUESTDBG_SINGLESTEP) #define vcpu_has_sve(vcpu) (system_supports_sve() && \ - ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) + vcpu_get_flag(vcpu, GUEST_HAS_SVE)) #ifdef CONFIG_ARM64_PTR_AUTH #define vcpu_has_ptrauth(vcpu) \ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ - (vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH) + vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH)) #else #define vcpu_has_ptrauth(vcpu) false #endif #define vcpu_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU) #define vcpu_set_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU) #define vcpu_clear_on_unsupported_cpu(vcpu) \ - ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU) + vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU) #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) @@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu); int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); -#define kvm_arm_vcpu_sve_finalized(vcpu) \ - ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED) #define kvm_has_mte(kvm) \ (system_supports_mte() && \ |