27 files changed, 956 insertions(+), 501 deletions(-)
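Before the per-file hunks, one orientation note: the central change below replaces the global kvm_riscv_gstage_mode/kvm_riscv_gstage_pgd_levels pair with a per-VM kvm->arch.pgd_levels, from which the HGATP mode and the guest GPA-space size are derived. A minimal sketch of that arithmetic (editor's illustration, not part of the patch; constants assume a 64-bit host, where kvm_riscv_gstage_index_bits is 9 and kvm_riscv_gstage_pgd_xbits is 2):

    /* Mirrors the new inline helpers in asm/kvm_gstage.h; names here are local. */
    #define GPA_PAGE_SHIFT 12 /* HGATP_PAGE_SHIFT: 4 KiB base pages */
    #define GPA_INDEX_BITS 9  /* kvm_riscv_gstage_index_bits on 64-bit */
    #define GPA_PGD_XBITS  2  /* kvm_riscv_gstage_pgd_xbits: 4x-wide root table */

    static unsigned long gstage_gpa_bits(unsigned long pgd_levels)
    {
            return GPA_PAGE_SHIFT + pgd_levels * GPA_INDEX_BITS + GPA_PGD_XBITS;
    }

    /*
     * pgd_levels = 3 (Sv39x4) -> 12 + 27 + 2 = 41-bit GPA space
     * pgd_levels = 4 (Sv48x4) -> 12 + 36 + 2 = 50-bit GPA space
     * pgd_levels = 5 (Sv57x4) -> 12 + 45 + 2 = 59-bit GPA space
     */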
diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h index 595e2183173e..9c908432bc17 100644 --- a/arch/riscv/include/asm/kvm_gstage.h +++ b/arch/riscv/include/asm/kvm_gstage.h @@ -15,6 +15,7 @@ struct kvm_gstage { #define KVM_GSTAGE_FLAGS_LOCAL BIT(0) unsigned long vmid; pgd_t *pgd; + unsigned long pgd_levels; }; struct kvm_gstage_mapping { @@ -29,16 +30,22 @@ struct kvm_gstage_mapping { #define kvm_riscv_gstage_index_bits 10 #endif -extern unsigned long kvm_riscv_gstage_mode; -extern unsigned long kvm_riscv_gstage_pgd_levels; +extern unsigned long kvm_riscv_gstage_max_pgd_levels; #define kvm_riscv_gstage_pgd_xbits 2 #define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits)) -#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \ - (kvm_riscv_gstage_pgd_levels * \ - kvm_riscv_gstage_index_bits) + \ - kvm_riscv_gstage_pgd_xbits) -#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits)) + +static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels) +{ + return (HGATP_PAGE_SHIFT + + pgd_levels * kvm_riscv_gstage_index_bits + + kvm_riscv_gstage_pgd_xbits); +} + +static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels) +{ + return BIT_ULL(kvm_riscv_gstage_gpa_bits(pgd_levels)); +} bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, pte_t **ptepp, u32 *ptep_level); @@ -53,6 +60,10 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage, bool page_rdonly, bool page_exec, struct kvm_gstage_mapping *out_map); +int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage, + struct kvm_mmu_memory_cache *pcache, + gpa_t addr, u32 target_level, bool flush); + enum kvm_riscv_gstage_op { GSTAGE_OP_NOP = 0, /* Nothing */ GSTAGE_OP_CLEAR, /* Clear/Unmap */ @@ -69,4 +80,30 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end void kvm_riscv_gstage_mode_detect(void); +static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels) +{ + switch (pgd_levels) { + case 2: + return HGATP_MODE_SV32X4; + case 3: + return HGATP_MODE_SV39X4; + case 4: + return HGATP_MODE_SV48X4; + case 5: + return HGATP_MODE_SV57X4; + default: + WARN_ON_ONCE(1); + return HGATP_MODE_OFF; + } +} + +static inline void kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm) +{ + gstage->kvm = kvm; + gstage->flags = 0; + gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage->pgd = kvm->arch.pgd; + gstage->pgd_levels = kvm->arch.pgd_levels; +} + #endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 24585304c02b..75b0a951c1bc 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -18,6 +18,7 @@ #include <asm/ptrace.h> #include <asm/kvm_tlb.h> #include <asm/kvm_vmid.h> +#include <asm/kvm_vcpu_config.h> #include <asm/kvm_vcpu_fp.h> #include <asm/kvm_vcpu_insn.h> #include <asm/kvm_vcpu_sbi.h> @@ -47,18 +48,6 @@ #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE -#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ - BIT(EXC_INST_ILLEGAL) | \ - BIT(EXC_BREAKPOINT) | \ - BIT(EXC_SYSCALL) | \ - BIT(EXC_INST_PAGE_FAULT) | \ - BIT(EXC_LOAD_PAGE_FAULT) | \ - BIT(EXC_STORE_PAGE_FAULT)) - -#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ - BIT(IRQ_VS_TIMER) | \ - BIT(IRQ_VS_EXT)) - #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) @@ -94,6 +83,7 @@ struct kvm_arch { /* G-stage page table */ pgd_t *pgd; phys_addr_t pgd_phys; + 
unsigned long pgd_levels; /* Guest Timer */ struct kvm_guest_timer timer; @@ -167,12 +157,6 @@ struct kvm_vcpu_csr { unsigned long senvcfg; }; -struct kvm_vcpu_config { - u64 henvcfg; - u64 hstateen0; - unsigned long hedeleg; -}; - struct kvm_vcpu_smstateen_csr { unsigned long sstateen0; }; @@ -273,6 +257,9 @@ struct kvm_vcpu_arch { /* 'static' configurations which are set only once */ struct kvm_vcpu_config cfg; + /* Indicates modified guest CSRs */ + bool csr_dirty; + /* SBI steal-time accounting */ struct { gpa_t shmem; diff --git a/arch/riscv/include/asm/kvm_isa.h b/arch/riscv/include/asm/kvm_isa.h new file mode 100644 index 000000000000..bc4b956d5f17 --- /dev/null +++ b/arch/riscv/include/asm/kvm_isa.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Qualcomm Technologies, Inc. + */ + +#ifndef __KVM_RISCV_ISA_H +#define __KVM_RISCV_ISA_H + +#include <linux/types.h> + +unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext); + +int __kvm_riscv_isa_check_host(unsigned long ext, unsigned long *base_ext); +#define kvm_riscv_isa_check_host(ext) \ + __kvm_riscv_isa_check_host(KVM_RISCV_ISA_EXT_##ext, NULL) + +bool kvm_riscv_isa_enable_allowed(unsigned long ext); +bool kvm_riscv_isa_disable_allowed(unsigned long ext); + +#endif diff --git a/arch/riscv/include/asm/kvm_vcpu_config.h b/arch/riscv/include/asm/kvm_vcpu_config.h new file mode 100644 index 000000000000..fcc15a0296b3 --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_config.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Qualcomm Technologies, Inc. + */ + +#ifndef __KVM_VCPU_RISCV_CONFIG_H +#define __KVM_VCPU_RISCV_CONFIG_H + +#include <linux/types.h> + +struct kvm_vcpu; + +struct kvm_vcpu_config { + u64 henvcfg; + u64 hstateen0; + unsigned long hedeleg; + unsigned long hideleg; +}; + +void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 6a89c1d00a72..504e73305343 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -110,6 +110,10 @@ struct kvm_riscv_timer { __u64 state; }; +/* Possible states for kvm_riscv_timer */ +#define KVM_RISCV_TIMER_STATE_OFF 0 +#define KVM_RISCV_TIMER_STATE_ON 1 + /* * ISA extension IDs specific to KVM. 
This is not the same as the host ISA * extension IDs as that is internal to the host and should not be exposed @@ -238,10 +242,6 @@ struct kvm_riscv_sbi_fwft { struct kvm_riscv_sbi_fwft_feature pointer_masking; }; -/* Possible states for kvm_riscv_timer */ -#define KVM_RISCV_TIMER_STATE_OFF 0 -#define KVM_RISCV_TIMER_STATE_ON 1 - /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 3b8afb038b35..296c2ba05089 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -15,11 +15,13 @@ kvm-y += aia_aplic.o kvm-y += aia_device.o kvm-y += aia_imsic.o kvm-y += gstage.o +kvm-y += isa.o kvm-y += main.o kvm-y += mmu.o kvm-y += nacl.o kvm-y += tlb.o kvm-y += vcpu.o +kvm-y += vcpu_config.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o kvm-y += vcpu_insn.o diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 49c71d3cdb00..3d1e81e2a36b 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -11,7 +11,7 @@ #include <linux/irqchip/riscv-imsic.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <linux/cpufeature.h> +#include <asm/kvm_isa.h> static int aia_create(struct kvm_device *dev, u32 type) { @@ -23,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type) if (irqchip_in_kernel(kvm)) return -EEXIST; - if (!riscv_isa_extension_available(NULL, SSAIA)) + if (kvm_riscv_isa_check_host(SSAIA)) return -ENODEV; ret = -EBUSY; diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c index b67d60d722c2..d9fe8be2a151 100644 --- a/arch/riscv/kvm/gstage.c +++ b/arch/riscv/kvm/gstage.c @@ -12,22 +12,21 @@ #include <asm/kvm_gstage.h> #ifdef CONFIG_64BIT -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4; -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3; +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3; #else -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4; -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2; +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2; #endif #define gstage_pte_leaf(__ptep) \ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) -static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) +static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage, + gpa_t addr, u32 level) { unsigned long mask; unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level); - if (level == (kvm_riscv_gstage_pgd_levels - 1)) + if (level == gstage->pgd_levels - 1) mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1; else mask = PTRS_PER_PTE - 1; @@ -40,12 +39,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte) return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } -static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) +static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size, + u32 *out_level) { u32 i; unsigned long psz = 1UL << 12; - for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) { + for (i = 0; i < gstage->pgd_levels; i++) { if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) { *out_level = i; return 0; @@ -55,21 +55,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) return -EINVAL; } -static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) +static int 
gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level, + unsigned long *out_pgorder) { - if (kvm_riscv_gstage_pgd_levels < level) + if (gstage->pgd_levels < level) return -EINVAL; *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits); return 0; } -static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) +static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level, + unsigned long *out_pgsize) { int rc; unsigned long page_order = PAGE_SHIFT; - rc = gstage_level_to_page_order(level, &page_order); + rc = gstage_level_to_page_order(gstage, level, &page_order); if (rc) return rc; @@ -81,11 +83,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, pte_t **ptepp, u32 *ptep_level) { pte_t *ptep; - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + u32 current_level = gstage->pgd_levels - 1; *ptep_level = current_level; ptep = (pte_t *)gstage->pgd; - ptep = &ptep[gstage_pte_index(addr, current_level)]; + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; while (ptep && pte_val(ptep_get(ptep))) { if (gstage_pte_leaf(ptep)) { *ptep_level = current_level; @@ -97,7 +99,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, current_level--; *ptep_level = current_level; ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); - ptep = &ptep[gstage_pte_index(addr, current_level)]; + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; } else { ptep = NULL; } @@ -110,7 +112,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr) { unsigned long order = PAGE_SHIFT; - if (gstage_level_to_page_order(level, &order)) + if (gstage_level_to_page_order(gstage, level, &order)) return; addr &= ~(BIT(order) - 1); @@ -125,9 +127,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, struct kvm_mmu_memory_cache *pcache, const struct kvm_gstage_mapping *map) { - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + u32 current_level = gstage->pgd_levels - 1; pte_t *next_ptep = (pte_t *)gstage->pgd; - pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; if (current_level < map->level) return -EINVAL; @@ -151,7 +153,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, } current_level--; - ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; } if (pte_val(*ptep) != pte_val(map->pte)) { @@ -163,19 +165,38 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, return 0; } +static void kvm_riscv_gstage_update_pte_prot(struct kvm_gstage *gstage, u32 level, + gpa_t addr, pte_t *ptep, pgprot_t prot) +{ + pte_t new_pte; + + if (pgprot_val(pte_pgprot(ptep_get(ptep))) == pgprot_val(prot)) + return; + + new_pte = pfn_pte(pte_pfn(ptep_get(ptep)), prot); + new_pte = pte_mkdirty(new_pte); + + set_pte(ptep, new_pte); + + gstage_tlb_flush(gstage, level, addr); +} + int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage, struct kvm_mmu_memory_cache *pcache, gpa_t gpa, phys_addr_t hpa, unsigned long page_size, bool page_rdonly, bool page_exec, struct kvm_gstage_mapping *out_map) { + bool found_leaf; + u32 ptep_level; pgprot_t prot; + pte_t *ptep; int ret; out_map->addr = gpa; out_map->level = 0; - ret = gstage_page_size_to_level(page_size, &out_map->level); + ret = gstage_page_size_to_level(gstage, page_size, &out_map->level); if (ret) return ret; @@ -203,12 +224,119 @@ int kvm_riscv_gstage_map_page(struct 
kvm_gstage *gstage, else prot = PAGE_WRITE; } + + found_leaf = kvm_riscv_gstage_get_leaf(gstage, gpa, &ptep, &ptep_level); + if (found_leaf) { + /* + * ptep_level is the current gstage mapping level of addr, out_map->level + * is the required mapping level during fault handling. + * + * 1) ptep_level > out_map->level + * This happens when dirty logging is enabled and huge pages are used. + * KVM must track the pages at 4K level, and split the huge mapping + * into 4K mappings. + * + * 2) ptep_level < out_map->level + * This happens when dirty logging is disabled and huge pages are used. + * The gstage is split into 4K mappings, but the out_map level is now + * back to the huge page level. Ignore the out_map level this time, and + * just update the pte prot here. Otherwise, we would fall back to mapping + * the gstage at huge page level in `kvm_riscv_gstage_set_pte`, with the + * overhead of freeing the page tables (not supported now), which would slow + * down the vCPUs' performance. + * + * It is better to recover the huge page mapping in the ioctl context when + * disabling dirty logging. + * + * 3) ptep_level == out_map->level + * We already have the ptep, just update the pte prot if the pfn does not change. + * There is no need to invoke `kvm_riscv_gstage_set_pte` again. + */ + if (ptep_level > out_map->level) { + kvm_riscv_gstage_split_huge(gstage, pcache, gpa, + out_map->level, true); + } else if (ALIGN_DOWN(PFN_PHYS(pte_pfn(ptep_get(ptep))), page_size) == hpa) { + kvm_riscv_gstage_update_pte_prot(gstage, ptep_level, gpa, ptep, prot); + return 0; + } + } + out_map->pte = pfn_pte(PFN_DOWN(hpa), prot); out_map->pte = pte_mkdirty(out_map->pte); return kvm_riscv_gstage_set_pte(gstage, pcache, out_map); } +static inline unsigned long make_child_pte(unsigned long huge_pte, int index, + unsigned long child_page_size) +{ + unsigned long child_pte = huge_pte; + unsigned long child_pfn_offset; + + /* + * The child_pte already has the base address of the huge page being + * split. So we just have to OR in the offset to the page at the next + * lower level for the given index.
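 * Worked example (editor's illustration; level/page-size values assume
 * Sv39x4): splitting a level-2 (1 GiB) leaf into level-1 (2 MiB) children
 * gives child_page_size = SZ_2M, so entry i contributes a PFN offset of
 * i * (SZ_2M / PAGE_SIZE) = i * 512, i.e. child i covers the huge-page
 * base address plus i * 2 MiB.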
+ */ + child_pfn_offset = index * (child_page_size / PAGE_SIZE); + child_pte |= pte_val(pfn_pte(child_pfn_offset, __pgprot(0))); + + return child_pte; +} + +int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage, + struct kvm_mmu_memory_cache *pcache, + gpa_t addr, u32 target_level, bool flush) +{ + u32 current_level = gstage->pgd_levels - 1; + pte_t *next_ptep = (pte_t *)gstage->pgd; + unsigned long huge_pte, child_pte; + unsigned long child_page_size; + pte_t *ptep; + int i, ret; + + if (!pcache) + return -ENOMEM; + + while (current_level > target_level) { + ptep = (pte_t *)&next_ptep[gstage_pte_index(gstage, addr, current_level)]; + + if (!pte_val(ptep_get(ptep))) + break; + + if (!gstage_pte_leaf(ptep)) { + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); + current_level--; + continue; + } + + huge_pte = pte_val(ptep_get(ptep)); + + ret = gstage_level_to_page_size(gstage, current_level - 1, &child_page_size); + if (ret) + return ret; + + next_ptep = kvm_mmu_memory_cache_alloc(pcache); + if (!next_ptep) + return -ENOMEM; + + for (i = 0; i < PTRS_PER_PTE; i++) { + child_pte = make_child_pte(huge_pte, i, child_page_size); + set_pte((pte_t *)&next_ptep[i], __pte(child_pte)); + } + + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)), + __pgprot(_PAGE_TABLE))); + + if (flush) + gstage_tlb_flush(gstage, current_level, addr); + + current_level--; + } + + return 0; +} + void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op) { @@ -217,7 +345,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, u32 next_ptep_level; unsigned long next_page_size, page_size; - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) return; @@ -229,7 +357,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, if (ptep_level && !gstage_pte_leaf(ptep)) { next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); next_ptep_level = ptep_level - 1; - ret = gstage_level_to_page_size(next_ptep_level, &next_page_size); + ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size); if (ret) return; @@ -263,7 +391,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage, while (addr < end) { found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) break; @@ -297,17 +425,16 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end while (addr < end) { found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) break; if (!found_leaf) goto next; - if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) - kvm_riscv_gstage_op_pte(gstage, addr, ptep, - ptep_level, GSTAGE_OP_WP); - + addr = ALIGN_DOWN(addr, page_size); + kvm_riscv_gstage_op_pte(gstage, addr, ptep, + ptep_level, GSTAGE_OP_WP); next: addr += page_size; } @@ -319,39 +446,34 @@ void __init kvm_riscv_gstage_mode_detect(void) /* Try Sv57x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; - kvm_riscv_gstage_pgd_levels = 5; + kvm_riscv_gstage_max_pgd_levels = 5; goto done; } /* Try Sv48x4 
G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; - kvm_riscv_gstage_pgd_levels = 4; + kvm_riscv_gstage_max_pgd_levels = 4; goto done; } /* Try Sv39x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; - kvm_riscv_gstage_pgd_levels = 3; + kvm_riscv_gstage_max_pgd_levels = 3; goto done; } #else /* CONFIG_32BIT */ /* Try Sv32x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; - kvm_riscv_gstage_pgd_levels = 2; + kvm_riscv_gstage_max_pgd_levels = 2; goto done; } #endif /* KVM depends on !HGATP_MODE_OFF */ - kvm_riscv_gstage_mode = HGATP_MODE_OFF; - kvm_riscv_gstage_pgd_levels = 0; + kvm_riscv_gstage_max_pgd_levels = 0; done: csr_write(CSR_HGATP, 0); diff --git a/arch/riscv/kvm/isa.c b/arch/riscv/kvm/isa.c new file mode 100644 index 000000000000..1132d909cc25 --- /dev/null +++ b/arch/riscv/kvm/isa.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2026 Qualcomm Technologies, Inc. + */ + +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kvm_host.h> +#include <linux/nospec.h> +#include <linux/pgtable.h> +#include <asm/kvm_isa.h> +#include <asm/vector.h> + +#define KVM_ISA_EXT_ARR(ext) \ +[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext + +/* Mapping between KVM ISA Extension ID & guest ISA extension ID */ +static const unsigned long kvm_isa_ext_arr[] = { + /* Single letter extensions (alphabetically sorted) */ + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, + /* Multi letter extensions (alphabetically sorted) */ + KVM_ISA_EXT_ARR(SMNPM), + KVM_ISA_EXT_ARR(SMSTATEEN), + KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVNAPOT), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), + KVM_ISA_EXT_ARR(ZABHA), + KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALASR), + KVM_ISA_EXT_ARR(ZALRSC), + KVM_ISA_EXT_ARR(ZAWRS), + KVM_ISA_EXT_ARR(ZBA), + KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBC), + KVM_ISA_EXT_ARR(ZBKB), + KVM_ISA_EXT_ARR(ZBKC), + KVM_ISA_EXT_ARR(ZBKX), + KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCLSD), + KVM_ISA_EXT_ARR(ZCMOP), + KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), + KVM_ISA_EXT_ARR(ZFH), + KVM_ISA_EXT_ARR(ZFHMIN), + KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), + KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), + KVM_ISA_EXT_ARR(ZICNTR), + KVM_ISA_EXT_ARR(ZICOND), + KVM_ISA_EXT_ARR(ZICSR), + KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTNTL), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZILSD), + KVM_ISA_EXT_ARR(ZIMOP), + KVM_ISA_EXT_ARR(ZKND), + KVM_ISA_EXT_ARR(ZKNE), + KVM_ISA_EXT_ARR(ZKNH), + 
KVM_ISA_EXT_ARR(ZKR), + KVM_ISA_EXT_ARR(ZKSED), + KVM_ISA_EXT_ARR(ZKSH), + KVM_ISA_EXT_ARR(ZKT), + KVM_ISA_EXT_ARR(ZTSO), + KVM_ISA_EXT_ARR(ZVBB), + KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), + KVM_ISA_EXT_ARR(ZVFH), + KVM_ISA_EXT_ARR(ZVFHMIN), + KVM_ISA_EXT_ARR(ZVKB), + KVM_ISA_EXT_ARR(ZVKG), + KVM_ISA_EXT_ARR(ZVKNED), + KVM_ISA_EXT_ARR(ZVKNHA), + KVM_ISA_EXT_ARR(ZVKNHB), + KVM_ISA_EXT_ARR(ZVKSED), + KVM_ISA_EXT_ARR(ZVKSH), + KVM_ISA_EXT_ARR(ZVKT), +}; + +unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext) +{ + unsigned long i; + + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + if (kvm_isa_ext_arr[i] == base_ext) + return i; + } + + return KVM_RISCV_ISA_EXT_MAX; +} + +int __kvm_riscv_isa_check_host(unsigned long kvm_ext, unsigned long *base_ext) +{ + unsigned long host_ext; + + if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX || + kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) + return -ENOENT; + + kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); + switch (kvm_isa_ext_arr[kvm_ext]) { + case RISCV_ISA_EXT_SMNPM: + /* + * Pointer masking effective in (H)S-mode is provided by the + * Smnpm extension, so that extension is reported to the guest, + * even though the CSR bits for configuring VS-mode pointer + * masking on the host side are part of the Ssnpm extension. + */ + host_ext = RISCV_ISA_EXT_SSNPM; + break; + default: + host_ext = kvm_isa_ext_arr[kvm_ext]; + break; + } + + if (!__riscv_isa_extension_available(NULL, host_ext)) + return -ENOENT; + + if (base_ext) + *base_ext = kvm_isa_ext_arr[kvm_ext]; + + return 0; +} + +bool kvm_riscv_isa_enable_allowed(unsigned long ext) +{ + switch (ext) { + case KVM_RISCV_ISA_EXT_H: + return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return !kvm_riscv_isa_check_host(SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Guest OS can use Svadu only when host OS enables Svadu. 
+ */ + return arch_has_hw_pte_young(); + case KVM_RISCV_ISA_EXT_V: + return riscv_v_vstate_ctrl_user_allowed(); + default: + break; + } + + return true; +} + +bool kvm_riscv_isa_disable_allowed(unsigned long ext) +{ + switch (ext) { + /* Extensions which don't have any mechanism to disable */ + case KVM_RISCV_ISA_EXT_A: + case KVM_RISCV_ISA_EXT_C: + case KVM_RISCV_ISA_EXT_I: + case KVM_RISCV_ISA_EXT_M: + /* There is no architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_RISCV_ISA_EXT_SSNPM: + case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: + case KVM_RISCV_ISA_EXT_ZABHA: + case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALASR: + case KVM_RISCV_ISA_EXT_ZALRSC: + case KVM_RISCV_ISA_EXT_ZAWRS: + case KVM_RISCV_ISA_EXT_ZBA: + case KVM_RISCV_ISA_EXT_ZBB: + case KVM_RISCV_ISA_EXT_ZBC: + case KVM_RISCV_ISA_EXT_ZBKB: + case KVM_RISCV_ISA_EXT_ZBKC: + case KVM_RISCV_ISA_EXT_ZBKX: + case KVM_RISCV_ISA_EXT_ZBS: + case KVM_RISCV_ISA_EXT_ZCA: + case KVM_RISCV_ISA_EXT_ZCB: + case KVM_RISCV_ISA_EXT_ZCD: + case KVM_RISCV_ISA_EXT_ZCF: + case KVM_RISCV_ISA_EXT_ZCMOP: + case KVM_RISCV_ISA_EXT_ZFA: + case KVM_RISCV_ISA_EXT_ZFBFMIN: + case KVM_RISCV_ISA_EXT_ZFH: + case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICBOP: + case KVM_RISCV_ISA_EXT_ZICCRSE: + case KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_RISCV_ISA_EXT_ZICOND: + case KVM_RISCV_ISA_EXT_ZICSR: + case KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_RISCV_ISA_EXT_ZIHINTNTL: + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_RISCV_ISA_EXT_ZIMOP: + case KVM_RISCV_ISA_EXT_ZKND: + case KVM_RISCV_ISA_EXT_ZKNE: + case KVM_RISCV_ISA_EXT_ZKNH: + case KVM_RISCV_ISA_EXT_ZKR: + case KVM_RISCV_ISA_EXT_ZKSED: + case KVM_RISCV_ISA_EXT_ZKSH: + case KVM_RISCV_ISA_EXT_ZKT: + case KVM_RISCV_ISA_EXT_ZTSO: + case KVM_RISCV_ISA_EXT_ZVBB: + case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_RISCV_ISA_EXT_ZVFBFWMA: + case KVM_RISCV_ISA_EXT_ZVFH: + case KVM_RISCV_ISA_EXT_ZVFHMIN: + case KVM_RISCV_ISA_EXT_ZVKB: + case KVM_RISCV_ISA_EXT_ZVKG: + case KVM_RISCV_ISA_EXT_ZVKNED: + case KVM_RISCV_ISA_EXT_ZVKNHA: + case KVM_RISCV_ISA_EXT_ZVKNHB: + case KVM_RISCV_ISA_EXT_ZVKSED: + case KVM_RISCV_ISA_EXT_ZVKSH: + case KVM_RISCV_ISA_EXT_ZVKT: + return false; + /* Extensions which can be disabled using Smstateen */ + case KVM_RISCV_ISA_EXT_SSAIA: + return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade can't be disabled unless we support Svadu. + */ + return arch_has_hw_pte_young(); + default: + break; + } + + return true; +} diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 0f3fe3986fc0..cb8a65273c1f 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -41,8 +41,8 @@ int kvm_arch_enable_virtualization_cpu(void) if (rc) return rc; - csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); - csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); + csr_write(CSR_HEDELEG, 0); + csr_write(CSR_HIDELEG, 0); /* VS should access only the time counter directly. 
Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); @@ -105,17 +105,17 @@ static int __init riscv_kvm_init(void) return rc; kvm_riscv_gstage_mode_detect(); - switch (kvm_riscv_gstage_mode) { - case HGATP_MODE_SV32X4: + switch (kvm_riscv_gstage_max_pgd_levels) { + case 2: str = "Sv32x4"; break; - case HGATP_MODE_SV39X4: + case 3: str = "Sv39x4"; break; - case HGATP_MODE_SV48X4: + case 4: str = "Sv48x4"; break; - case HGATP_MODE_SV57X4: + case 5: str = "Sv57x4"; break; default: @@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void) (rc) ? slist : "no features"); } - kvm_info("using %s G-stage page table format\n", str); + kvm_info("highest G-stage page table mode is %s\n", str); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 088d33ba90ed..2d3def024270 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -24,10 +24,7 @@ static void mmu_wp_memory_region(struct kvm *kvm, int slot) phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_wp_range(&gstage, start, end); @@ -49,10 +46,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, struct kvm_gstage_mapping map; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); @@ -67,7 +61,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, if (!writable) map.pte = pte_wrprotect(map.pte); - ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(&pcache, kvm->arch.pgd_levels); if (ret) goto out; @@ -89,10 +83,7 @@ void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size) { struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); @@ -109,10 +100,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); kvm_riscv_gstage_wp_range(&gstage, start, end); } @@ -141,10 +129,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); @@ -186,7 +171,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest GPA space. 
*/ if ((new->base_gfn + new->npages) >= - (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT)) + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels) >> PAGE_SHIFT) return -EFAULT; hva = new->userspace_addr; @@ -250,10 +235,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.pgd) return false; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); mmu_locked = spin_trylock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, @@ -275,10 +257,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; @@ -298,10 +277,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; @@ -463,16 +439,13 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, struct kvm_gstage gstage; struct page *page; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); /* Setup initial state of output mapping */ memset(out_map, 0, sizeof(*out_map)); /* We need minimum second+third level pages */ - ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels); if (ret) { kvm_err("Failed to topup G-stage cache\n"); return ret; @@ -575,6 +548,7 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm) return -ENOMEM; kvm->arch.pgd = page_to_virt(pgd_page); kvm->arch.pgd_phys = page_to_phys(pgd_page); + kvm->arch.pgd_levels = kvm_riscv_gstage_max_pgd_levels; return 0; } @@ -586,14 +560,13 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; - kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false); + kvm_riscv_gstage_init(&gstage, kvm); + kvm_riscv_gstage_unmap_range(&gstage, 0UL, + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels), false); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; kvm->arch.pgd_phys = 0; + kvm->arch.pgd_levels = 0; } spin_unlock(&kvm->mmu_lock); @@ -603,11 +576,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm) void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu) { - unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT; - struct kvm_arch *k = &vcpu->kvm->arch; + struct kvm_arch *ka = &vcpu->kvm->arch; + unsigned long hgatp = kvm_riscv_gstage_mode(ka->pgd_levels) + << HGATP_MODE_SHIFT; - hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; - hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; + hgatp |= (READ_ONCE(ka->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; + hgatp |= (ka->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; ncsr_write(CSR_HGATP, 
hgatp); diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index ff1aeac4eb8e..439c20c2775a 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -338,7 +338,8 @@ static void make_xfence_request(struct kvm *kvm, bitmap_zero(vcpu_mask, KVM_MAX_VCPUS); kvm_for_each_vcpu(i, vcpu, kvm) { if (hbase != -1UL) { - if (vcpu->vcpu_id < hbase) + if (vcpu->vcpu_id < hbase || + vcpu->vcpu_id >= hbase + BITS_PER_LONG) continue; if (!(hmask & (1UL << (vcpu->vcpu_id - hbase)))) continue; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index fdd99ac1e714..a73690eda84b 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -24,6 +24,8 @@ #define CREATE_TRACE_POINTS #include "trace.h" +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu); + const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, ecall_exit_stat), @@ -133,10 +135,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; - vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); + /* Setup VCPU config */ + kvm_riscv_vcpu_config_init(vcpu); + /* Setup ISA features available to VCPU */ kvm_riscv_vcpu_setup_isa(vcpu); @@ -529,57 +533,41 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - if (dbg->control & KVM_GUESTDBG_ENABLE) { + if (dbg->control & KVM_GUESTDBG_ENABLE) vcpu->guest_debug = dbg->control; - vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); - } else { + else vcpu->guest_debug = 0; - vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); - } return 0; } -static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) -{ - const unsigned long *isa = vcpu->arch.isa; - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; - - if (riscv_isa_extension_available(isa, SVPBMT)) - cfg->henvcfg |= ENVCFG_PBMTE; - - if (riscv_isa_extension_available(isa, SSTC)) - cfg->henvcfg |= ENVCFG_STCE; - - if (riscv_isa_extension_available(isa, ZICBOM)) - cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); - - if (riscv_isa_extension_available(isa, ZICBOZ)) - cfg->henvcfg |= ENVCFG_CBZE; - - if (riscv_isa_extension_available(isa, SVADU) && - !riscv_isa_extension_available(isa, SVADE)) - cfg->henvcfg |= ENVCFG_ADUE; - - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; - if (riscv_isa_extension_available(isa, SSAIA)) - cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC | - SMSTATEEN0_AIA | - SMSTATEEN0_AIA_ISEL; - if (riscv_isa_extension_available(isa, SMSTATEEN)) - cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; - } - - if (vcpu->guest_debug) - cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + /* + * If VCPU is being reloaded on the same physical CPU and no + * other KVM VCPU has run on this CPU since it was last put, + * we can skip the expensive CSR and HGATP writes. + * + * Note: If a new CSR is added to this fast-path skip block, + * make sure that 'csr_dirty' is set to true in any + * ioctl (e.g., KVM_SET_ONE_REG) that modifies it. 
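 * Illustrative sequence (editor's sketch, not part of the patch): vCPU X
 * is put on CPU0 and later reloaded on CPU0 with no other vCPU having run
 * there in between; if it also last exited on CPU0 and csr_dirty is still
 * false, the config/VS-CSR/HGATP/AIA restore below is skipped via
 * csr_restore_done. If any of those conditions fails, a full reload runs.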
+ */ + if (vcpu != __this_cpu_read(kvm_former_vcpu)) + __this_cpu_write(kvm_former_vcpu, vcpu); + else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty) + goto csr_restore_done; + + vcpu->arch.csr_dirty = false; + + /* + * Load VCPU config CSRs before other CSRs because + * the read/write behaviour of certain CSRs changes + * based on VCPU config CSRs. + */ + kvm_riscv_vcpu_config_load(vcpu); if (kvm_riscv_nacl_sync_csr_available()) { nsh = nacl_shmem(); @@ -590,17 +578,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); - nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); nacl_csr_write(nsh, CSR_HVIP, csr->hvip); nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); - nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); - if (IS_ENABLED(CONFIG_32BIT)) - nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); - if (IS_ENABLED(CONFIG_32BIT)) - nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); - } } else { csr_write(CSR_VSSTATUS, csr->vsstatus); csr_write(CSR_VSIE, csr->vsie); @@ -609,21 +588,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); - csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); - csr_write(CSR_HENVCFG, cfg->henvcfg); - if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - csr_write(CSR_HSTATEEN0, cfg->hstateen0); - if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); - } } kvm_riscv_mmu_update_hgatp(vcpu); + kvm_riscv_vcpu_aia_load(vcpu, cpu); + +csr_restore_done: kvm_riscv_vcpu_timer_restore(vcpu); kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context); @@ -633,8 +606,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context, vcpu->arch.isa); - kvm_riscv_vcpu_aia_load(vcpu, cpu); - kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); vcpu->cpu = cpu; @@ -750,28 +721,22 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu * { struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && - (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) - vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, - smcsr->sstateen0); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) + vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0); } static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu) { struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && - (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) - smcsr->sstateen0 =
csr_swap(CSR_SSTATEEN0, - vcpu->arch.host_sstateen0); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) + smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0); } /* @@ -868,7 +833,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; if (!vcpu->arch.ran_atleast_once) - kvm_riscv_vcpu_setup_config(vcpu); + kvm_riscv_vcpu_config_ran_once(vcpu); /* Mark this VCPU ran at least once */ vcpu->arch.ran_atleast_once = true; diff --git a/arch/riscv/kvm/vcpu_config.c b/arch/riscv/kvm/vcpu_config.c new file mode 100644 index 000000000000..238418fed2b9 --- /dev/null +++ b/arch/riscv/kvm/vcpu_config.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2026 Qualcomm Technologies, Inc. + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_nacl.h> + +#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ + BIT(EXC_INST_ILLEGAL) | \ + BIT(EXC_BREAKPOINT) | \ + BIT(EXC_SYSCALL) | \ + BIT(EXC_INST_PAGE_FAULT) | \ + BIT(EXC_LOAD_PAGE_FAULT) | \ + BIT(EXC_STORE_PAGE_FAULT)) + +#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ + BIT(IRQ_VS_TIMER) | \ + BIT(IRQ_VS_EXT)) + +void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; + vcpu->arch.cfg.hideleg = KVM_HIDELEG_DEFAULT; +} + +void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); + else + cfg->hedeleg |= BIT(EXC_BREAKPOINT); + + vcpu->arch.csr_dirty = true; +} + +void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu) +{ + const unsigned long *isa = vcpu->arch.isa; + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + + if (riscv_isa_extension_available(isa, SVPBMT)) + cfg->henvcfg |= ENVCFG_PBMTE; + + if (riscv_isa_extension_available(isa, SSTC)) + cfg->henvcfg |= ENVCFG_STCE; + + if (riscv_isa_extension_available(isa, ZICBOM)) + cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); + + if (riscv_isa_extension_available(isa, ZICBOZ)) + cfg->henvcfg |= ENVCFG_CBZE; + + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; + if (riscv_isa_extension_available(isa, SSAIA)) + cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC | + SMSTATEEN0_AIA | + SMSTATEEN0_AIA_ISEL; + if (riscv_isa_extension_available(isa, SMSTATEEN)) + cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; + } + + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); +} + +void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + void *nsh; + + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); + nacl_csr_write(nsh, CSR_HIDELEG, cfg->hideleg); + nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } + } else { + csr_write(CSR_HEDELEG, cfg->hedeleg); + csr_write(CSR_HIDELEG, cfg->hideleg); + csr_write(CSR_HENVCFG, cfg->henvcfg); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + 
csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } + } +} diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index bd5a9e7e7165..6ad6df26a2fd 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -13,6 +13,7 @@ #include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/cpufeature.h> +#include <asm/kvm_isa.h> #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) @@ -60,17 +61,17 @@ void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx, void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx) { /* No need to check host sstatus as it can be modified outside */ - if (riscv_isa_extension_available(NULL, d)) + if (!kvm_riscv_isa_check_host(D)) __kvm_riscv_fp_d_save(cntx); - else if (riscv_isa_extension_available(NULL, f)) + else if (!kvm_riscv_isa_check_host(F)) __kvm_riscv_fp_f_save(cntx); } void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx) { - if (riscv_isa_extension_available(NULL, d)) + if (!kvm_riscv_isa_check_host(D)) __kvm_riscv_fp_d_restore(cntx); - else if (riscv_isa_extension_available(NULL, f)) + else if (!kvm_riscv_isa_check_host(F)) __kvm_riscv_fp_f_restore(cntx); } #endif diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 45ecc0082e90..bb920e8923c9 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,259 +15,19 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> +#include <asm/kvm_isa.h> #include <asm/kvm_vcpu_vector.h> -#include <asm/pgtable.h> -#include <asm/vector.h> #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) -#define KVM_ISA_EXT_ARR(ext) \ -[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext - -/* Mapping between KVM ISA Extension ID & guest ISA extension ID */ -static const unsigned long kvm_isa_ext_arr[] = { - /* Single letter extensions (alphabetically sorted) */ - [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, - [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, - [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, - [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, - [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, - [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, - [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, - [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, - /* Multi letter extensions (alphabetically sorted) */ - KVM_ISA_EXT_ARR(SMNPM), - KVM_ISA_EXT_ARR(SMSTATEEN), - KVM_ISA_EXT_ARR(SSAIA), - KVM_ISA_EXT_ARR(SSCOFPMF), - KVM_ISA_EXT_ARR(SSNPM), - KVM_ISA_EXT_ARR(SSTC), - KVM_ISA_EXT_ARR(SVADE), - KVM_ISA_EXT_ARR(SVADU), - KVM_ISA_EXT_ARR(SVINVAL), - KVM_ISA_EXT_ARR(SVNAPOT), - KVM_ISA_EXT_ARR(SVPBMT), - KVM_ISA_EXT_ARR(SVVPTC), - KVM_ISA_EXT_ARR(ZAAMO), - KVM_ISA_EXT_ARR(ZABHA), - KVM_ISA_EXT_ARR(ZACAS), - KVM_ISA_EXT_ARR(ZALASR), - KVM_ISA_EXT_ARR(ZALRSC), - KVM_ISA_EXT_ARR(ZAWRS), - KVM_ISA_EXT_ARR(ZBA), - KVM_ISA_EXT_ARR(ZBB), - KVM_ISA_EXT_ARR(ZBC), - KVM_ISA_EXT_ARR(ZBKB), - KVM_ISA_EXT_ARR(ZBKC), - KVM_ISA_EXT_ARR(ZBKX), - KVM_ISA_EXT_ARR(ZBS), - KVM_ISA_EXT_ARR(ZCA), - KVM_ISA_EXT_ARR(ZCB), - KVM_ISA_EXT_ARR(ZCD), - KVM_ISA_EXT_ARR(ZCF), - KVM_ISA_EXT_ARR(ZCLSD), - KVM_ISA_EXT_ARR(ZCMOP), - KVM_ISA_EXT_ARR(ZFA), - KVM_ISA_EXT_ARR(ZFBFMIN), - KVM_ISA_EXT_ARR(ZFH), - KVM_ISA_EXT_ARR(ZFHMIN), - KVM_ISA_EXT_ARR(ZICBOM), - KVM_ISA_EXT_ARR(ZICBOP), - KVM_ISA_EXT_ARR(ZICBOZ), - KVM_ISA_EXT_ARR(ZICCRSE), - KVM_ISA_EXT_ARR(ZICNTR), - KVM_ISA_EXT_ARR(ZICOND), - KVM_ISA_EXT_ARR(ZICSR), - KVM_ISA_EXT_ARR(ZIFENCEI), - KVM_ISA_EXT_ARR(ZIHINTNTL), - KVM_ISA_EXT_ARR(ZIHINTPAUSE), - 
KVM_ISA_EXT_ARR(ZIHPM), - KVM_ISA_EXT_ARR(ZILSD), - KVM_ISA_EXT_ARR(ZIMOP), - KVM_ISA_EXT_ARR(ZKND), - KVM_ISA_EXT_ARR(ZKNE), - KVM_ISA_EXT_ARR(ZKNH), - KVM_ISA_EXT_ARR(ZKR), - KVM_ISA_EXT_ARR(ZKSED), - KVM_ISA_EXT_ARR(ZKSH), - KVM_ISA_EXT_ARR(ZKT), - KVM_ISA_EXT_ARR(ZTSO), - KVM_ISA_EXT_ARR(ZVBB), - KVM_ISA_EXT_ARR(ZVBC), - KVM_ISA_EXT_ARR(ZVFBFMIN), - KVM_ISA_EXT_ARR(ZVFBFWMA), - KVM_ISA_EXT_ARR(ZVFH), - KVM_ISA_EXT_ARR(ZVFHMIN), - KVM_ISA_EXT_ARR(ZVKB), - KVM_ISA_EXT_ARR(ZVKG), - KVM_ISA_EXT_ARR(ZVKNED), - KVM_ISA_EXT_ARR(ZVKNHA), - KVM_ISA_EXT_ARR(ZVKNHB), - KVM_ISA_EXT_ARR(ZVKSED), - KVM_ISA_EXT_ARR(ZVKSH), - KVM_ISA_EXT_ARR(ZVKT), -}; - -static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) -{ - unsigned long i; - - for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { - if (kvm_isa_ext_arr[i] == base_ext) - return i; - } - - return KVM_RISCV_ISA_EXT_MAX; -} - -static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext) -{ - unsigned long host_ext; - - if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX || - kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) - return -ENOENT; - - kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); - *guest_ext = kvm_isa_ext_arr[kvm_ext]; - switch (*guest_ext) { - case RISCV_ISA_EXT_SMNPM: - /* - * Pointer masking effective in (H)S-mode is provided by the - * Smnpm extension, so that extension is reported to the guest, - * even though the CSR bits for configuring VS-mode pointer - * masking on the host side are part of the Ssnpm extension. - */ - host_ext = RISCV_ISA_EXT_SSNPM; - break; - default: - host_ext = *guest_ext; - break; - } - - if (!__riscv_isa_extension_available(NULL, host_ext)) - return -ENOENT; - - return 0; -} - -static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) -{ - switch (ext) { - case KVM_RISCV_ISA_EXT_H: - return false; - case KVM_RISCV_ISA_EXT_SSCOFPMF: - /* Sscofpmf depends on interrupt filtering defined in ssaia */ - return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); - case KVM_RISCV_ISA_EXT_SVADU: - /* - * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. - * Guest OS can use Svadu only when host OS enable Svadu. 
- */ - return arch_has_hw_pte_young(); - case KVM_RISCV_ISA_EXT_V: - return riscv_v_vstate_ctrl_user_allowed(); - default: - break; - } - - return true; -} - -static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) -{ - switch (ext) { - /* Extensions which don't have any mechanism to disable */ - case KVM_RISCV_ISA_EXT_A: - case KVM_RISCV_ISA_EXT_C: - case KVM_RISCV_ISA_EXT_I: - case KVM_RISCV_ISA_EXT_M: - /* There is not architectural config bit to disable sscofpmf completely */ - case KVM_RISCV_ISA_EXT_SSCOFPMF: - case KVM_RISCV_ISA_EXT_SSNPM: - case KVM_RISCV_ISA_EXT_SSTC: - case KVM_RISCV_ISA_EXT_SVINVAL: - case KVM_RISCV_ISA_EXT_SVNAPOT: - case KVM_RISCV_ISA_EXT_SVVPTC: - case KVM_RISCV_ISA_EXT_ZAAMO: - case KVM_RISCV_ISA_EXT_ZABHA: - case KVM_RISCV_ISA_EXT_ZACAS: - case KVM_RISCV_ISA_EXT_ZALASR: - case KVM_RISCV_ISA_EXT_ZALRSC: - case KVM_RISCV_ISA_EXT_ZAWRS: - case KVM_RISCV_ISA_EXT_ZBA: - case KVM_RISCV_ISA_EXT_ZBB: - case KVM_RISCV_ISA_EXT_ZBC: - case KVM_RISCV_ISA_EXT_ZBKB: - case KVM_RISCV_ISA_EXT_ZBKC: - case KVM_RISCV_ISA_EXT_ZBKX: - case KVM_RISCV_ISA_EXT_ZBS: - case KVM_RISCV_ISA_EXT_ZCA: - case KVM_RISCV_ISA_EXT_ZCB: - case KVM_RISCV_ISA_EXT_ZCD: - case KVM_RISCV_ISA_EXT_ZCF: - case KVM_RISCV_ISA_EXT_ZCMOP: - case KVM_RISCV_ISA_EXT_ZFA: - case KVM_RISCV_ISA_EXT_ZFBFMIN: - case KVM_RISCV_ISA_EXT_ZFH: - case KVM_RISCV_ISA_EXT_ZFHMIN: - case KVM_RISCV_ISA_EXT_ZICBOP: - case KVM_RISCV_ISA_EXT_ZICCRSE: - case KVM_RISCV_ISA_EXT_ZICNTR: - case KVM_RISCV_ISA_EXT_ZICOND: - case KVM_RISCV_ISA_EXT_ZICSR: - case KVM_RISCV_ISA_EXT_ZIFENCEI: - case KVM_RISCV_ISA_EXT_ZIHINTNTL: - case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: - case KVM_RISCV_ISA_EXT_ZIHPM: - case KVM_RISCV_ISA_EXT_ZIMOP: - case KVM_RISCV_ISA_EXT_ZKND: - case KVM_RISCV_ISA_EXT_ZKNE: - case KVM_RISCV_ISA_EXT_ZKNH: - case KVM_RISCV_ISA_EXT_ZKR: - case KVM_RISCV_ISA_EXT_ZKSED: - case KVM_RISCV_ISA_EXT_ZKSH: - case KVM_RISCV_ISA_EXT_ZKT: - case KVM_RISCV_ISA_EXT_ZTSO: - case KVM_RISCV_ISA_EXT_ZVBB: - case KVM_RISCV_ISA_EXT_ZVBC: - case KVM_RISCV_ISA_EXT_ZVFBFMIN: - case KVM_RISCV_ISA_EXT_ZVFBFWMA: - case KVM_RISCV_ISA_EXT_ZVFH: - case KVM_RISCV_ISA_EXT_ZVFHMIN: - case KVM_RISCV_ISA_EXT_ZVKB: - case KVM_RISCV_ISA_EXT_ZVKG: - case KVM_RISCV_ISA_EXT_ZVKNED: - case KVM_RISCV_ISA_EXT_ZVKNHA: - case KVM_RISCV_ISA_EXT_ZVKNHB: - case KVM_RISCV_ISA_EXT_ZVKSED: - case KVM_RISCV_ISA_EXT_ZVKSH: - case KVM_RISCV_ISA_EXT_ZVKT: - return false; - /* Extensions which can be disabled using Smstateen */ - case KVM_RISCV_ISA_EXT_SSAIA: - return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); - case KVM_RISCV_ISA_EXT_SVADE: - /* - * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. - * Svade can't be disabled unless we support Svadu. 
- */ - return arch_has_hw_pte_young(); - default: - break; - } - - return true; -} - void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu) { unsigned long guest_ext, i; - for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { - if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext)) + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + if (__kvm_riscv_isa_check_host(i, &guest_ext)) continue; - if (kvm_riscv_vcpu_isa_enable_allowed(i)) + if (kvm_riscv_isa_enable_allowed(i)) set_bit(guest_ext, vcpu->arch.isa); } } @@ -290,17 +50,17 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOM)) + if (kvm_riscv_isa_check_host(ZICBOM)) return -ENOENT; reg_val = riscv_cbom_block_size; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOZ)) + if (kvm_riscv_isa_check_host(ZICBOZ)) return -ENOENT; reg_val = riscv_cboz_block_size; break; case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOP)) + if (kvm_riscv_isa_check_host(ZICBOP)) return -ENOENT; reg_val = riscv_cbop_block_size; break; @@ -361,15 +121,15 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, if (!vcpu->arch.ran_atleast_once) { /* Ignore the enable/disable request for certain extensions */ for (i = 0; i < RISCV_ISA_EXT_BASE; i++) { - isa_ext = kvm_riscv_vcpu_base2isa_ext(i); + isa_ext = kvm_riscv_base2isa_ext(i); if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) { reg_val &= ~BIT(i); continue; } - if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext)) + if (!kvm_riscv_isa_enable_allowed(isa_ext)) if (reg_val & BIT(i)) reg_val &= ~BIT(i); - if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext)) + if (!kvm_riscv_isa_disable_allowed(isa_ext)) if (!(reg_val & BIT(i))) reg_val |= BIT(i); } @@ -384,19 +144,19 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, } break; case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOM)) + if (kvm_riscv_isa_check_host(ZICBOM)) return -ENOENT; if (reg_val != riscv_cbom_block_size) return -EINVAL; break; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOZ)) + if (kvm_riscv_isa_check_host(ZICBOZ)) return -ENOENT; if (reg_val != riscv_cboz_block_size) return -EINVAL; break; case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): - if (!riscv_isa_extension_available(NULL, ZICBOP)) + if (kvm_riscv_isa_check_host(ZICBOP)) return -ENOENT; if (reg_val != riscv_cbop_block_size) return -EINVAL; @@ -670,6 +430,8 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, if (rc) return rc; + vcpu->arch.csr_dirty = true; + return 0; } @@ -680,7 +442,7 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, unsigned long guest_ext; int ret; - ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); + ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext); if (ret) return ret; @@ -698,7 +460,7 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu, unsigned long guest_ext; int ret; - ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); + ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext); if (ret) return ret; @@ -711,10 +473,10 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu, * extension can be disabled */ if (reg_val == 1 && - kvm_riscv_vcpu_isa_enable_allowed(reg_num)) + kvm_riscv_isa_enable_allowed(reg_num)) 
 		set_bit(guest_ext, vcpu->arch.isa);
 	else if (!reg_val &&
-		 kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+		 kvm_riscv_isa_disable_allowed(reg_num))
 		clear_bit(guest_ext, vcpu->arch.isa);
 	else
 		return -EINVAL;
@@ -857,13 +619,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
 		 * was not available.
 		 */
 		if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
-		    !riscv_isa_extension_available(NULL, ZICBOM))
+		    kvm_riscv_isa_check_host(ZICBOM))
 			continue;
 		else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
-			 !riscv_isa_extension_available(NULL, ZICBOZ))
+			 kvm_riscv_isa_check_host(ZICBOZ))
 			continue;
 		else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) &&
-			 !riscv_isa_extension_available(NULL, ZICBOP))
+			 kvm_riscv_isa_check_host(ZICBOP))
 			continue;
 
 		size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
@@ -1084,7 +846,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
 			     KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
 		u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
 
-		if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
+		if (__kvm_riscv_isa_check_host(i, &guest_ext))
 			continue;
 
 		if (uindices) {
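For reference, the sanitization loop in kvm_riscv_vcpu_set_reg_config() above reduces to two operations on the requested bitmap: bits for extensions that cannot be enabled are cleared, and bits for extensions that cannot be disabled are forced back on. A minimal user-space sketch of that invariant; the two masks and the helper name are illustrative, not kernel symbols:

#include <assert.h>
#include <stdio.h>

/* Illustrative masks, not kernel symbols: which extension bits the host
 * can virtualize, and which of those have no off switch once present. */
#define ENABLE_ALLOWED_MASK	0x3fUL
#define ALWAYS_ON_MASK		0x0cUL

/* Clamp a userspace-requested base-ISA bitmap the way set_reg_config()
 * does: clear what cannot be enabled, re-set what cannot be disabled. */
static unsigned long sanitize_isa_bitmap(unsigned long requested)
{
	unsigned long val = requested & ENABLE_ALLOWED_MASK;

	val |= ALWAYS_ON_MASK;
	return val;
}

int main(void)
{
	unsigned long out = sanitize_isa_bitmap(0x01UL);

	/* Bit 0 was requested and allowed; always-on bits are forced back in. */
	assert(out == (0x01UL | ALWAYS_ON_MASK));
	printf("sanitized bitmap: %#lx\n", out);
	return 0;
}

The effect is that a write before the first vcpu run never errors out for these bits; the request is silently adjusted to what the host can actually provide.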
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index e873430e596b..18ef07b3f13a 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -7,16 +7,17 @@
  */
 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/nospec.h>
 #include <linux/perf/riscv_pmu.h>
 #include <asm/csr.h>
+#include <asm/kvm_isa.h>
 #include <asm/kvm_vcpu_sbi.h>
 #include <asm/kvm_vcpu_pmu.h>
 #include <asm/sbi.h>
-#include <linux/bitops.h>
 
 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
@@ -226,7 +227,14 @@ static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
 	if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
 		return -EINVAL;
 
+	if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
+		return -EINVAL;
+
 	fevent_code = get_event_code(pmc->event_idx);
+	if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
+		      "Invalid firmware event code: %d\n", fevent_code))
+		return -EINVAL;
+
 	pmc->counter_val = kvpmu->fw_event[fevent_code].value;
 	*out_val = pmc->counter_val >> 32;
 
@@ -251,7 +259,14 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 	pmc = &kvpmu->pmc[cidx];
 
 	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
+		if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
+			return -EINVAL;
+
 		fevent_code = get_event_code(pmc->event_idx);
+		if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
+			      "Invalid firmware event code: %d\n", fevent_code))
+			return -EINVAL;
+
 		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
 	} else if (pmc->perf_event) {
 		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
@@ -266,8 +281,10 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
 					 unsigned long ctr_mask)
 {
-	/* Make sure the we have a valid counter mask requested from the caller */
-	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
+	unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu);
+
+	/* Make sure we have a valid counter mask requested from the caller */
+	if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs))
 		return -EINVAL;
 
 	return 0;
@@ -427,11 +444,12 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
 	saddr = saddr_low;
 
 	if (saddr_high != 0) {
-		if (IS_ENABLED(CONFIG_32BIT))
+		if (IS_ENABLED(CONFIG_32BIT)) {
 			saddr |= ((gpa_t)saddr_high << 32);
-		else
+		} else {
 			sbiret = SBI_ERR_INVALID_ADDRESS;
-		goto out;
+			goto out;
+		}
 	}
 
 	kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
@@ -441,6 +459,7 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
 	/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
 	if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
 		kfree(kvpmu->sdata);
+		kvpmu->sdata = NULL;
 		sbiret = SBI_ERR_INVALID_ADDRESS;
 		goto out;
 	}
@@ -827,7 +846,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 	 * filtering is available in the host. Otherwise, guest will always count
 	 * events while the execution is in hypervisor mode.
 	 */
-	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+	if (kvm_riscv_isa_check_host(SSCOFPMF))
 		return;
 
 	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index afa0545c3bcf..3b834709b429 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -181,6 +181,7 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
 				   unsigned long reg_size, const void *reg_val)
 {
 	unsigned long value;
+	gpa_t new_shmem = INVALID_GPA;
 
 	if (reg_size != sizeof(unsigned long))
 		return -EINVAL;
@@ -191,18 +192,18 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
 		if (IS_ENABLED(CONFIG_32BIT)) {
 			gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);
 
-			vcpu->arch.sta.shmem = value;
-			vcpu->arch.sta.shmem |= hi << 32;
+			new_shmem = value;
+			new_shmem |= hi << 32;
 		} else {
-			vcpu->arch.sta.shmem = value;
+			new_shmem = value;
 		}
 		break;
 	case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
 		if (IS_ENABLED(CONFIG_32BIT)) {
 			gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);
 
-			vcpu->arch.sta.shmem = ((gpa_t)value << 32);
-			vcpu->arch.sta.shmem |= lo;
+			new_shmem = ((gpa_t)value << 32);
+			new_shmem |= lo;
 		} else if (value != 0) {
 			return -EINVAL;
 		}
@@ -211,6 +212,11 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
 		return -ENOENT;
 	}
 
+	if (new_shmem != INVALID_GPA && !IS_ALIGNED(new_shmem, 64))
+		return -EINVAL;
+
+	vcpu->arch.sta.shmem = new_shmem;
+
 	return 0;
 }
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index f36247e4c783..9817ff802821 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -12,6 +12,7 @@
 #include <linux/uaccess.h>
 #include <clocksource/timer-riscv.h>
 #include <asm/delay.h>
+#include <asm/kvm_isa.h>
 #include <asm/kvm_nacl.h>
 #include <asm/kvm_vcpu_timer.h>
 
@@ -253,7 +254,7 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
 	t->next_set = false;
 
 	/* Enable sstc for every vcpu if available in hardware */
-	if (riscv_isa_extension_available(NULL, SSTC)) {
+	if (!kvm_riscv_isa_check_host(SSTC)) {
 		t->sstc_enabled = true;
 		hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired,
 			      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
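The firmware-counter read paths in the vcpu_pmu.c hunks above now validate twice before touching kvpmu->fw_event[]: the counter must actually have been configured (event_idx is not the invalid sentinel), and the decoded event code must be below SBI_PMU_FW_MAX. A standalone sketch of that decode-and-bound-check; the sentinel value and the array here are stand-ins, not the kernel's definitions:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define SBI_PMU_EVENT_IDX_INVALID 0xffffffffUL	/* illustrative sentinel */
#define SBI_PMU_FW_MAX            22		/* number of defined fw events */

/* The low 16 bits of an SBI PMU event index hold the event code. */
static unsigned long get_event_code(unsigned long event_idx)
{
	return event_idx & 0xffff;
}

/* Mirror of the patched read path: refuse unconfigured or out-of-range
 * firmware events instead of indexing the event array blindly. */
static int fw_counter_read(unsigned long event_idx, const uint64_t *fw_event,
			   uint64_t *out_val)
{
	unsigned long code;

	if (event_idx == SBI_PMU_EVENT_IDX_INVALID)
		return -EINVAL;

	code = get_event_code(event_idx);
	if (code >= SBI_PMU_FW_MAX)
		return -EINVAL;

	*out_val = fw_event[code];
	return 0;
}

int main(void)
{
	uint64_t events[SBI_PMU_FW_MAX] = { [2] = 42 }, val = 0;

	if (!fw_counter_read((0xfUL << 16) | 2, events, &val))
		printf("fw event 2 = %llu\n", (unsigned long long)val);
	return 0;
}

The kernel version wraps the range check in WARN_ONCE, presumably because an out-of-range code at this point indicates a KVM bug rather than guest misbehavior, so it is logged once and then refused.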
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index 05f3cc2d8e31..62d2fb77bb9b 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -12,6 +12,7 @@
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
 #include <asm/cpufeature.h>
+#include <asm/kvm_isa.h>
 #include <asm/kvm_vcpu_vector.h>
 #include <asm/vector.h>
 
@@ -63,13 +64,13 @@ void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
 void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx)
 {
 	/* No need to check host sstatus as it can be modified outside */
-	if (riscv_isa_extension_available(NULL, v))
+	if (!kvm_riscv_isa_check_host(V))
 		__kvm_riscv_vector_save(cntx);
 }
 
 void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
 {
-	if (riscv_isa_extension_available(NULL, v))
+	if (!kvm_riscv_isa_check_host(V))
 		__kvm_riscv_vector_restore(cntx);
 }
 
@@ -80,8 +81,11 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu)
 		return -ENOMEM;
 
 	vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
-	if (!vcpu->arch.host_context.vector.datap)
+	if (!vcpu->arch.host_context.vector.datap) {
+		kfree(vcpu->arch.guest_context.vector.datap);
+		vcpu->arch.guest_context.vector.datap = NULL;
 		return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -127,6 +131,7 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
 	} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
 		if (reg_size != vlenb)
 			return -EINVAL;
+		WARN_ON(!cntx->vector.datap);
 		*reg_addr = cntx->vector.datap +
 			    (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
 	} else {
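The kvm_riscv_vcpu_alloc_vector_context() fix above closes a leak: when the second of two paired allocations fails, the first one must be released and its pointer cleared so no stale reference survives. The same unwind pattern reduced to standard C, as a sketch (calloc stands in for kzalloc, and the struct is mine):

#include <stdlib.h>

struct vctx {
	void *guest_datap;
	void *host_datap;
};

/* Allocate both buffers or neither: on the second failure, free the
 * first and reset its pointer before reporting the error. */
static int alloc_vector_context(struct vctx *v, size_t vsize)
{
	v->guest_datap = calloc(1, vsize);
	if (!v->guest_datap)
		return -1;

	v->host_datap = calloc(1, vsize);
	if (!v->host_datap) {
		free(v->guest_datap);
		v->guest_datap = NULL;
		return -1;
	}
	return 0;
}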
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 13c63ae1a78b..a9f083feeb76 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -199,7 +199,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_USER_MEM_SLOTS;
 		break;
 	case KVM_CAP_VM_GPA_BITS:
-		r = kvm_riscv_gstage_gpa_bits;
+		if (!kvm)
+			r = kvm_riscv_gstage_gpa_bits(kvm_riscv_gstage_max_pgd_levels);
+		else
+			r = kvm_riscv_gstage_gpa_bits(kvm->arch.pgd_levels);
 		break;
 	default:
 		r = 0;
@@ -211,12 +214,52 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 {
+	if (cap->flags)
+		return -EINVAL;
+
 	switch (cap->cap) {
 	case KVM_CAP_RISCV_MP_STATE_RESET:
-		if (cap->flags)
-			return -EINVAL;
 		kvm->arch.mp_state_reset = true;
 		return 0;
+	case KVM_CAP_VM_GPA_BITS: {
+		unsigned long gpa_bits = cap->args[0];
+		unsigned long new_levels;
+		int r = 0;
+
+		/* Decide target pgd levels from requested gpa_bits */
+#ifdef CONFIG_64BIT
+		if (gpa_bits <= 41)
+			new_levels = 3;	/* Sv39x4 */
+		else if (gpa_bits <= 50)
+			new_levels = 4;	/* Sv48x4 */
+		else if (gpa_bits <= 59)
+			new_levels = 5;	/* Sv57x4 */
+		else
+			return -EINVAL;
+#else
+		/* 32-bit: only Sv32x4 */
+		if (gpa_bits <= 34)
+			new_levels = 2;
+		else
+			return -EINVAL;
+#endif
+		if (new_levels > kvm_riscv_gstage_max_pgd_levels)
+			return -EINVAL;
+
+		/* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */
+		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->slots_lock);
+
+		if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
+			r = -EBUSY;
+		else
+			kvm->arch.pgd_levels = new_levels;
+
+		mutex_unlock(&kvm->slots_lock);
+		mutex_unlock(&kvm->lock);
+
+		return r;
+	}
 	default:
 		return -EINVAL;
 	}
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index cf34d448289d..c15bdb1dd8be 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -26,7 +26,8 @@ static DEFINE_SPINLOCK(vmid_lock);
 void __init kvm_riscv_gstage_vmid_detect(void)
 {
 	/* Figure-out number of VMID bits in HW */
-	csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
+	csr_write(CSR_HGATP, (kvm_riscv_gstage_mode(kvm_riscv_gstage_max_pgd_levels) <<
+			      HGATP_MODE_SHIFT) | HGATP_VMID);
 	vmid_bits = csr_read(CSR_HGATP);
 	vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
 	vmid_bits = fls_long(vmid_bits);
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
index ec787b97cf18..0366e9bce7f9 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_types.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -17,4 +17,6 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
+#define INVALID_GPA (~(uint64_t)0)
+
 #endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
index 046b432ae896..16f1815ac48f 100644
--- a/tools/testing/selftests/kvm/include/riscv/sbi.h
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t {
 	SBI_PMU_HW_GENERAL_MAX,
 };
 
+enum sbi_pmu_fw_generic_events_t {
+	SBI_PMU_FW_MISALIGNED_LOAD = 0,
+	SBI_PMU_FW_MISALIGNED_STORE = 1,
+	SBI_PMU_FW_ACCESS_LOAD = 2,
+	SBI_PMU_FW_ACCESS_STORE = 3,
+	SBI_PMU_FW_ILLEGAL_INSN = 4,
+	SBI_PMU_FW_SET_TIMER = 5,
+	SBI_PMU_FW_IPI_SENT = 6,
+	SBI_PMU_FW_IPI_RCVD = 7,
+	SBI_PMU_FW_FENCE_I_SENT = 8,
+	SBI_PMU_FW_FENCE_I_RCVD = 9,
+	SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+	SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+	SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+	SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+	SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+	SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+	SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+	SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+	SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+	SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+	SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+	SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+	SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+	SBI_PMU_EVENT_TYPE_HW = 0x0,
+	SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+	SBI_PMU_EVENT_TYPE_RAW = 0x2,
+	SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3,
+	SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
 /* SBI PMU counter types */
 enum sbi_pmu_ctr_type {
 	SBI_PMU_CTR_TYPE_HW = 0x0,
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 51dd455ff52c..067c6b2c15b0 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -566,3 +566,8 @@ unsigned long riscv64_get_satp_mode(void)
 
 	return val;
 }
+
+bool kvm_arch_has_default_irqchip(void)
+{
+	return kvm_check_cap(KVM_CAP_IRQCHIP);
+}
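For reference, the Sv39x4/Sv48x4/Sv57x4 thresholds in the vm.c hunk above follow from 12 bits of page offset, 9 index bits per level, and the 2 extra root-table bits of the x4 schemes (12 + 9*3 + 2 = 41, and so on). A user-space rendering of the 64-bit mapping and its inverse; the function names are mine, not the kernel's:

#include <errno.h>
#include <stdio.h>

/* Pick the smallest g-stage mode whose widened root table covers the
 * requested guest-physical address space (64-bit policy from vm.c). */
static int gpa_bits_to_pgd_levels(unsigned long gpa_bits)
{
	if (gpa_bits <= 41)
		return 3;	/* Sv39x4 */
	if (gpa_bits <= 50)
		return 4;	/* Sv48x4 */
	if (gpa_bits <= 59)
		return 5;	/* Sv57x4 */
	return -EINVAL;
}

/* Inverse direction: 12-bit page offset + 9 index bits per level + 2
 * extra root bits, assuming 4 KiB pages. */
static unsigned long pgd_levels_to_gpa_bits(int levels)
{
	return 12 + 9 * levels + 2;
}

int main(void)
{
	for (unsigned long bits = 34; bits <= 59; bits += 8) {
		int levels = gpa_bits_to_pgd_levels(bits);

		printf("%lu gpa bits -> %d levels (max %lu bits)\n",
		       bits, levels, pgd_levels_to_gpa_bits(levels));
	}
	return 0;
}

Capping new_levels at kvm_riscv_gstage_max_pgd_levels, and refusing the cap once vcpus exist or memslots are populated, keeps a VM from switching to a g-stage mode the hardware never reported or that live mappings already depend on.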
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 924a335d2262..cec1621ace23 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -436,6 +436,7 @@ static void test_pmu_basic_sanity(void)
 	struct sbiret ret;
 	int num_counters = 0, i;
 	union sbi_pmu_ctr_info ctrinfo;
+	unsigned long fw_eidx;
 
 	probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
 	GUEST_ASSERT(probe && out_val == 1);
@@ -461,7 +462,24 @@ static void test_pmu_basic_sanity(void)
 			pmu_csr_read_num(ctrinfo.csr);
 			GUEST_ASSERT(illegal_handler_invoked);
 		} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
-			read_fw_counter(i, ctrinfo);
+			/* Read without configure should fail */
+			ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+					i, 0, 0, 0, 0, 0);
+			GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM);
+
+			/*
+			 * Try to configure with a common firmware event.
+			 * If configuration succeeds, verify we can read it.
+			 */
+			fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) |
+				  SBI_PMU_FW_ACCESS_LOAD;
+
+			ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+					i, 1, 0, fw_eidx, 0, 0);
+			if (ret.error == 0) {
+				GUEST_ASSERT(ret.value == i);
+				read_fw_counter(i, ctrinfo);
+			}
 		}
 	}
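The selftest above composes its firmware event index exactly as the new sbi.h enums suggest: event type in bits [19:16], event code in the low 16 bits. A tiny standalone check of that encoding; the constants are copied from the hunks, while the helper is illustrative:

#include <assert.h>
#include <stdio.h>

#define SBI_PMU_EVENT_TYPE_FW	0xf
#define SBI_PMU_FW_ACCESS_LOAD	2

/* Compose an SBI PMU event index: type in bits [19:16], code in [15:0]. */
static unsigned long sbi_pmu_event_idx(unsigned long type, unsigned long code)
{
	return (type << 16) | (code & 0xffff);
}

int main(void)
{
	unsigned long fw_eidx = sbi_pmu_event_idx(SBI_PMU_EVENT_TYPE_FW,
						  SBI_PMU_FW_ACCESS_LOAD);

	assert(fw_eidx == 0xf0002);
	printf("fw_eidx = %#lx\n", fw_eidx);
	return 0;
}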
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 7be8adfe5dd3..efe56a10d13e 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -69,16 +69,10 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
 
 static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
 {
-	int ret;
-
 	/* ST_GPA_BASE is identity mapped */
 	st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
 	sync_global_to_guest(vcpu->vm, st_gva[i]);
 
-	ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
-			    (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
-	TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
-
 	vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
 }
 
@@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
 		  st->pad[8], st->pad[9], st->pad[10]);
 }
 
+static void check_steal_time_uapi(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	int ret;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+			    (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK);
+	TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+	kvm_vm_free(vm);
+}
+
 #elif defined(__aarch64__)
 
 /* PV_TIME_ST must have 64-byte alignment */
@@ -170,7 +179,6 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
 {
 	struct kvm_vm *vm = vcpu->vm;
 	uint64_t st_ipa;
-	int ret;
 
 	struct kvm_device_attr dev = {
 		.group = KVM_ARM_VCPU_PVTIME_CTRL,
@@ -178,21 +186,12 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
 		.addr = (uint64_t)&st_ipa,
 	};
 
-	vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
-
 	/* ST_GPA_BASE is identity mapped */
 	st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
 	sync_global_to_guest(vm, st_gva[i]);
 
-	st_ipa = (ulong)st_gva[i] | 1;
-	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-	TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
-
 	st_ipa = (ulong)st_gva[i];
 	vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-
-	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-	TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
 }
 
 static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
@@ -205,6 +204,36 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
 	ksft_print_msg("    st_time: %ld\n", st->st_time);
 }
 
+static void check_steal_time_uapi(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	uint64_t st_ipa;
+	int ret;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	struct kvm_device_attr dev = {
+		.group = KVM_ARM_VCPU_PVTIME_CTRL,
+		.attr = KVM_ARM_VCPU_PVTIME_IPA,
+		.addr = (uint64_t)&st_ipa,
+	};
+
+	vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+
+	st_ipa = (ulong)ST_GPA_BASE | 1;
+	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+	TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+	st_ipa = (ulong)ST_GPA_BASE;
+	vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+
+	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+	TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+	kvm_vm_free(vm);
+}
+
 #elif defined(__riscv)
 
 /* SBI STA shmem must have 64-byte alignment */
@@ -301,6 +330,41 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
 	pr_info("\n");
 }
 
+static void check_steal_time_uapi(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	struct kvm_one_reg reg;
+	uint64_t shmem;
+	int ret;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	reg.id = KVM_REG_RISCV |
+		 KVM_REG_SIZE_ULONG |
+		 KVM_REG_RISCV_SBI_STATE |
+		 KVM_REG_RISCV_SBI_STA |
+		 KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
+	reg.addr = (uint64_t)&shmem;
+
+	shmem = ST_GPA_BASE + 1;
+	ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "misaligned STA shmem returns -EINVAL");
+
+	shmem = ST_GPA_BASE;
+	ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == 0,
+		    "aligned STA shmem succeeds");
+
+	shmem = INVALID_GPA;
+	ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == 0,
+		    "all-ones for STA shmem succeeds");
+
+	kvm_vm_free(vm);
+}
+
 #elif defined(__loongarch__)
 
 /* steal_time must have 64-byte alignment */
@@ -465,6 +529,8 @@ int main(int ac, char **av)
 	TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
 	ksft_set_plan(NR_VCPUS);
 
+	check_steal_time_uapi();
+
 	/* Run test on each VCPU */
 	for (i = 0; i < NR_VCPUS; ++i) {
 		steal_time_init(vcpus[i], i);
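The riscv check_steal_time_uapi() above probes the rule added in vcpu_sbi_sta.c: an STA shmem address is accepted only if it is 64-byte aligned or the all-ones INVALID_GPA that disables the shared memory. The acceptance predicate as a self-contained sketch, reusing the INVALID_GPA definition from kvm_util_types.h:

#include <assert.h>
#include <stdint.h>

#define INVALID_GPA (~(uint64_t)0)

/* Mirror of the STA shmem validation: INVALID_GPA disables the shared
 * memory, anything else must sit on a 64-byte boundary. */
static int sta_shmem_ok(uint64_t shmem)
{
	return shmem == INVALID_GPA || (shmem & 63) == 0;
}

int main(void)
{
	assert(sta_shmem_ok(0x10000000));	/* aligned: accepted */
	assert(!sta_shmem_ok(0x10000001));	/* misaligned: rejected */
	assert(sta_shmem_ok(INVALID_GPA));	/* all-ones: disables STA */
	return 0;
}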
