 arch/riscv/include/asm/kvm_gstage.h                   |  51
 arch/riscv/include/asm/kvm_host.h                     |  23
 arch/riscv/include/asm/kvm_isa.h                      |  20
 arch/riscv/include/asm/kvm_vcpu_config.h              |  25
 arch/riscv/include/uapi/asm/kvm.h                     |   8
 arch/riscv/kvm/Makefile                               |   2
 arch/riscv/kvm/aia_device.c                           |   4
 arch/riscv/kvm/gstage.c                               | 198
 arch/riscv/kvm/isa.c                                  | 253
 arch/riscv/kvm/main.c                                 |  16
 arch/riscv/kvm/mmu.c                                  |  70
 arch/riscv/kvm/tlb.c                                  |   3
 arch/riscv/kvm/vcpu.c                                 | 111
 arch/riscv/kvm/vcpu_config.c                          | 103
 arch/riscv/kvm/vcpu_fp.c                              |   9
 arch/riscv/kvm/vcpu_onereg.c                          | 284
 arch/riscv/kvm/vcpu_pmu.c                             |  33
 arch/riscv/kvm/vcpu_sbi_sta.c                         |  16
 arch/riscv/kvm/vcpu_timer.c                           |   3
 arch/riscv/kvm/vcpu_vector.c                          |  11
 arch/riscv/kvm/vm.c                                   |  49
 arch/riscv/kvm/vmid.c                                 |   3
 tools/testing/selftests/kvm/include/kvm_util_types.h  |   2
 tools/testing/selftests/kvm/include/riscv/sbi.h       |  37
 tools/testing/selftests/kvm/lib/riscv/processor.c     |   5
 tools/testing/selftests/kvm/riscv/sbi_pmu_test.c      |  20
 tools/testing/selftests/kvm/steal_time.c              |  98
 27 files changed, 956 insertions(+), 501 deletions(-)
diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
index 595e2183173e..9c908432bc17 100644
--- a/arch/riscv/include/asm/kvm_gstage.h
+++ b/arch/riscv/include/asm/kvm_gstage.h
@@ -15,6 +15,7 @@ struct kvm_gstage {
#define KVM_GSTAGE_FLAGS_LOCAL BIT(0)
unsigned long vmid;
pgd_t *pgd;
+ unsigned long pgd_levels;
};
struct kvm_gstage_mapping {
@@ -29,16 +30,22 @@ struct kvm_gstage_mapping {
#define kvm_riscv_gstage_index_bits 10
#endif
-extern unsigned long kvm_riscv_gstage_mode;
-extern unsigned long kvm_riscv_gstage_pgd_levels;
+extern unsigned long kvm_riscv_gstage_max_pgd_levels;
#define kvm_riscv_gstage_pgd_xbits 2
#define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
-#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
- (kvm_riscv_gstage_pgd_levels * \
- kvm_riscv_gstage_index_bits) + \
- kvm_riscv_gstage_pgd_xbits)
-#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))
+
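+/* Number of GPA bits addressable with a given number of G-stage page table levels */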
+static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels)
+{
+ return (HGATP_PAGE_SHIFT +
+ pgd_levels * kvm_riscv_gstage_index_bits +
+ kvm_riscv_gstage_pgd_xbits);
+}
+
+static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels)
+{
+ return BIT_ULL(kvm_riscv_gstage_gpa_bits(pgd_levels));
+}
bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level);
@@ -53,6 +60,10 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
bool page_rdonly, bool page_exec,
struct kvm_gstage_mapping *out_map);
+int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t addr, u32 target_level, bool flush);
+
enum kvm_riscv_gstage_op {
GSTAGE_OP_NOP = 0, /* Nothing */
GSTAGE_OP_CLEAR, /* Clear/Unmap */
@@ -69,4 +80,30 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
void kvm_riscv_gstage_mode_detect(void);
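+/* Map a G-stage page table level count to the corresponding HGATP mode */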
+static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels)
+{
+ switch (pgd_levels) {
+ case 2:
+ return HGATP_MODE_SV32X4;
+ case 3:
+ return HGATP_MODE_SV39X4;
+ case 4:
+ return HGATP_MODE_SV48X4;
+ case 5:
+ return HGATP_MODE_SV57X4;
+ default:
+ WARN_ON_ONCE(1);
+ return HGATP_MODE_OFF;
+ }
+}
+
+static inline void kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm)
+{
+ gstage->kvm = kvm;
+ gstage->flags = 0;
+ gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid);
+ gstage->pgd = kvm->arch.pgd;
+ gstage->pgd_levels = kvm->arch.pgd_levels;
+}
+
#endif
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 24585304c02b..75b0a951c1bc 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -18,6 +18,7 @@
#include <asm/ptrace.h>
#include <asm/kvm_tlb.h>
#include <asm/kvm_vmid.h>
+#include <asm/kvm_vcpu_config.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_insn.h>
#include <asm/kvm_vcpu_sbi.h>
@@ -47,18 +48,6 @@
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
-#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
- BIT(EXC_INST_ILLEGAL) | \
- BIT(EXC_BREAKPOINT) | \
- BIT(EXC_SYSCALL) | \
- BIT(EXC_INST_PAGE_FAULT) | \
- BIT(EXC_LOAD_PAGE_FAULT) | \
- BIT(EXC_STORE_PAGE_FAULT))
-
-#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
- BIT(IRQ_VS_TIMER) | \
- BIT(IRQ_VS_EXT))
-
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -94,6 +83,7 @@ struct kvm_arch {
/* G-stage page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
+ unsigned long pgd_levels;
/* Guest Timer */
struct kvm_guest_timer timer;
@@ -167,12 +157,6 @@ struct kvm_vcpu_csr {
unsigned long senvcfg;
};
-struct kvm_vcpu_config {
- u64 henvcfg;
- u64 hstateen0;
- unsigned long hedeleg;
-};
-
struct kvm_vcpu_smstateen_csr {
unsigned long sstateen0;
};
@@ -273,6 +257,9 @@ struct kvm_vcpu_arch {
/* 'static' configurations which are set only once */
struct kvm_vcpu_config cfg;
+ /* Indicates modified guest CSRs */
+ bool csr_dirty;
+
/* SBI steal-time accounting */
struct {
gpa_t shmem;
diff --git a/arch/riscv/include/asm/kvm_isa.h b/arch/riscv/include/asm/kvm_isa.h
new file mode 100644
index 000000000000..bc4b956d5f17
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_isa.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2026 Qualcomm Technologies, Inc.
+ */
+
+#ifndef __KVM_RISCV_ISA_H
+#define __KVM_RISCV_ISA_H
+
+#include <linux/types.h>
+
+unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext);
+
+int __kvm_riscv_isa_check_host(unsigned long ext, unsigned long *base_ext);
+#define kvm_riscv_isa_check_host(ext) \
+ __kvm_riscv_isa_check_host(KVM_RISCV_ISA_EXT_##ext, NULL)
+
+bool kvm_riscv_isa_enable_allowed(unsigned long ext);
+bool kvm_riscv_isa_disable_allowed(unsigned long ext);
+
+#endif
diff --git a/arch/riscv/include/asm/kvm_vcpu_config.h b/arch/riscv/include/asm/kvm_vcpu_config.h
new file mode 100644
index 000000000000..fcc15a0296b3
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_config.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2026 Qualcomm Technologies, Inc.
+ */
+
+#ifndef __KVM_VCPU_RISCV_CONFIG_H
+#define __KVM_VCPU_RISCV_CONFIG_H
+
+#include <linux/types.h>
+
+struct kvm_vcpu;
+
+struct kvm_vcpu_config {
+ u64 henvcfg;
+ u64 hstateen0;
+ unsigned long hedeleg;
+ unsigned long hideleg;
+};
+
+void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 6a89c1d00a72..504e73305343 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -110,6 +110,10 @@ struct kvm_riscv_timer {
__u64 state;
};
+/* Possible states for kvm_riscv_timer */
+#define KVM_RISCV_TIMER_STATE_OFF 0
+#define KVM_RISCV_TIMER_STATE_ON 1
+
/*
* ISA extension IDs specific to KVM. This is not the same as the host ISA
* extension IDs as that is internal to the host and should not be exposed
@@ -238,10 +242,6 @@ struct kvm_riscv_sbi_fwft {
struct kvm_riscv_sbi_fwft_feature pointer_masking;
};
-/* Possible states for kvm_riscv_timer */
-#define KVM_RISCV_TIMER_STATE_OFF 0
-#define KVM_RISCV_TIMER_STATE_ON 1
-
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
#define KVM_REG_RISCV_TYPE_SHIFT 24
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 3b8afb038b35..296c2ba05089 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -15,11 +15,13 @@ kvm-y += aia_aplic.o
kvm-y += aia_device.o
kvm-y += aia_imsic.o
kvm-y += gstage.o
+kvm-y += isa.o
kvm-y += main.o
kvm-y += mmu.o
kvm-y += nacl.o
kvm-y += tlb.o
kvm-y += vcpu.o
+kvm-y += vcpu_config.o
kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
kvm-y += vcpu_insn.o
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 49c71d3cdb00..3d1e81e2a36b 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -11,7 +11,7 @@
#include <linux/irqchip/riscv-imsic.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
-#include <linux/cpufeature.h>
+#include <asm/kvm_isa.h>
static int aia_create(struct kvm_device *dev, u32 type)
{
@@ -23,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type)
if (irqchip_in_kernel(kvm))
return -EEXIST;
- if (!riscv_isa_extension_available(NULL, SSAIA))
+ if (kvm_riscv_isa_check_host(SSAIA))
return -ENODEV;
ret = -EBUSY;
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
index b67d60d722c2..d9fe8be2a151 100644
--- a/arch/riscv/kvm/gstage.c
+++ b/arch/riscv/kvm/gstage.c
@@ -12,22 +12,21 @@
#include <asm/kvm_gstage.h>
#ifdef CONFIG_64BIT
-unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
-unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
+unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
#else
-unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
-unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
+unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
#endif
#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
-static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
+static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
+ gpa_t addr, u32 level)
{
unsigned long mask;
unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
- if (level == (kvm_riscv_gstage_pgd_levels - 1))
+ if (level == gstage->pgd_levels - 1)
mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
@@ -40,12 +39,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
-static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
+static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
+ u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;
- for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
+ for (i = 0; i < gstage->pgd_levels; i++) {
if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
*out_level = i;
return 0;
@@ -55,21 +55,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}
-static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
+static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
+ unsigned long *out_pgorder)
{
- if (kvm_riscv_gstage_pgd_levels < level)
+ if (gstage->pgd_levels < level)
return -EINVAL;
*out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
return 0;
}
-static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
+static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
+ unsigned long *out_pgsize)
{
int rc;
unsigned long page_order = PAGE_SHIFT;
- rc = gstage_level_to_page_order(level, &page_order);
+ rc = gstage_level_to_page_order(gstage, level, &page_order);
if (rc)
return rc;
@@ -81,11 +83,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
- u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ u32 current_level = gstage->pgd_levels - 1;
*ptep_level = current_level;
ptep = (pte_t *)gstage->pgd;
- ptep = &ptep[gstage_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
@@ -97,7 +99,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
current_level--;
*ptep_level = current_level;
ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
- ptep = &ptep[gstage_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
} else {
ptep = NULL;
}
@@ -110,7 +112,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
{
unsigned long order = PAGE_SHIFT;
- if (gstage_level_to_page_order(level, &order))
+ if (gstage_level_to_page_order(gstage, level, &order))
return;
addr &= ~(BIT(order) - 1);
@@ -125,9 +127,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
const struct kvm_gstage_mapping *map)
{
- u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ u32 current_level = gstage->pgd_levels - 1;
pte_t *next_ptep = (pte_t *)gstage->pgd;
- pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+ pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
if (current_level < map->level)
return -EINVAL;
@@ -151,7 +153,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
}
current_level--;
- ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+ ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
}
if (pte_val(*ptep) != pte_val(map->pte)) {
@@ -163,19 +165,38 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
return 0;
}
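+/*
+ * Update the protection bits of an existing leaf PTE in place and
+ * flush the stale G-stage translation for it.
+ */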
+static void kvm_riscv_gstage_update_pte_prot(struct kvm_gstage *gstage, u32 level,
+ gpa_t addr, pte_t *ptep, pgprot_t prot)
+{
+ pte_t new_pte;
+
+ if (pgprot_val(pte_pgprot(ptep_get(ptep))) == pgprot_val(prot))
+ return;
+
+ new_pte = pfn_pte(pte_pfn(ptep_get(ptep)), prot);
+ new_pte = pte_mkdirty(new_pte);
+
+ set_pte(ptep, new_pte);
+
+ gstage_tlb_flush(gstage, level, addr);
+}
+
int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa, unsigned long page_size,
bool page_rdonly, bool page_exec,
struct kvm_gstage_mapping *out_map)
{
+ bool found_leaf;
+ u32 ptep_level;
pgprot_t prot;
+ pte_t *ptep;
int ret;
out_map->addr = gpa;
out_map->level = 0;
- ret = gstage_page_size_to_level(page_size, &out_map->level);
+ ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
if (ret)
return ret;
@@ -203,12 +224,119 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
else
prot = PAGE_WRITE;
}
+
+ found_leaf = kvm_riscv_gstage_get_leaf(gstage, gpa, &ptep, &ptep_level);
+ if (found_leaf) {
+ /*
+ * ptep_level is the current gstage mapping level of addr, out_map->level
+ * is the required mapping level during fault handling.
+ *
+ * 1) ptep_level > out_map->level
+ * This happens when dirty logging is enabled and huge pages are used.
+ * KVM must track the pages at 4K level, and split the huge mapping
+ * into 4K mappings.
+ *
+ * 2) ptep_level < out_map->level
+ * This happens when dirty logging is disabled and huge pages are used.
+ * The gstage is split into 4K mappings, but the out_map level is now
+ * back to the huge page level. Ignore the out_map level this time, and
+ * just update the pte prot here. Otherwise, we would fall back to mapping
+ * the gstage at huge page level in `kvm_riscv_gstage_set_pte`, with the
+ * overhead of freeing the page tables (not supported yet), which would
+ * slow down vCPU performance.
+ *
+ * It is better to recover the huge page mapping in the ioctl context when
+ * disabling dirty logging.
+ *
+ * 3) ptep_level == out_map->level
+ * We already have the ptep, so just update the pte prot if the pfn has not changed.
+ * There is no need to invoke `kvm_riscv_gstage_set_pte` again.
+ */
+ if (ptep_level > out_map->level) {
+ kvm_riscv_gstage_split_huge(gstage, pcache, gpa,
+ out_map->level, true);
+ } else if (ALIGN_DOWN(PFN_PHYS(pte_pfn(ptep_get(ptep))), page_size) == hpa) {
+ kvm_riscv_gstage_update_pte_prot(gstage, ptep_level, gpa, ptep, prot);
+ return 0;
+ }
+ }
+
out_map->pte = pfn_pte(PFN_DOWN(hpa), prot);
out_map->pte = pte_mkdirty(out_map->pte);
return kvm_riscv_gstage_set_pte(gstage, pcache, out_map);
}
+static inline unsigned long make_child_pte(unsigned long huge_pte, int index,
+ unsigned long child_page_size)
+{
+ unsigned long child_pte = huge_pte;
+ unsigned long child_pfn_offset;
+
+ /*
+ * The child_pte already has the base address of the huge page being
+ * split. So we just have to OR in the offset to the page at the next
+ * lower level for the given index.
+ */
+ child_pfn_offset = index * (child_page_size / PAGE_SIZE);
+ child_pte |= pte_val(pfn_pte(child_pfn_offset, __pgprot(0)));
+
+ return child_pte;
+}
+
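+/*
+ * Walk the G-stage table for @addr and split any huge leaf mapping found
+ * above @target_level into a full table of next-level mappings, allocating
+ * child tables from @pcache.
+ */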
+int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
+ struct kvm_mmu_memory_cache *pcache,
+ gpa_t addr, u32 target_level, bool flush)
+{
+ u32 current_level = gstage->pgd_levels - 1;
+ pte_t *next_ptep = (pte_t *)gstage->pgd;
+ unsigned long huge_pte, child_pte;
+ unsigned long child_page_size;
+ pte_t *ptep;
+ int i, ret;
+
+ if (!pcache)
+ return -ENOMEM;
+
+ while (current_level > target_level) {
+ ptep = (pte_t *)&next_ptep[gstage_pte_index(gstage, addr, current_level)];
+
+ if (!pte_val(ptep_get(ptep)))
+ break;
+
+ if (!gstage_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
+ current_level--;
+ continue;
+ }
+
+ huge_pte = pte_val(ptep_get(ptep));
+
+ ret = gstage_level_to_page_size(gstage, current_level - 1, &child_page_size);
+ if (ret)
+ return ret;
+
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
+ if (!next_ptep)
+ return -ENOMEM;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ child_pte = make_child_pte(huge_pte, i, child_page_size);
+ set_pte((pte_t *)&next_ptep[i], __pte(child_pte));
+ }
+
+ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE)));
+
+ if (flush)
+ gstage_tlb_flush(gstage, current_level, addr);
+
+ current_level--;
+ }
+
+ return 0;
+}
+
void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op)
{
@@ -217,7 +345,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
u32 next_ptep_level;
unsigned long next_page_size, page_size;
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
return;
@@ -229,7 +357,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
if (ptep_level && !gstage_pte_leaf(ptep)) {
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
- ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
+ ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
if (ret)
return;
@@ -263,7 +391,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
@@ -297,17 +425,16 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
if (!found_leaf)
goto next;
- if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- kvm_riscv_gstage_op_pte(gstage, addr, ptep,
- ptep_level, GSTAGE_OP_WP);
-
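+ /* Write-protect the whole leaf mapping that covers addr */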
+ addr = ALIGN_DOWN(addr, page_size);
+ kvm_riscv_gstage_op_pte(gstage, addr, ptep,
+ ptep_level, GSTAGE_OP_WP);
next:
addr += page_size;
}
@@ -319,39 +446,34 @@ void __init kvm_riscv_gstage_mode_detect(void)
/* Try Sv57x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
- kvm_riscv_gstage_pgd_levels = 5;
+ kvm_riscv_gstage_max_pgd_levels = 5;
goto done;
}
/* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
- kvm_riscv_gstage_pgd_levels = 4;
+ kvm_riscv_gstage_max_pgd_levels = 4;
goto done;
}
/* Try Sv39x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
- kvm_riscv_gstage_pgd_levels = 3;
+ kvm_riscv_gstage_max_pgd_levels = 3;
goto done;
}
#else /* CONFIG_32BIT */
/* Try Sv32x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
- kvm_riscv_gstage_pgd_levels = 2;
+ kvm_riscv_gstage_max_pgd_levels = 2;
goto done;
}
#endif
/* KVM depends on !HGATP_MODE_OFF */
- kvm_riscv_gstage_mode = HGATP_MODE_OFF;
- kvm_riscv_gstage_pgd_levels = 0;
+ kvm_riscv_gstage_max_pgd_levels = 0;
done:
csr_write(CSR_HGATP, 0);
diff --git a/arch/riscv/kvm/isa.c b/arch/riscv/kvm/isa.c
new file mode 100644
index 000000000000..1132d909cc25
--- /dev/null
+++ b/arch/riscv/kvm/isa.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Qualcomm Technologies, Inc.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/nospec.h>
+#include <linux/pgtable.h>
+#include <asm/kvm_isa.h>
+#include <asm/vector.h>
+
+#define KVM_ISA_EXT_ARR(ext) \
+[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/* Mapping between KVM ISA Extension ID & guest ISA extension ID */
+static const unsigned long kvm_isa_ext_arr[] = {
+ /* Single letter extensions (alphabetically sorted) */
+ [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+ [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+ [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+ [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+ [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+ [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+ [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+ [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
+ /* Multi letter extensions (alphabetically sorted) */
+ KVM_ISA_EXT_ARR(SMNPM),
+ KVM_ISA_EXT_ARR(SMSTATEEN),
+ KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
+ KVM_ISA_EXT_ARR(SSNPM),
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVADE),
+ KVM_ISA_EXT_ARR(SVADU),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
+ KVM_ISA_EXT_ARR(ZABHA),
+ KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
+ KVM_ISA_EXT_ARR(ZALRSC),
+ KVM_ISA_EXT_ARR(ZAWRS),
+ KVM_ISA_EXT_ARR(ZBA),
+ KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBC),
+ KVM_ISA_EXT_ARR(ZBKB),
+ KVM_ISA_EXT_ARR(ZBKC),
+ KVM_ISA_EXT_ARR(ZBKX),
+ KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZCA),
+ KVM_ISA_EXT_ARR(ZCB),
+ KVM_ISA_EXT_ARR(ZCD),
+ KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCLSD),
+ KVM_ISA_EXT_ARR(ZCMOP),
+ KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFBFMIN),
+ KVM_ISA_EXT_ARR(ZFH),
+ KVM_ISA_EXT_ARR(ZFHMIN),
+ KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOP),
+ KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICCRSE),
+ KVM_ISA_EXT_ARR(ZICNTR),
+ KVM_ISA_EXT_ARR(ZICOND),
+ KVM_ISA_EXT_ARR(ZICSR),
+ KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTNTL),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZILSD),
+ KVM_ISA_EXT_ARR(ZIMOP),
+ KVM_ISA_EXT_ARR(ZKND),
+ KVM_ISA_EXT_ARR(ZKNE),
+ KVM_ISA_EXT_ARR(ZKNH),
+ KVM_ISA_EXT_ARR(ZKR),
+ KVM_ISA_EXT_ARR(ZKSED),
+ KVM_ISA_EXT_ARR(ZKSH),
+ KVM_ISA_EXT_ARR(ZKT),
+ KVM_ISA_EXT_ARR(ZTSO),
+ KVM_ISA_EXT_ARR(ZVBB),
+ KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFBFMIN),
+ KVM_ISA_EXT_ARR(ZVFBFWMA),
+ KVM_ISA_EXT_ARR(ZVFH),
+ KVM_ISA_EXT_ARR(ZVFHMIN),
+ KVM_ISA_EXT_ARR(ZVKB),
+ KVM_ISA_EXT_ARR(ZVKG),
+ KVM_ISA_EXT_ARR(ZVKNED),
+ KVM_ISA_EXT_ARR(ZVKNHA),
+ KVM_ISA_EXT_ARR(ZVKNHB),
+ KVM_ISA_EXT_ARR(ZVKSED),
+ KVM_ISA_EXT_ARR(ZVKSH),
+ KVM_ISA_EXT_ARR(ZVKT),
+};
+
+unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext)
+{
+ unsigned long i;
+
+ for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ if (kvm_isa_ext_arr[i] == base_ext)
+ return i;
+ }
+
+ return KVM_RISCV_ISA_EXT_MAX;
+}
+
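+/*
+ * Check whether the host supports the given KVM ISA extension ID and,
+ * optionally, return the matching base ISA extension ID.
+ */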
+int __kvm_riscv_isa_check_host(unsigned long kvm_ext, unsigned long *base_ext)
+{
+ unsigned long host_ext;
+
+ if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX ||
+ kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -ENOENT;
+
+ kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr));
+ switch (kvm_isa_ext_arr[kvm_ext]) {
+ case RISCV_ISA_EXT_SMNPM:
+ /*
+ * Pointer masking effective in (H)S-mode is provided by the
+ * Smnpm extension, so that extension is reported to the guest,
+ * even though the CSR bits for configuring VS-mode pointer
+ * masking on the host side are part of the Ssnpm extension.
+ */
+ host_ext = RISCV_ISA_EXT_SSNPM;
+ break;
+ default:
+ host_ext = kvm_isa_ext_arr[kvm_ext];
+ break;
+ }
+
+ if (!__riscv_isa_extension_available(NULL, host_ext))
+ return -ENOENT;
+
+ if (base_ext)
+ *base_ext = kvm_isa_ext_arr[kvm_ext];
+
+ return 0;
+}
+
+bool kvm_riscv_isa_enable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ case KVM_RISCV_ISA_EXT_H:
+ return false;
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ /* Sscofpmf depends on interrupt filtering defined in ssaia */
+ return !kvm_riscv_isa_check_host(SSAIA);
+ case KVM_RISCV_ISA_EXT_SVADU:
+ /*
+ * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
+ * Guest OS can use Svadu only when the host OS enables Svadu.
+ */
+ return arch_has_hw_pte_young();
+ case KVM_RISCV_ISA_EXT_V:
+ return riscv_v_vstate_ctrl_user_allowed();
+ default:
+ break;
+ }
+
+ return true;
+}
+
+bool kvm_riscv_isa_disable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ /* Extensions which don't have any mechanism to disable */
+ case KVM_RISCV_ISA_EXT_A:
+ case KVM_RISCV_ISA_EXT_C:
+ case KVM_RISCV_ISA_EXT_I:
+ case KVM_RISCV_ISA_EXT_M:
+ /* There is no architectural config bit to disable sscofpmf completely */
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ case KVM_RISCV_ISA_EXT_SSNPM:
+ case KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_RISCV_ISA_EXT_ZAAMO:
+ case KVM_RISCV_ISA_EXT_ZABHA:
+ case KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_RISCV_ISA_EXT_ZALASR:
+ case KVM_RISCV_ISA_EXT_ZALRSC:
+ case KVM_RISCV_ISA_EXT_ZAWRS:
+ case KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_RISCV_ISA_EXT_ZBC:
+ case KVM_RISCV_ISA_EXT_ZBKB:
+ case KVM_RISCV_ISA_EXT_ZBKC:
+ case KVM_RISCV_ISA_EXT_ZBKX:
+ case KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_RISCV_ISA_EXT_ZCA:
+ case KVM_RISCV_ISA_EXT_ZCB:
+ case KVM_RISCV_ISA_EXT_ZCD:
+ case KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_RISCV_ISA_EXT_ZCMOP:
+ case KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_RISCV_ISA_EXT_ZFBFMIN:
+ case KVM_RISCV_ISA_EXT_ZFH:
+ case KVM_RISCV_ISA_EXT_ZFHMIN:
+ case KVM_RISCV_ISA_EXT_ZICBOP:
+ case KVM_RISCV_ISA_EXT_ZICCRSE:
+ case KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_RISCV_ISA_EXT_ZICOND:
+ case KVM_RISCV_ISA_EXT_ZICSR:
+ case KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_RISCV_ISA_EXT_ZIHINTNTL:
+ case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ case KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_RISCV_ISA_EXT_ZIMOP:
+ case KVM_RISCV_ISA_EXT_ZKND:
+ case KVM_RISCV_ISA_EXT_ZKNE:
+ case KVM_RISCV_ISA_EXT_ZKNH:
+ case KVM_RISCV_ISA_EXT_ZKR:
+ case KVM_RISCV_ISA_EXT_ZKSED:
+ case KVM_RISCV_ISA_EXT_ZKSH:
+ case KVM_RISCV_ISA_EXT_ZKT:
+ case KVM_RISCV_ISA_EXT_ZTSO:
+ case KVM_RISCV_ISA_EXT_ZVBB:
+ case KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_RISCV_ISA_EXT_ZVFBFMIN:
+ case KVM_RISCV_ISA_EXT_ZVFBFWMA:
+ case KVM_RISCV_ISA_EXT_ZVFH:
+ case KVM_RISCV_ISA_EXT_ZVFHMIN:
+ case KVM_RISCV_ISA_EXT_ZVKB:
+ case KVM_RISCV_ISA_EXT_ZVKG:
+ case KVM_RISCV_ISA_EXT_ZVKNED:
+ case KVM_RISCV_ISA_EXT_ZVKNHA:
+ case KVM_RISCV_ISA_EXT_ZVKNHB:
+ case KVM_RISCV_ISA_EXT_ZVKSED:
+ case KVM_RISCV_ISA_EXT_ZVKSH:
+ case KVM_RISCV_ISA_EXT_ZVKT:
+ return false;
+ /* Extensions which can be disabled using Smstateen */
+ case KVM_RISCV_ISA_EXT_SSAIA:
+ return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN);
+ case KVM_RISCV_ISA_EXT_SVADE:
+ /*
+ * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
+ * Svade can't be disabled unless we support Svadu.
+ */
+ return arch_has_hw_pte_young();
+ default:
+ break;
+ }
+
+ return true;
+}
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 0f3fe3986fc0..cb8a65273c1f 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -41,8 +41,8 @@ int kvm_arch_enable_virtualization_cpu(void)
if (rc)
return rc;
- csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
- csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
+ csr_write(CSR_HEDELEG, 0);
+ csr_write(CSR_HIDELEG, 0);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);
@@ -105,17 +105,17 @@ static int __init riscv_kvm_init(void)
return rc;
kvm_riscv_gstage_mode_detect();
- switch (kvm_riscv_gstage_mode) {
- case HGATP_MODE_SV32X4:
+ switch (kvm_riscv_gstage_max_pgd_levels) {
+ case 2:
str = "Sv32x4";
break;
- case HGATP_MODE_SV39X4:
+ case 3:
str = "Sv39x4";
break;
- case HGATP_MODE_SV48X4:
+ case 4:
str = "Sv48x4";
break;
- case HGATP_MODE_SV57X4:
+ case 5:
str = "Sv57x4";
break;
default:
@@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
(rc) ? slist : "no features");
}
- kvm_info("using %s G-stage page table format\n", str);
+ kvm_info("highest G-stage page table mode is %s\n", str);
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 088d33ba90ed..2d3def024270 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -24,10 +24,7 @@ static void mmu_wp_memory_region(struct kvm *kvm, int slot)
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
struct kvm_gstage gstage;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_wp_range(&gstage, start, end);
@@ -49,10 +46,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
struct kvm_gstage_mapping map;
struct kvm_gstage gstage;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
@@ -67,7 +61,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
map.pte = pte_wrprotect(map.pte);
- ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(&pcache, kvm->arch.pgd_levels);
if (ret)
goto out;
@@ -89,10 +83,7 @@ void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
{
struct kvm_gstage gstage;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
@@ -109,10 +100,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
struct kvm_gstage gstage;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
kvm_riscv_gstage_wp_range(&gstage, start, end);
}
@@ -141,10 +129,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
struct kvm_gstage gstage;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
@@ -186,7 +171,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* space addressable by the KVM guest GPA space.
*/
if ((new->base_gfn + new->npages) >=
- (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
+ kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels) >> PAGE_SHIFT)
return -EFAULT;
hva = new->userspace_addr;
@@ -250,10 +235,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.pgd)
return false;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
mmu_locked = spin_trylock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT,
(range->end - range->start) << PAGE_SHIFT,
@@ -275,10 +257,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@@ -298,10 +277,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@@ -463,16 +439,13 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
struct kvm_gstage gstage;
struct page *page;
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
+ kvm_riscv_gstage_init(&gstage, kvm);
/* Setup initial state of output mapping */
memset(out_map, 0, sizeof(*out_map));
/* We need minimum second+third level pages */
- ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
@@ -575,6 +548,7 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
kvm->arch.pgd_phys = page_to_phys(pgd_page);
+ kvm->arch.pgd_levels = kvm_riscv_gstage_max_pgd_levels;
return 0;
}
@@ -586,14 +560,13 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
- gstage.kvm = kvm;
- gstage.flags = 0;
- gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
- gstage.pgd = kvm->arch.pgd;
- kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
+ kvm_riscv_gstage_init(&gstage, kvm);
+ kvm_riscv_gstage_unmap_range(&gstage, 0UL,
+ kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels), false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
+ kvm->arch.pgd_levels = 0;
}
spin_unlock(&kvm->mmu_lock);
@@ -603,11 +576,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
{
- unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
- struct kvm_arch *k = &vcpu->kvm->arch;
+ struct kvm_arch *ka = &vcpu->kvm->arch;
+ unsigned long hgatp = kvm_riscv_gstage_mode(ka->pgd_levels)
+ << HGATP_MODE_SHIFT;
- hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
- hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
+ hgatp |= (READ_ONCE(ka->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
+ hgatp |= (ka->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
ncsr_write(CSR_HGATP, hgatp);
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index ff1aeac4eb8e..439c20c2775a 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -338,7 +338,8 @@ static void make_xfence_request(struct kvm *kvm,
bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (hbase != -1UL) {
- if (vcpu->vcpu_id < hbase)
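+ /* hmask covers at most BITS_PER_LONG VCPUs starting at hbase */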
+ if (vcpu->vcpu_id < hbase ||
+ vcpu->vcpu_id >= hbase + BITS_PER_LONG)
continue;
if (!(hmask & (1UL << (vcpu->vcpu_id - hbase))))
continue;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index fdd99ac1e714..a73690eda84b 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -24,6 +24,8 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu);
+
const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
@@ -133,10 +135,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
- vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT;
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
+ /* Setup VCPU config */
+ kvm_riscv_vcpu_config_init(vcpu);
+
/* Setup ISA features available to VCPU */
kvm_riscv_vcpu_setup_isa(vcpu);
@@ -529,57 +533,41 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ if (dbg->control & KVM_GUESTDBG_ENABLE)
vcpu->guest_debug = dbg->control;
- vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
- } else {
+ else
vcpu->guest_debug = 0;
- vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
- }
return 0;
}
-static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
-{
- const unsigned long *isa = vcpu->arch.isa;
- struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
-
- if (riscv_isa_extension_available(isa, SVPBMT))
- cfg->henvcfg |= ENVCFG_PBMTE;
-
- if (riscv_isa_extension_available(isa, SSTC))
- cfg->henvcfg |= ENVCFG_STCE;
-
- if (riscv_isa_extension_available(isa, ZICBOM))
- cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
-
- if (riscv_isa_extension_available(isa, ZICBOZ))
- cfg->henvcfg |= ENVCFG_CBZE;
-
- if (riscv_isa_extension_available(isa, SVADU) &&
- !riscv_isa_extension_available(isa, SVADE))
- cfg->henvcfg |= ENVCFG_ADUE;
-
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
- cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
- if (riscv_isa_extension_available(isa, SSAIA))
- cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
- SMSTATEEN0_AIA |
- SMSTATEEN0_AIA_ISEL;
- if (riscv_isa_extension_available(isa, SMSTATEEN))
- cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
- }
-
- if (vcpu->guest_debug)
- cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
void *nsh;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
- struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+
+ /*
+ * If VCPU is being reloaded on the same physical CPU and no
+ * other KVM VCPU has run on this CPU since it was last put,
+ * we can skip the expensive CSR and HGATP writes.
+ *
+ * Note: If a new CSR is added to this fast-path skip block,
+ * make sure that 'csr_dirty' is set to true in any
+ * ioctl (e.g., KVM_SET_ONE_REG) that modifies it.
+ */
+ if (vcpu != __this_cpu_read(kvm_former_vcpu))
+ __this_cpu_write(kvm_former_vcpu, vcpu);
+ else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty)
+ goto csr_restore_done;
+
+ vcpu->arch.csr_dirty = false;
+
+ /*
+ * Load VCPU config CSRs before other CSRs because
+ * the read/write behaviour of certain CSRs changes
+ * based on VCPU config CSRs.
+ */
+ kvm_riscv_vcpu_config_load(vcpu);
if (kvm_riscv_nacl_sync_csr_available()) {
nsh = nacl_shmem();
@@ -590,17 +578,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
- nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
- nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
- if (IS_ENABLED(CONFIG_32BIT))
- nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
- nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
- if (IS_ENABLED(CONFIG_32BIT))
- nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
- }
} else {
csr_write(CSR_VSSTATUS, csr->vsstatus);
csr_write(CSR_VSIE, csr->vsie);
@@ -609,21 +588,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSEPC, csr->vsepc);
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
- csr_write(CSR_HEDELEG, cfg->hedeleg);
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
- csr_write(CSR_HENVCFG, cfg->henvcfg);
- if (IS_ENABLED(CONFIG_32BIT))
- csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
- csr_write(CSR_HSTATEEN0, cfg->hstateen0);
- if (IS_ENABLED(CONFIG_32BIT))
- csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
- }
}
kvm_riscv_mmu_update_hgatp(vcpu);
+ kvm_riscv_vcpu_aia_load(vcpu, cpu);
+
+csr_restore_done:
kvm_riscv_vcpu_timer_restore(vcpu);
kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
@@ -633,8 +606,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
vcpu->arch.isa);
- kvm_riscv_vcpu_aia_load(vcpu, cpu);
-
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
vcpu->cpu = cpu;
@@ -750,28 +721,22 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *
{
struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
- struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
- (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
- vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
- smcsr->sstateen0);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
+ vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0);
}
static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
- struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
- (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
- smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
- vcpu->arch.host_sstateen0);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
+ smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0);
}
/*
@@ -868,7 +833,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
if (!vcpu->arch.ran_atleast_once)
- kvm_riscv_vcpu_setup_config(vcpu);
+ kvm_riscv_vcpu_config_ran_once(vcpu);
/* Mark this VCPU ran at least once */
vcpu->arch.ran_atleast_once = true;
diff --git a/arch/riscv/kvm/vcpu_config.c b/arch/riscv/kvm/vcpu_config.c
new file mode 100644
index 000000000000..238418fed2b9
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_config.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Qualcomm Technologies, Inc.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_nacl.h>
+
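+/* Default exception and interrupt delegation for guest VCPUs */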
+#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
+ BIT(EXC_INST_ILLEGAL) | \
+ BIT(EXC_BREAKPOINT) | \
+ BIT(EXC_SYSCALL) | \
+ BIT(EXC_INST_PAGE_FAULT) | \
+ BIT(EXC_LOAD_PAGE_FAULT) | \
+ BIT(EXC_STORE_PAGE_FAULT))
+
+#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
+ BIT(IRQ_VS_TIMER) | \
+ BIT(IRQ_VS_EXT))
+
+void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT;
+ vcpu->arch.cfg.hideleg = KVM_HIDELEG_DEFAULT;
+}
+
+void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+
+ if (vcpu->guest_debug)
+ cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
+ else
+ cfg->hedeleg |= BIT(EXC_BREAKPOINT);
+
+ vcpu->arch.csr_dirty = true;
+}
+
+void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu)
+{
+ const unsigned long *isa = vcpu->arch.isa;
+ struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+
+ if (riscv_isa_extension_available(isa, SVPBMT))
+ cfg->henvcfg |= ENVCFG_PBMTE;
+
+ if (riscv_isa_extension_available(isa, SSTC))
+ cfg->henvcfg |= ENVCFG_STCE;
+
+ if (riscv_isa_extension_available(isa, ZICBOM))
+ cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
+
+ if (riscv_isa_extension_available(isa, ZICBOZ))
+ cfg->henvcfg |= ENVCFG_CBZE;
+
+ if (riscv_isa_extension_available(isa, SVADU) &&
+ !riscv_isa_extension_available(isa, SVADE))
+ cfg->henvcfg |= ENVCFG_ADUE;
+
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
+ cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
+ if (riscv_isa_extension_available(isa, SSAIA))
+ cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
+ SMSTATEEN0_AIA |
+ SMSTATEEN0_AIA_ISEL;
+ if (riscv_isa_extension_available(isa, SMSTATEEN))
+ cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
+ }
+
+ if (vcpu->guest_debug)
+ cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
+}
+
+void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+ void *nsh;
+
+ if (kvm_riscv_nacl_sync_csr_available()) {
+ nsh = nacl_shmem();
+ nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
+ nacl_csr_write(nsh, CSR_HIDELEG, cfg->hideleg);
+ nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
+ if (IS_ENABLED(CONFIG_32BIT))
+ nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
+ nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
+ if (IS_ENABLED(CONFIG_32BIT))
+ nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
+ }
+ } else {
+ csr_write(CSR_HEDELEG, cfg->hedeleg);
+ csr_write(CSR_HIDELEG, cfg->hideleg);
+ csr_write(CSR_HENVCFG, cfg->henvcfg);
+ if (IS_ENABLED(CONFIG_32BIT))
+ csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
+ csr_write(CSR_HSTATEEN0, cfg->hstateen0);
+ if (IS_ENABLED(CONFIG_32BIT))
+ csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
+ }
+ }
+}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index bd5a9e7e7165..6ad6df26a2fd 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -13,6 +13,7 @@
#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_isa.h>
#ifdef CONFIG_FPU
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
@@ -60,17 +61,17 @@ void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
{
/* No need to check host sstatus as it can be modified outside */
- if (riscv_isa_extension_available(NULL, d))
+ if (!kvm_riscv_isa_check_host(D))
__kvm_riscv_fp_d_save(cntx);
- else if (riscv_isa_extension_available(NULL, f))
+ else if (!kvm_riscv_isa_check_host(F))
__kvm_riscv_fp_f_save(cntx);
}
void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
{
- if (riscv_isa_extension_available(NULL, d))
+ if (!kvm_riscv_isa_check_host(D))
__kvm_riscv_fp_d_restore(cntx);
- else if (riscv_isa_extension_available(NULL, f))
+ else if (!kvm_riscv_isa_check_host(F))
__kvm_riscv_fp_f_restore(cntx);
}
#endif
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 45ecc0082e90..bb920e8923c9 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -15,259 +15,19 @@
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_vector.h>
-#include <asm/pgtable.h>
-#include <asm/vector.h>
#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
-#define KVM_ISA_EXT_ARR(ext) \
-[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-
-/* Mapping between KVM ISA Extension ID & guest ISA extension ID */
-static const unsigned long kvm_isa_ext_arr[] = {
- /* Single letter extensions (alphabetically sorted) */
- [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
- [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
- [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
- [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
- [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
- [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
- [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
- [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
- /* Multi letter extensions (alphabetically sorted) */
- KVM_ISA_EXT_ARR(SMNPM),
- KVM_ISA_EXT_ARR(SMSTATEEN),
- KVM_ISA_EXT_ARR(SSAIA),
- KVM_ISA_EXT_ARR(SSCOFPMF),
- KVM_ISA_EXT_ARR(SSNPM),
- KVM_ISA_EXT_ARR(SSTC),
- KVM_ISA_EXT_ARR(SVADE),
- KVM_ISA_EXT_ARR(SVADU),
- KVM_ISA_EXT_ARR(SVINVAL),
- KVM_ISA_EXT_ARR(SVNAPOT),
- KVM_ISA_EXT_ARR(SVPBMT),
- KVM_ISA_EXT_ARR(SVVPTC),
- KVM_ISA_EXT_ARR(ZAAMO),
- KVM_ISA_EXT_ARR(ZABHA),
- KVM_ISA_EXT_ARR(ZACAS),
- KVM_ISA_EXT_ARR(ZALASR),
- KVM_ISA_EXT_ARR(ZALRSC),
- KVM_ISA_EXT_ARR(ZAWRS),
- KVM_ISA_EXT_ARR(ZBA),
- KVM_ISA_EXT_ARR(ZBB),
- KVM_ISA_EXT_ARR(ZBC),
- KVM_ISA_EXT_ARR(ZBKB),
- KVM_ISA_EXT_ARR(ZBKC),
- KVM_ISA_EXT_ARR(ZBKX),
- KVM_ISA_EXT_ARR(ZBS),
- KVM_ISA_EXT_ARR(ZCA),
- KVM_ISA_EXT_ARR(ZCB),
- KVM_ISA_EXT_ARR(ZCD),
- KVM_ISA_EXT_ARR(ZCF),
- KVM_ISA_EXT_ARR(ZCLSD),
- KVM_ISA_EXT_ARR(ZCMOP),
- KVM_ISA_EXT_ARR(ZFA),
- KVM_ISA_EXT_ARR(ZFBFMIN),
- KVM_ISA_EXT_ARR(ZFH),
- KVM_ISA_EXT_ARR(ZFHMIN),
- KVM_ISA_EXT_ARR(ZICBOM),
- KVM_ISA_EXT_ARR(ZICBOP),
- KVM_ISA_EXT_ARR(ZICBOZ),
- KVM_ISA_EXT_ARR(ZICCRSE),
- KVM_ISA_EXT_ARR(ZICNTR),
- KVM_ISA_EXT_ARR(ZICOND),
- KVM_ISA_EXT_ARR(ZICSR),
- KVM_ISA_EXT_ARR(ZIFENCEI),
- KVM_ISA_EXT_ARR(ZIHINTNTL),
- KVM_ISA_EXT_ARR(ZIHINTPAUSE),
- KVM_ISA_EXT_ARR(ZIHPM),
- KVM_ISA_EXT_ARR(ZILSD),
- KVM_ISA_EXT_ARR(ZIMOP),
- KVM_ISA_EXT_ARR(ZKND),
- KVM_ISA_EXT_ARR(ZKNE),
- KVM_ISA_EXT_ARR(ZKNH),
- KVM_ISA_EXT_ARR(ZKR),
- KVM_ISA_EXT_ARR(ZKSED),
- KVM_ISA_EXT_ARR(ZKSH),
- KVM_ISA_EXT_ARR(ZKT),
- KVM_ISA_EXT_ARR(ZTSO),
- KVM_ISA_EXT_ARR(ZVBB),
- KVM_ISA_EXT_ARR(ZVBC),
- KVM_ISA_EXT_ARR(ZVFBFMIN),
- KVM_ISA_EXT_ARR(ZVFBFWMA),
- KVM_ISA_EXT_ARR(ZVFH),
- KVM_ISA_EXT_ARR(ZVFHMIN),
- KVM_ISA_EXT_ARR(ZVKB),
- KVM_ISA_EXT_ARR(ZVKG),
- KVM_ISA_EXT_ARR(ZVKNED),
- KVM_ISA_EXT_ARR(ZVKNHA),
- KVM_ISA_EXT_ARR(ZVKNHB),
- KVM_ISA_EXT_ARR(ZVKSED),
- KVM_ISA_EXT_ARR(ZVKSH),
- KVM_ISA_EXT_ARR(ZVKT),
-};
-
-static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
-{
- unsigned long i;
-
- for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
- if (kvm_isa_ext_arr[i] == base_ext)
- return i;
- }
-
- return KVM_RISCV_ISA_EXT_MAX;
-}
-
-static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext)
-{
- unsigned long host_ext;
-
- if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX ||
- kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr))
- return -ENOENT;
-
- kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr));
- *guest_ext = kvm_isa_ext_arr[kvm_ext];
- switch (*guest_ext) {
- case RISCV_ISA_EXT_SMNPM:
- /*
- * Pointer masking effective in (H)S-mode is provided by the
- * Smnpm extension, so that extension is reported to the guest,
- * even though the CSR bits for configuring VS-mode pointer
- * masking on the host side are part of the Ssnpm extension.
- */
- host_ext = RISCV_ISA_EXT_SSNPM;
- break;
- default:
- host_ext = *guest_ext;
- break;
- }
-
- if (!__riscv_isa_extension_available(NULL, host_ext))
- return -ENOENT;
-
- return 0;
-}
-
-static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
-{
- switch (ext) {
- case KVM_RISCV_ISA_EXT_H:
- return false;
- case KVM_RISCV_ISA_EXT_SSCOFPMF:
- /* Sscofpmf depends on interrupt filtering defined in ssaia */
- return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
- case KVM_RISCV_ISA_EXT_SVADU:
- /*
- * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
- * Guest OS can use Svadu only when host OS enable Svadu.
- */
- return arch_has_hw_pte_young();
- case KVM_RISCV_ISA_EXT_V:
- return riscv_v_vstate_ctrl_user_allowed();
- default:
- break;
- }
-
- return true;
-}
-
-static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
-{
- switch (ext) {
- /* Extensions which don't have any mechanism to disable */
- case KVM_RISCV_ISA_EXT_A:
- case KVM_RISCV_ISA_EXT_C:
- case KVM_RISCV_ISA_EXT_I:
- case KVM_RISCV_ISA_EXT_M:
- /* There is not architectural config bit to disable sscofpmf completely */
- case KVM_RISCV_ISA_EXT_SSCOFPMF:
- case KVM_RISCV_ISA_EXT_SSNPM:
- case KVM_RISCV_ISA_EXT_SSTC:
- case KVM_RISCV_ISA_EXT_SVINVAL:
- case KVM_RISCV_ISA_EXT_SVNAPOT:
- case KVM_RISCV_ISA_EXT_SVVPTC:
- case KVM_RISCV_ISA_EXT_ZAAMO:
- case KVM_RISCV_ISA_EXT_ZABHA:
- case KVM_RISCV_ISA_EXT_ZACAS:
- case KVM_RISCV_ISA_EXT_ZALASR:
- case KVM_RISCV_ISA_EXT_ZALRSC:
- case KVM_RISCV_ISA_EXT_ZAWRS:
- case KVM_RISCV_ISA_EXT_ZBA:
- case KVM_RISCV_ISA_EXT_ZBB:
- case KVM_RISCV_ISA_EXT_ZBC:
- case KVM_RISCV_ISA_EXT_ZBKB:
- case KVM_RISCV_ISA_EXT_ZBKC:
- case KVM_RISCV_ISA_EXT_ZBKX:
- case KVM_RISCV_ISA_EXT_ZBS:
- case KVM_RISCV_ISA_EXT_ZCA:
- case KVM_RISCV_ISA_EXT_ZCB:
- case KVM_RISCV_ISA_EXT_ZCD:
- case KVM_RISCV_ISA_EXT_ZCF:
- case KVM_RISCV_ISA_EXT_ZCMOP:
- case KVM_RISCV_ISA_EXT_ZFA:
- case KVM_RISCV_ISA_EXT_ZFBFMIN:
- case KVM_RISCV_ISA_EXT_ZFH:
- case KVM_RISCV_ISA_EXT_ZFHMIN:
- case KVM_RISCV_ISA_EXT_ZICBOP:
- case KVM_RISCV_ISA_EXT_ZICCRSE:
- case KVM_RISCV_ISA_EXT_ZICNTR:
- case KVM_RISCV_ISA_EXT_ZICOND:
- case KVM_RISCV_ISA_EXT_ZICSR:
- case KVM_RISCV_ISA_EXT_ZIFENCEI:
- case KVM_RISCV_ISA_EXT_ZIHINTNTL:
- case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
- case KVM_RISCV_ISA_EXT_ZIHPM:
- case KVM_RISCV_ISA_EXT_ZIMOP:
- case KVM_RISCV_ISA_EXT_ZKND:
- case KVM_RISCV_ISA_EXT_ZKNE:
- case KVM_RISCV_ISA_EXT_ZKNH:
- case KVM_RISCV_ISA_EXT_ZKR:
- case KVM_RISCV_ISA_EXT_ZKSED:
- case KVM_RISCV_ISA_EXT_ZKSH:
- case KVM_RISCV_ISA_EXT_ZKT:
- case KVM_RISCV_ISA_EXT_ZTSO:
- case KVM_RISCV_ISA_EXT_ZVBB:
- case KVM_RISCV_ISA_EXT_ZVBC:
- case KVM_RISCV_ISA_EXT_ZVFBFMIN:
- case KVM_RISCV_ISA_EXT_ZVFBFWMA:
- case KVM_RISCV_ISA_EXT_ZVFH:
- case KVM_RISCV_ISA_EXT_ZVFHMIN:
- case KVM_RISCV_ISA_EXT_ZVKB:
- case KVM_RISCV_ISA_EXT_ZVKG:
- case KVM_RISCV_ISA_EXT_ZVKNED:
- case KVM_RISCV_ISA_EXT_ZVKNHA:
- case KVM_RISCV_ISA_EXT_ZVKNHB:
- case KVM_RISCV_ISA_EXT_ZVKSED:
- case KVM_RISCV_ISA_EXT_ZVKSH:
- case KVM_RISCV_ISA_EXT_ZVKT:
- return false;
- /* Extensions which can be disabled using Smstateen */
- case KVM_RISCV_ISA_EXT_SSAIA:
- return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN);
- case KVM_RISCV_ISA_EXT_SVADE:
- /*
- * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
- * Svade can't be disabled unless we support Svadu.
- */
- return arch_has_hw_pte_young();
- default:
- break;
- }
-
- return true;
-}
-
void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
{
unsigned long guest_ext, i;
- for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
- if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
+ for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ if (__kvm_riscv_isa_check_host(i, &guest_ext))
continue;
- if (kvm_riscv_vcpu_isa_enable_allowed(i))
+ if (kvm_riscv_isa_enable_allowed(i))
set_bit(guest_ext, vcpu->arch.isa);
}
}
@@ -290,17 +50,17 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOM))
+ if (kvm_riscv_isa_check_host(ZICBOM))
return -ENOENT;
reg_val = riscv_cbom_block_size;
break;
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOZ))
+ if (kvm_riscv_isa_check_host(ZICBOZ))
return -ENOENT;
reg_val = riscv_cboz_block_size;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOP))
+ if (kvm_riscv_isa_check_host(ZICBOP))
return -ENOENT;
reg_val = riscv_cbop_block_size;
break;
@@ -361,15 +121,15 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
if (!vcpu->arch.ran_atleast_once) {
/* Ignore the enable/disable request for certain extensions */
for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
- isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+ isa_ext = kvm_riscv_base2isa_ext(i);
if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
reg_val &= ~BIT(i);
continue;
}
- if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+ if (!kvm_riscv_isa_enable_allowed(isa_ext))
if (reg_val & BIT(i))
reg_val &= ~BIT(i);
- if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+ if (!kvm_riscv_isa_disable_allowed(isa_ext))
if (!(reg_val & BIT(i)))
reg_val |= BIT(i);
}
@@ -384,19 +144,19 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
}
break;
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOM))
+ if (kvm_riscv_isa_check_host(ZICBOM))
return -ENOENT;
if (reg_val != riscv_cbom_block_size)
return -EINVAL;
break;
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOZ))
+ if (kvm_riscv_isa_check_host(ZICBOZ))
return -ENOENT;
if (reg_val != riscv_cboz_block_size)
return -EINVAL;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
- if (!riscv_isa_extension_available(NULL, ZICBOP))
+ if (kvm_riscv_isa_check_host(ZICBOP))
return -ENOENT;
if (reg_val != riscv_cbop_block_size)
return -EINVAL;
@@ -670,6 +430,8 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
if (rc)
return rc;
+ vcpu->arch.csr_dirty = true;
+
return 0;
}
@@ -680,7 +442,7 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long guest_ext;
int ret;
- ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
+ ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext);
if (ret)
return ret;
@@ -698,7 +460,7 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long guest_ext;
int ret;
- ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
+ ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext);
if (ret)
return ret;
@@ -711,10 +473,10 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
* extension can be disabled
*/
if (reg_val == 1 &&
- kvm_riscv_vcpu_isa_enable_allowed(reg_num))
+ kvm_riscv_isa_enable_allowed(reg_num))
set_bit(guest_ext, vcpu->arch.isa);
else if (!reg_val &&
- kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+ kvm_riscv_isa_disable_allowed(reg_num))
clear_bit(guest_ext, vcpu->arch.isa);
else
return -EINVAL;
@@ -857,13 +619,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
* was not available.
*/
if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
- !riscv_isa_extension_available(NULL, ZICBOM))
+ kvm_riscv_isa_check_host(ZICBOM))
continue;
else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
- !riscv_isa_extension_available(NULL, ZICBOZ))
+ kvm_riscv_isa_check_host(ZICBOZ))
continue;
else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) &&
- !riscv_isa_extension_available(NULL, ZICBOP))
+ kvm_riscv_isa_check_host(ZICBOP))
continue;
size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
@@ -1084,7 +846,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
- if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
+ if (__kvm_riscv_isa_check_host(i, &guest_ext))
continue;
if (uindices) {
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index e873430e596b..18ef07b3f13a 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -7,16 +7,17 @@
*/
#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
+#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/nospec.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
+#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
-#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
@@ -226,7 +227,14 @@ static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
return -EINVAL;
+ if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
+ return -EINVAL;
+
fevent_code = get_event_code(pmc->event_idx);
+ if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
+ "Invalid firmware event code: %d\n", fevent_code))
+ return -EINVAL;
+
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
*out_val = pmc->counter_val >> 32;
@@ -251,7 +259,14 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
+ if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
+ return -EINVAL;
+
fevent_code = get_event_code(pmc->event_idx);
+ if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
+ "Invalid firmware event code: %d\n", fevent_code))
+ return -EINVAL;
+
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
} else if (pmc->perf_event) {
pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
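
Both firmware-counter read paths now reject counters whose event_idx was never configured (SBI_PMU_EVENT_IDX_INVALID) before deriving an index into fw_event[], and they bound the derived code against SBI_PMU_FW_MAX. A hypothetical helper, not part of this patch, that captures the shared guard:

static int pmu_fw_event_code(struct kvm_pmc *pmc, unsigned long *code)
{
	/* An unconfigured counter must never index fw_event[]. */
	if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
		return -EINVAL;

	*code = get_event_code(pmc->event_idx);
	if (WARN_ONCE(*code >= SBI_PMU_FW_MAX,
		      "Invalid firmware event code: %lu\n", *code))
		return -EINVAL;

	return 0;
}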
@@ -266,8 +281,10 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
unsigned long ctr_mask)
{
- /* Make sure the we have a valid counter mask requested from the caller */
- if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
+ unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu);
+
+ /* Make sure we have a valid counter mask requested from the caller */
+ if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs))
return -EINVAL;
return 0;
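
The extra ctr_base test is not redundant: without it, a caller-chosen base can wrap the unsigned sum back under num_ctrs. A worked example with hypothetical values:

/*
 * num_ctrs = 32, ctr_base = ULONG_MAX, ctr_mask = 0x2:
 *   __fls(ctr_mask) == 1
 *   ctr_base + __fls(ctr_mask) wraps to 0, and 0 < 32,
 * so the old check alone would accept the request and later
 * kvpmu->pmc[] lookups would use out-of-range indices.
 */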
@@ -427,11 +444,12 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
saddr = saddr_low;
if (saddr_high != 0) {
- if (IS_ENABLED(CONFIG_32BIT))
+ if (IS_ENABLED(CONFIG_32BIT)) {
saddr |= ((gpa_t)saddr_high << 32);
- else
+ } else {
sbiret = SBI_ERR_INVALID_ADDRESS;
- goto out;
+ goto out;
+ }
}
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
@@ -441,6 +459,7 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
kfree(kvpmu->sdata);
+ kvpmu->sdata = NULL;
sbiret = SBI_ERR_INVALID_ADDRESS;
goto out;
}
@@ -827,7 +846,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
* filtering is available in the host. Otherwise, guest will always count
* events while the execution is in hypervisor mode.
*/
- if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+ if (kvm_riscv_isa_check_host(SSCOFPMF))
return;
ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index afa0545c3bcf..3b834709b429 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -181,6 +181,7 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
unsigned long reg_size, const void *reg_val)
{
unsigned long value;
+ gpa_t new_shmem = INVALID_GPA;
if (reg_size != sizeof(unsigned long))
return -EINVAL;
@@ -191,18 +192,18 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
if (IS_ENABLED(CONFIG_32BIT)) {
gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);
- vcpu->arch.sta.shmem = value;
- vcpu->arch.sta.shmem |= hi << 32;
+ new_shmem = value;
+ new_shmem |= hi << 32;
} else {
- vcpu->arch.sta.shmem = value;
+ new_shmem = value;
}
break;
case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
if (IS_ENABLED(CONFIG_32BIT)) {
gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);
- vcpu->arch.sta.shmem = ((gpa_t)value << 32);
- vcpu->arch.sta.shmem |= lo;
+ new_shmem = ((gpa_t)value << 32);
+ new_shmem |= lo;
 		} else if (value != 0) {
 			return -EINVAL;
+		} else {
+			/* A zero write to shmem_hi on 64-bit must not clobber shmem */
+			new_shmem = vcpu->arch.sta.shmem;
 		}
@@ -211,6 +212,11 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
return -ENOENT;
}
+ if (new_shmem != INVALID_GPA && !IS_ALIGNED(new_shmem, 64))
+ return -EINVAL;
+
+ vcpu->arch.sta.shmem = new_shmem;
+
return 0;
}
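
Only live addresses are subject to the 64-byte alignment rule; INVALID_GPA (all ones) keeps its role as the "steal-time disabled" sentinel and is deliberately exempt. The check_steal_time_uapi() selftest added at the end of this series exercises all three cases; in caller's terms, a sketch:

	shmem = ST_GPA_BASE + 1;  /* misaligned      -> EINVAL              */
	shmem = ST_GPA_BASE;      /* 64-byte aligned -> accepted            */
	shmem = INVALID_GPA;      /* all ones        -> accepted, disables  */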
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index f36247e4c783..9817ff802821 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <clocksource/timer-riscv.h>
#include <asm/delay.h>
+#include <asm/kvm_isa.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_timer.h>
@@ -253,7 +254,7 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
t->next_set = false;
/* Enable sstc for every vcpu if available in hardware */
- if (riscv_isa_extension_available(NULL, SSTC)) {
+ if (!kvm_riscv_isa_check_host(SSTC)) {
t->sstc_enabled = true;
hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index 05f3cc2d8e31..62d2fb77bb9b 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_vector.h>
#include <asm/vector.h>
@@ -63,13 +64,13 @@ void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx)
{
/* No need to check host sstatus as it can be modified outside */
- if (riscv_isa_extension_available(NULL, v))
+ if (!kvm_riscv_isa_check_host(V))
__kvm_riscv_vector_save(cntx);
}
void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
{
- if (riscv_isa_extension_available(NULL, v))
+ if (!kvm_riscv_isa_check_host(V))
__kvm_riscv_vector_restore(cntx);
}
@@ -80,8 +81,11 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu)
return -ENOMEM;
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
- if (!vcpu->arch.host_context.vector.datap)
+ if (!vcpu->arch.host_context.vector.datap) {
+ kfree(vcpu->arch.guest_context.vector.datap);
+ vcpu->arch.guest_context.vector.datap = NULL;
return -ENOMEM;
+ }
return 0;
}
@@ -127,6 +131,7 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
if (reg_size != vlenb)
return -EINVAL;
+ WARN_ON(!cntx->vector.datap);
*reg_addr = cntx->vector.datap +
(reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
} else {
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 13c63ae1a78b..a9f083feeb76 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -199,7 +199,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_VM_GPA_BITS:
- r = kvm_riscv_gstage_gpa_bits;
+ if (!kvm)
+ r = kvm_riscv_gstage_gpa_bits(kvm_riscv_gstage_max_pgd_levels);
+ else
+ r = kvm_riscv_gstage_gpa_bits(kvm->arch.pgd_levels);
break;
default:
r = 0;
@@ -211,12 +214,52 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
+ if (cap->flags)
+ return -EINVAL;
+
switch (cap->cap) {
case KVM_CAP_RISCV_MP_STATE_RESET:
- if (cap->flags)
- return -EINVAL;
kvm->arch.mp_state_reset = true;
return 0;
+ case KVM_CAP_VM_GPA_BITS: {
+ unsigned long gpa_bits = cap->args[0];
+ unsigned long new_levels;
+ int r = 0;
+
+ /* Decide target pgd levels from requested gpa_bits */
+#ifdef CONFIG_64BIT
+ if (gpa_bits <= 41)
+ new_levels = 3; /* Sv39x4 */
+ else if (gpa_bits <= 50)
+ new_levels = 4; /* Sv48x4 */
+ else if (gpa_bits <= 59)
+ new_levels = 5; /* Sv57x4 */
+ else
+ return -EINVAL;
+#else
+	/* 32-bit: only Sv32x4 */
+ if (gpa_bits <= 34)
+ new_levels = 2;
+ else
+ return -EINVAL;
+#endif
+ if (new_levels > kvm_riscv_gstage_max_pgd_levels)
+ return -EINVAL;
+
+ /* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */
+ mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->slots_lock);
+
+ if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
+ r = -EBUSY;
+ else
+ kvm->arch.pgd_levels = new_levels;
+
+ mutex_unlock(&kvm->slots_lock);
+ mutex_unlock(&kvm->lock);
+
+ return r;
+ }
default:
return -EINVAL;
}
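
The level thresholds follow directly from the g-stage GPA-bits arithmetic introduced earlier in this patch (page shift 12, nine index bits per level on 64-bit, ten on 32-bit, plus the two extra "x4" root bits):

/*
 * gpa_bits(levels) = 12 + 9 * levels + 2
 *   levels = 3 -> 41 bits (Sv39x4)
 *   levels = 4 -> 50 bits (Sv48x4)
 *   levels = 5 -> 59 bits (Sv57x4)
 * and on 32-bit: 12 + 10 * 2 + 2 = 34 bits (Sv32x4),
 * so each branch selects the smallest mode that covers the request.
 */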
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index cf34d448289d..c15bdb1dd8be 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -26,7 +26,8 @@ static DEFINE_SPINLOCK(vmid_lock);
void __init kvm_riscv_gstage_vmid_detect(void)
{
/* Figure-out number of VMID bits in HW */
- csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
+ csr_write(CSR_HGATP, (kvm_riscv_gstage_mode(kvm_riscv_gstage_max_pgd_levels) <<
+ HGATP_MODE_SHIFT) | HGATP_VMID);
vmid_bits = csr_read(CSR_HGATP);
vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
vmid_bits = fls_long(vmid_bits);
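
The probe writes all ones into the HGATP VMID field and counts how many bits stick. For example, on hypothetical hardware implementing 7 VMID bits:

/*
 * (csr_read(CSR_HGATP) & HGATP_VMID) >> HGATP_VMID_SHIFT == 0x7f
 * fls_long(0x7f) == 7  ->  vmid_bits = 7
 */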
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
index ec787b97cf18..0366e9bce7f9 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_types.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -17,4 +17,6 @@
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+#define INVALID_GPA (~(uint64_t)0)
+
#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
index 046b432ae896..16f1815ac48f 100644
--- a/tools/testing/selftests/kvm/include/riscv/sbi.h
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_GENERAL_MAX,
};
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RCVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RCVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
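
Per the SBI PMU specification, an event index packs the type into bits [19:16] and the type-specific code into bits [15:0]. A small helper along these lines (hypothetical, not part of the patch) builds the firmware event used by the sbi_pmu_test change below:

static inline unsigned long sbi_pmu_event_idx(enum sbi_pmu_event_type type,
					      unsigned long code)
{
	return ((unsigned long)type << 16) | (code & 0xffff);
}

/* sbi_pmu_event_idx(SBI_PMU_EVENT_TYPE_FW, SBI_PMU_FW_ACCESS_LOAD) == 0xf0002 */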
/* SBI PMU counter types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 51dd455ff52c..067c6b2c15b0 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -566,3 +566,8 @@ unsigned long riscv64_get_satp_mode(void)
return val;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return kvm_check_cap(KVM_CAP_IRQCHIP);
+}
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 924a335d2262..cec1621ace23 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -436,6 +436,7 @@ static void test_pmu_basic_sanity(void)
struct sbiret ret;
int num_counters = 0, i;
union sbi_pmu_ctr_info ctrinfo;
+ unsigned long fw_eidx;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
@@ -461,7 +462,24 @@ static void test_pmu_basic_sanity(void)
pmu_csr_read_num(ctrinfo.csr);
GUEST_ASSERT(illegal_handler_invoked);
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
- read_fw_counter(i, ctrinfo);
+ /* Read without configure should fail */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+ i, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM);
+
+ /*
+ * Try to configure with a common firmware event.
+ * If configuration succeeds, verify we can read it.
+ */
+ fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) |
+ SBI_PMU_FW_ACCESS_LOAD;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ i, 1, 0, fw_eidx, 0, 0);
+ if (ret.error == 0) {
+ GUEST_ASSERT(ret.value == i);
+ read_fw_counter(i, ctrinfo);
+ }
}
}
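
For reference, the sbi_ecall() arguments in the CFG_MATCH call above map onto the SBI counter_config_matching parameters as follows:

/*
 * sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
 *           i,        counter_idx_base: start at this counter
 *           1,        counter_idx_mask: only counter i is eligible
 *           0,        config_flags: none
 *           fw_eidx,  event_idx: TYPE_FW << 16 | FW_ACCESS_LOAD
 *           0, 0);    event_data: unused for FW events
 * On success ret.value must equal i, since i was the only candidate.
 */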
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 7be8adfe5dd3..efe56a10d13e 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -69,16 +69,10 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
- int ret;
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
- (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
-
vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
@@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
st->pad[8], st->pad[9], st->pad[10]);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__aarch64__)
/* PV_TIME_ST must have 64-byte alignment */
@@ -170,7 +179,6 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
struct kvm_vm *vm = vcpu->vm;
uint64_t st_ipa;
- int ret;
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
@@ -178,21 +186,12 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
.addr = (uint64_t)&st_ipa,
};
- vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
-
st_ipa = (ulong)st_gva[i];
vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
@@ -205,6 +204,36 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
ksft_print_msg(" st_time: %ld\n", st->st_time);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t st_ipa;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&st_ipa,
+ };
+
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+
+ st_ipa = (ulong)ST_GPA_BASE | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+ st_ipa = (ulong)ST_GPA_BASE;
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__riscv)
/* SBI STA shmem must have 64-byte alignment */
@@ -301,6 +330,41 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
pr_info("\n");
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_one_reg reg;
+ uint64_t shmem;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ reg.id = KVM_REG_RISCV |
+ KVM_REG_SIZE_ULONG |
+ KVM_REG_RISCV_SBI_STATE |
+ KVM_REG_RISCV_SBI_STA |
+ KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
+ reg.addr = (uint64_t)&shmem;
+
+ shmem = ST_GPA_BASE + 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Misaligned STA shmem didn't fail with EINVAL");
+
+ shmem = ST_GPA_BASE;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == 0,
+		    "Aligned STA shmem unexpectedly failed");
+
+ shmem = INVALID_GPA;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+	TEST_ASSERT(ret == 0,
+		    "All-ones (disable) STA shmem unexpectedly failed");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__loongarch__)
/* steal_time must have 64-byte alignment */
@@ -465,6 +529,8 @@ int main(int ac, char **av)
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
ksft_set_plan(NR_VCPUS);
+ check_steal_time_uapi();
+
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
steal_time_init(vcpus[i], i);