Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--  arch/riscv/kvm/Makefile            |  10
-rw-r--r--  arch/riscv/kvm/main.c              |   8
-rw-r--r--  arch/riscv/kvm/mmu.c               | 102
-rw-r--r--  arch/riscv/kvm/vcpu.c              |  28
-rw-r--r--  arch/riscv/kvm/vcpu_exit.c         |   2
-rw-r--r--  arch/riscv/kvm/vcpu_fp.c           |   2
-rw-r--r--  arch/riscv/kvm/vcpu_sbi.c          | 213
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_base.c     |  99
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_hsm.c      | 105
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_replace.c  | 135
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_v01.c      | 126
-rw-r--r--  arch/riscv/kvm/vm.c                |  13
-rw-r--r--  arch/riscv/kvm/vmid.c              |   2
13 files changed, 645 insertions(+), 200 deletions(-)
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 30cdd1df0098..e5c56182f48f 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -5,14 +5,10 @@
 
 ccflags-y += -I $(srctree)/$(src)
 
-KVM := ../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
 
 obj-$(CONFIG_KVM) += kvm.o
 
-kvm-y += $(KVM)/kvm_main.o
-kvm-y += $(KVM)/coalesced_mmio.o
-kvm-y += $(KVM)/binary_stats.o
-kvm-y += $(KVM)/eventfd.o
 kvm-y += main.o
 kvm-y += vm.o
 kvm-y += vmid.o
@@ -23,4 +19,8 @@ kvm-y += vcpu_exit.o
 kvm-y += vcpu_fp.o
 kvm-y += vcpu_switch.o
 kvm-y += vcpu_sbi.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_replace.o
+kvm-y += vcpu_sbi_hsm.o
 kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 421ecf4e6360..2e5ca43c8c49 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -58,6 +58,14 @@ int kvm_arch_hardware_enable(void)
 
 void kvm_arch_hardware_disable(void)
 {
+	/*
+	 * After clearing the hideleg CSR, the host kernel will receive
+	 * spurious interrupts if hvip CSR has pending interrupts and the
+	 * corresponding enable bits in vsie CSR are asserted. To avoid it,
+	 * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR.
+	 */
+	csr_write(CSR_VSIE, 0);
+	csr_write(CSR_HVIP, 0);
 	csr_write(CSR_HEDELEG, 0);
 	csr_write(CSR_HIDELEG, 0);
 }
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index fc058ff5f4b6..9af67dbdc66a 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -83,43 +83,6 @@ static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
 	return 0;
 }
 
-static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
-			      int min, int max)
-{
-	void *page;
-
-	BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
-	if (pcache->nobjs >= min)
-		return 0;
-	while (pcache->nobjs < max) {
-		page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		if (!page)
-			return -ENOMEM;
-		pcache->objects[pcache->nobjs++] = page;
-	}
-
-	return 0;
-}
-
-static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
-{
-	while (pcache && pcache->nobjs)
-		free_page((unsigned long)pcache->objects[--pcache->nobjs]);
-}
-
-static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
-{
-	void *p;
-
-	if (!pcache)
-		return NULL;
-
-	BUG_ON(!pcache->nobjs);
-	p = pcache->objects[--pcache->nobjs];
-
-	return p;
-}
-
 static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
 				  pte_t **ptepp, u32 *ptep_level)
 {
@@ -171,7 +134,7 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
 }
 
 static int stage2_set_pte(struct kvm *kvm, u32 level,
-			  struct kvm_mmu_page_cache *pcache,
+			  struct kvm_mmu_memory_cache *pcache,
 			  gpa_t addr, const pte_t *new_pte)
 {
 	u32 current_level = stage2_pgd_levels - 1;
@@ -186,7 +149,9 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
 			return -EEXIST;
 
 		if (!pte_val(*ptep)) {
-			next_ptep = stage2_cache_alloc(pcache);
+			if (!pcache)
+				return -ENOMEM;
+			next_ptep = kvm_mmu_memory_cache_alloc(pcache);
 			if (!next_ptep)
 				return -ENOMEM;
 			*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
@@ -209,7 +174,7 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
 }
 
 static int stage2_map_page(struct kvm *kvm,
-			   struct kvm_mmu_page_cache *pcache,
+			   struct kvm_mmu_memory_cache *pcache,
 			   gpa_t gpa, phys_addr_t hpa,
 			   unsigned long page_size,
 			   bool page_rdonly, bool page_exec)
@@ -384,7 +349,10 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
 	int ret = 0;
 	unsigned long pfn;
 	phys_addr_t addr, end;
-	struct kvm_mmu_page_cache pcache = { 0, };
+	struct kvm_mmu_memory_cache pcache;
+
+	memset(&pcache, 0, sizeof(pcache));
+	pcache.gfp_zero = __GFP_ZERO;
 
 	end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
 	pfn = __phys_to_pfn(hpa);
@@ -395,9 +363,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
 		if (!writable)
 			pte = pte_wrprotect(pte);
 
-		ret = stage2_cache_topup(&pcache,
-					 stage2_pgd_levels,
-					 KVM_MMU_PAGE_CACHE_NR_OBJS);
+		ret = kvm_mmu_topup_memory_cache(&pcache, stage2_pgd_levels);
 		if (ret)
 			goto out;
 
@@ -411,7 +377,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
 	}
 
 out:
-	stage2_cache_flush(&pcache);
+	kvm_mmu_free_memory_cache(&pcache);
 	return ret;
 }
 
@@ -462,7 +428,6 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-				const struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot *old,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
@@ -472,18 +437,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * allocated dirty_bitmap[], dirty pages will be tracked while
	 * the memory slot is write protected.
 	 */
-	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
-		stage2_wp_memory_region(kvm, mem->slot);
+	if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+		stage2_wp_memory_region(kvm, new->id);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				struct kvm_memory_slot *memslot,
-				const struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
-	hva_t hva = mem->userspace_addr;
-	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
+	hva_t hva, reg_end, size;
+	gpa_t base_gpa;
+	bool writable;
 	int ret = 0;
 
 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -494,10 +459,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 * Prevent userspace from creating a memory region outside of the GPA
 	 * space addressable by the KVM guest GPA space.
 	 */
-	if ((memslot->base_gfn + memslot->npages) >=
+	if ((new->base_gfn + new->npages) >=
 	    (stage2_gpa_size >> PAGE_SHIFT))
 		return -EFAULT;
 
+	hva = new->userspace_addr;
+	size = new->npages << PAGE_SHIFT;
+	reg_end = hva + size;
+	base_gpa = new->base_gfn << PAGE_SHIFT;
+	writable = !(new->flags & KVM_MEM_READONLY);
+
 	mmap_read_lock(current->mm);
 
 	/*
@@ -533,15 +504,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		vm_end = min(reg_end, vma->vm_end);
 
 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
+			gpa_t gpa = base_gpa + (vm_start - hva);
 			phys_addr_t pa;
 
 			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 			pa += vm_start - vma->vm_start;
 
 			/* IO region dirty page logging not allowed */
-			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+			if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
 				goto out;
 			}
@@ -559,8 +529,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
-		stage2_unmap_range(kvm, mem->guest_phys_addr,
-				   mem->memory_size, false);
+		stage2_unmap_range(kvm, base_gpa, size, false);
 	spin_unlock(&kvm->mmu_lock);
 
 out:
@@ -646,7 +615,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
+	struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
 	bool logging = (memslot->dirty_bitmap &&
 			!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
 	unsigned long vma_pagesize, mmu_seq;
@@ -681,8 +650,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 	}
 
 	/* We need minimum second+third level pages */
-	ret = stage2_cache_topup(pcache, stage2_pgd_levels,
-				 KVM_MMU_PAGE_CACHE_NR_OBJS);
+	ret = kvm_mmu_topup_memory_cache(pcache, stage2_pgd_levels);
 	if (ret) {
 		kvm_err("Failed to topup stage2 cache\n");
 		return ret;
@@ -731,11 +699,6 @@ out_unlock:
 	return ret;
 }
 
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
-{
-	stage2_cache_flush(&vcpu->arch.mmu_page_cache);
-}
-
 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
 {
 	struct page *pgd_page;
@@ -806,3 +769,8 @@ unsigned long kvm_riscv_stage2_mode(void)
 {
 	return stage2_mode >> HGATP_MODE_SHIFT;
 }
+
+int kvm_riscv_stage2_gpa_bits(void)
+{
+	return stage2_gpa_bits;
+}
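The mmu.c changes above swap the hand-rolled stage2 page cache for the generic kvm_mmu_memory_cache: kvm_mmu_topup_memory_cache() pre-allocates pages while sleeping is still allowed, kvm_mmu_memory_cache_alloc() hands them out under mmu_lock where allocation must not sleep, and kvm_mmu_free_memory_cache() returns the leftovers. A minimal user-space analogue of that topup/alloc/free discipline, with hypothetical names and sizes, purely for illustration:

#include <stdio.h>
#include <stdlib.h>

#define CACHE_NR_OBJS 32	/* capacity; the kernel sizes this per arch */

struct obj_cache {
	int nobjs;
	void *objects[CACHE_NR_OBJS];
};

/* Fill the cache up to "min" objects while allocation may still sleep. */
static int cache_topup(struct obj_cache *c, int min)
{
	void *p;

	while (c->nobjs < min) {
		p = calloc(1, 4096);	/* stands in for a zeroed page */
		if (!p)
			return -1;
		c->objects[c->nobjs++] = p;
	}
	return 0;
}

/* Take one pre-allocated object; safe in a context that must not sleep. */
static void *cache_alloc(struct obj_cache *c)
{
	return c->nobjs ? c->objects[--c->nobjs] : NULL;
}

/* Return whatever went unused once the critical section is over. */
static void cache_free(struct obj_cache *c)
{
	while (c->nobjs)
		free(c->objects[--c->nobjs]);
}

int main(void)
{
	struct obj_cache c = { 0 };

	if (cache_topup(&c, 3))	/* e.g. one page table per stage2 level */
		return 1;
	printf("allocated %p inside the (imaginary) lock\n", cache_alloc(&c));
	cache_free(&c);
	return 0;
}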
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index fb84619df012..0c5239e05721 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -53,6 +53,17 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
 	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+	bool loaded;
+
+	/**
+	 * Preemption must be disabled here because this races with
+	 * kvm_sched_out/kvm_sched_in (called from preempt notifiers),
+	 * which also call vcpu_load/put.
+	 */
+	get_cpu();
+	loaded = (vcpu->cpu != -1);
+	if (loaded)
+		kvm_arch_vcpu_put(vcpu);
 
 	memcpy(csr, reset_csr, sizeof(*csr));
 
@@ -64,6 +75,11 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 
 	WRITE_ONCE(vcpu->arch.irqs_pending, 0);
 	WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+	/* Reset the guest CSRs for hotplug usecase */
+	if (loaded)
+		kvm_arch_vcpu_load(vcpu, smp_processor_id());
+	put_cpu();
 }
 
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -77,6 +93,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	/* Mark this VCPU never ran */
 	vcpu->arch.ran_atleast_once = false;
+	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
 
 	/* Setup ISA features available to VCPU */
 	vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
@@ -100,6 +117,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
+	/**
+	 * vcpu with id 0 is the designated boot cpu.
+	 * Keep all vcpus with non-zero id in power-off state so that
+	 * they can be brought up using the SBI HSM extension.
+	 */
+	if (vcpu->vcpu_idx != 0)
+		kvm_riscv_vcpu_power_off(vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -107,8 +131,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	/* Cleanup VCPU timer */
 	kvm_riscv_vcpu_timer_deinit(vcpu);
 
-	/* Flush the pages pre-allocated for Stage2 page table mappings */
-	kvm_riscv_stage2_flush_cache(vcpu);
+	/* Free unused pages pre-allocated for Stage2 page table mappings */
+	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
 }
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7f2d742ae4c6..571f319e995a 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -146,7 +146,7 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu,
 		vcpu->stat.wfi_exit_stat++;
 		if (!kvm_arch_vcpu_runnable(vcpu)) {
 			srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
-			kvm_vcpu_block(vcpu);
+			kvm_vcpu_halt(vcpu);
 			vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 			kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 		}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index 1b070152578f..4449a976e5a6 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -26,7 +26,7 @@ void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
 		cntx->sstatus |= SR_FS_OFF;
 }
 
-void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
 {
 	cntx->sstatus &= ~SR_FS;
 	cntx->sstatus |= SR_FS_CLEAN;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 3b0e703d22cf..78aa3db76225 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -9,15 +9,58 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
-#include <asm/csr.h>
 #include <asm/sbi.h>
-#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
 
-#define SBI_VERSION_MAJOR 0
-#define SBI_VERSION_MINOR 1
+static int kvm_linux_err_map_sbi(int err)
+{
+	switch (err) {
+	case 0:
+		return SBI_SUCCESS;
+	case -EPERM:
+		return SBI_ERR_DENIED;
+	case -EINVAL:
+		return SBI_ERR_INVALID_PARAM;
+	case -EFAULT:
+		return SBI_ERR_INVALID_ADDRESS;
+	case -EOPNOTSUPP:
+		return SBI_ERR_NOT_SUPPORTED;
+	case -EALREADY:
+		return SBI_ERR_ALREADY_AVAILABLE;
+	default:
+		return SBI_ERR_FAILURE;
+	}
+}
 
-static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
-				       struct kvm_run *run)
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#else
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+	.extid_start = -1UL,
+	.extid_end = -1UL,
+	.handler = NULL,
+};
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
+static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
+	&vcpu_sbi_ext_v01,
+	&vcpu_sbi_ext_base,
+	&vcpu_sbi_ext_time,
+	&vcpu_sbi_ext_ipi,
+	&vcpu_sbi_ext_rfence,
+	&vcpu_sbi_ext_hsm,
+	&vcpu_sbi_ext_experimental,
+	&vcpu_sbi_ext_vendor,
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
 
@@ -55,131 +98,73 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 0;
 }
 
-#ifdef CONFIG_RISCV_SBI_V01
-
-static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
-				    struct kvm_run *run, u32 type)
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid)
 {
-	int i;
-	struct kvm_vcpu *tmp;
+	int i = 0;
 
-	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
-		tmp->arch.power_off = true;
-	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+	for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+		if (sbi_ext[i]->extid_start <= extid &&
+		    sbi_ext[i]->extid_end >= extid)
+			return sbi_ext[i];
+	}
 
-	memset(&run->system_event, 0, sizeof(run->system_event));
-	run->system_event.type = type;
-	run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+	return NULL;
 }
 
 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	ulong hmask;
-	int i, ret = 1;
-	u64 next_cycle;
-	struct kvm_vcpu *rvcpu;
+	int ret = 1;
 	bool next_sepc = true;
-	struct cpumask cm, hm;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_cpu_trap utrap = { 0 };
+	bool userspace_exit = false;
 	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	const struct kvm_vcpu_sbi_extension *sbi_ext;
+	struct kvm_cpu_trap utrap = { 0 };
+	unsigned long out_val = 0;
+	bool ext_is_v01 = false;
 
-	if (!cp)
-		return -EINVAL;
-
-	switch (cp->a7) {
-	case SBI_EXT_0_1_CONSOLE_GETCHAR:
-	case SBI_EXT_0_1_CONSOLE_PUTCHAR:
-		/*
-		 * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
-		 * handled in kernel so we forward these to user-space
-		 */
-		kvm_riscv_vcpu_sbi_forward(vcpu, run);
-		next_sepc = false;
-		ret = 0;
-		break;
-	case SBI_EXT_0_1_SET_TIMER:
-#if __riscv_xlen == 32
-		next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
-#else
-		next_cycle = (u64)cp->a0;
+	sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+	if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+		if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+		    cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+			ext_is_v01 = true;
 #endif
-		kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
-		break;
-	case SBI_EXT_0_1_CLEAR_IPI:
-		kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
-		break;
-	case SBI_EXT_0_1_SEND_IPI:
-		if (cp->a0)
-			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
-							   &utrap);
-		else
-			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
-		if (utrap.scause) {
-			utrap.sepc = cp->sepc;
-			kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
-			next_sepc = false;
-			break;
-		}
-		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
-			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
-			kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
-		}
-		break;
-	case SBI_EXT_0_1_SHUTDOWN:
-		kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
-		next_sepc = false;
-		ret = 0;
-		break;
-	case SBI_EXT_0_1_REMOTE_FENCE_I:
-	case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
-	case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
-		if (cp->a0)
-			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
-							   &utrap);
-		else
-			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
-		if (utrap.scause) {
-			utrap.sepc = cp->sepc;
-			kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
-			next_sepc = false;
-			break;
-		}
-		cpumask_clear(&cm);
-		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
-			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
-			if (rvcpu->cpu < 0)
-				continue;
-			cpumask_set_cpu(rvcpu->cpu, &cm);
-		}
-		riscv_cpuid_to_hartid_mask(&cm, &hm);
-		if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
-			sbi_remote_fence_i(cpumask_bits(&hm));
-		else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
-			sbi_remote_hfence_vvma(cpumask_bits(&hm),
-					       cp->a1, cp->a2);
-		else
-			sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
-						    cp->a1, cp->a2, cp->a3);
-		break;
-	default:
+		ret = sbi_ext->handler(vcpu, run, &out_val, &utrap,
+				       &userspace_exit);
+	} else {
 		/* Return error for unsupported SBI calls */
 		cp->a0 = SBI_ERR_NOT_SUPPORTED;
-		break;
+		goto ecall_done;
+	}
+
+	/* Handle special error cases, i.e. trap, exit, or userspace forward */
+	if (utrap.scause) {
+		/* No need to increment sepc or exit ioctl loop */
+		ret = 1;
+		utrap.sepc = cp->sepc;
+		kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+		next_sepc = false;
+		goto ecall_done;
 	}
 
+	/* Exit the ioctl loop or propagate the error code to the guest */
+	if (userspace_exit) {
+		next_sepc = false;
+		ret = 0;
+	} else {
+		/**
+		 * An SBI extension handler always returns a Linux error code.
+		 * Convert it to the SBI-specific error code that can be
+		 * propagated to the SBI caller.
+		 */
+		ret = kvm_linux_err_map_sbi(ret);
+		cp->a0 = ret;
+		ret = 1;
+	}
+
+ecall_done:
 	if (next_sepc)
 		cp->sepc += 4;
+	if (!ext_is_v01)
+		cp->a1 = out_val;
 
 	return ret;
 }
-
-#else
-
-int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-	kvm_riscv_vcpu_sbi_forward(vcpu, run);
-	return 0;
-}
-
-#endif
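Two mechanisms in the rewritten vcpu_sbi.c do the heavy lifting: kvm_vcpu_sbi_find_ext() scans an array of [extid_start, extid_end] ranges, and kvm_linux_err_map_sbi() translates the handler's Linux errno into the error code the SBI spec defines for a0. A self-contained sketch of that flow; the SBI error values and the "TIME" extension ID (0x54494D45) come from the SBI specification, everything else is made up for the demo:

#include <errno.h>
#include <stdio.h>

/* Error codes defined by the SBI specification. */
enum { SBI_SUCCESS = 0, SBI_ERR_FAILURE = -1, SBI_ERR_NOT_SUPPORTED = -2,
       SBI_ERR_INVALID_PARAM = -3, SBI_ERR_DENIED = -4,
       SBI_ERR_INVALID_ADDRESS = -5, SBI_ERR_ALREADY_AVAILABLE = -6 };

struct sbi_extension {
	unsigned long extid_start, extid_end;
	int (*handler)(unsigned long funcid);
};

static int time_handler(unsigned long funcid)
{
	return funcid == 0 ? 0 : -EOPNOTSUPP;	/* only SET_TIMER (fid 0) */
}

/* One entry per extension; ranges let SBI v0.1 span several legacy EIDs. */
static const struct sbi_extension exts[] = {
	{ 0x54494D45, 0x54494D45, time_handler },	/* "TIME" */
};

static const struct sbi_extension *find_ext(unsigned long extid)
{
	for (unsigned long i = 0; i < sizeof(exts) / sizeof(exts[0]); i++)
		if (exts[i].extid_start <= extid && extid <= exts[i].extid_end)
			return &exts[i];
	return NULL;
}

/* Handlers return Linux errnos; the guest must see SBI error codes. */
static int linux_err_to_sbi(int err)
{
	switch (err) {
	case 0:           return SBI_SUCCESS;
	case -EPERM:      return SBI_ERR_DENIED;
	case -EINVAL:     return SBI_ERR_INVALID_PARAM;
	case -EFAULT:     return SBI_ERR_INVALID_ADDRESS;
	case -EOPNOTSUPP: return SBI_ERR_NOT_SUPPORTED;
	case -EALREADY:   return SBI_ERR_ALREADY_AVAILABLE;
	default:          return SBI_ERR_FAILURE;
	}
}

int main(void)
{
	const struct sbi_extension *e = find_ext(0x54494D45);
	int a0 = e ? linux_err_to_sbi(e->handler(1)) : SBI_ERR_NOT_SUPPORTED;

	printf("guest sees a0 = %d\n", a0);	/* -2: function unsupported */
	return 0;
}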
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..4ecf377f483b
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				    unsigned long *out_val,
+				    struct kvm_cpu_trap *trap, bool *exit)
+{
+	int ret = 0;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	struct sbiret ecall_ret;
+
+	switch (cp->a6) {
+	case SBI_EXT_BASE_GET_SPEC_VERSION:
+		*out_val = (KVM_SBI_VERSION_MAJOR <<
+			    SBI_SPEC_VERSION_MAJOR_SHIFT) |
+			    KVM_SBI_VERSION_MINOR;
+		break;
+	case SBI_EXT_BASE_GET_IMP_ID:
+		*out_val = KVM_SBI_IMPID;
+		break;
+	case SBI_EXT_BASE_GET_IMP_VERSION:
+		*out_val = 0;
+		break;
+	case SBI_EXT_BASE_PROBE_EXT:
+		if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+		     cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+		    (cp->a0 >= SBI_EXT_VENDOR_START &&
+		     cp->a0 <= SBI_EXT_VENDOR_END)) {
+			/*
+			 * For experimental/vendor extensions
+			 * forward it to the userspace
+			 */
+			kvm_riscv_vcpu_sbi_forward(vcpu, run);
+			*exit = true;
+		} else
+			*out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+		break;
+	case SBI_EXT_BASE_GET_MVENDORID:
+	case SBI_EXT_BASE_GET_MARCHID:
+	case SBI_EXT_BASE_GET_MIMPID:
+		ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+		if (!ecall_ret.error)
+			*out_val = ecall_ret.value;
+		/* TODO: We are unnecessarily converting the error twice */
+		ret = sbi_err_map_linux_errno(ecall_ret.error);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+	.extid_start = SBI_EXT_BASE,
+	.extid_end = SBI_EXT_BASE,
+	.handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+					struct kvm_run *run,
+					unsigned long *out_val,
+					struct kvm_cpu_trap *utrap,
+					bool *exit)
+{
+	/*
+	 * Both SBI experimental and vendor extensions are
+	 * unconditionally forwarded to userspace.
+	 */
+	kvm_riscv_vcpu_sbi_forward(vcpu, run);
+	*exit = true;
+	return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+	.extid_start = SBI_EXT_EXPERIMENTAL_START,
+	.extid_end = SBI_EXT_EXPERIMENTAL_END,
+	.handler = kvm_sbi_ext_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+	.extid_start = SBI_EXT_VENDOR_START,
+	.extid_end = SBI_EXT_VENDOR_END,
+	.handler = kvm_sbi_ext_forward_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
new file mode 100644
index 000000000000..2e383687fa48
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *reset_cntx;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	struct kvm_vcpu *target_vcpu;
+	unsigned long target_vcpuid = cp->a0;
+
+	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+	if (!target_vcpu)
+		return -EINVAL;
+	if (!target_vcpu->arch.power_off)
+		return -EALREADY;
+
+	reset_cntx = &target_vcpu->arch.guest_reset_context;
+	/* start address */
+	reset_cntx->sepc = cp->a1;
+	/* target vcpu id to start */
+	reset_cntx->a0 = target_vcpuid;
+	/* private data passed from kernel */
+	reset_cntx->a1 = cp->a2;
+	kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+
+	kvm_riscv_vcpu_power_on(target_vcpu);
+
+	return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.power_off)
+		return -EINVAL;
+
+	kvm_riscv_vcpu_power_off(vcpu);
+
+	return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long target_vcpuid = cp->a0;
+	struct kvm_vcpu *target_vcpu;
+
+	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+	if (!target_vcpu)
+		return -EINVAL;
+	if (!target_vcpu->arch.power_off)
+		return SBI_HSM_HART_STATUS_STARTED;
+	else
+		return SBI_HSM_HART_STATUS_STOPPED;
+}
+
+static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				   unsigned long *out_val,
+				   struct kvm_cpu_trap *utrap,
+				   bool *exit)
+{
+	int ret = 0;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long funcid = cp->a6;
+
+	switch (funcid) {
+	case SBI_EXT_HSM_HART_START:
+		mutex_lock(&kvm->lock);
+		ret = kvm_sbi_hsm_vcpu_start(vcpu);
+		mutex_unlock(&kvm->lock);
+		break;
+	case SBI_EXT_HSM_HART_STOP:
+		ret = kvm_sbi_hsm_vcpu_stop(vcpu);
+		break;
+	case SBI_EXT_HSM_HART_STATUS:
+		ret = kvm_sbi_hsm_vcpu_get_status(vcpu);
+		if (ret >= 0) {
+			*out_val = ret;
+			ret = 0;
+		}
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = {
+	.extid_start = SBI_EXT_HSM,
+	.extid_end = SBI_EXT_HSM,
+	.handler = kvm_sbi_ext_hsm_handler,
+};
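Seen from inside the guest, the base and HSM extensions above service plain ecall instructions: the extension ID goes in a7, the function ID in a6, arguments in a0-a2, and the error/value pair comes back in a0/a1. A sketch of what a bare-metal RISC-V guest might issue to probe for HSM and then start a secondary hart; the wrapper is hypothetical, while the IDs (base = 0x10, PROBE_EXT = 3, HSM = 0x48534D, HART_START = 0) are the ones the SBI spec defines:

/* Minimal guest-side SBI call wrapper (RISC-V, bare metal or early boot). */
struct sbiret { long error; long value; };

static struct sbiret sbi_call(long eid, long fid, long a0, long a1, long a2)
{
	register long r_a0 asm("a0") = a0;
	register long r_a1 asm("a1") = a1;
	register long r_a2 asm("a2") = a2;
	register long r_a6 asm("a6") = fid;
	register long r_a7 asm("a7") = eid;

	asm volatile("ecall"
		     : "+r"(r_a0), "+r"(r_a1)
		     : "r"(r_a2), "r"(r_a6), "r"(r_a7)
		     : "memory");
	return (struct sbiret){ r_a0, r_a1 };
}

#define SBI_EXT_BASE		0x10
#define SBI_BASE_PROBE_EXT	3		/* FID 3: probe an extension */
#define SBI_EXT_HSM		0x48534D	/* "HSM" */
#define SBI_HSM_HART_START	0

/* Bring up hart 1 at secondary_entry if the hypervisor implements HSM. */
void start_secondary(unsigned long secondary_entry, unsigned long priv)
{
	if (sbi_call(SBI_EXT_BASE, SBI_BASE_PROBE_EXT, SBI_EXT_HSM, 0, 0).value)
		sbi_call(SBI_EXT_HSM, SBI_HSM_HART_START, 1 /* hartid */,
			 secondary_entry, priv);
}

On the host side this lands in kvm_sbi_hsm_vcpu_start() above, which seeds the target VCPU's reset context (sepc = a1, a0 = hartid, a1 = the opaque value) before powering it on.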
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
new file mode 100644
index 000000000000..00036b7f83b9
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				    unsigned long *out_val,
+				    struct kvm_cpu_trap *utrap, bool *exit)
+{
+	int ret = 0;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	u64 next_cycle;
+
+	if (cp->a6 != SBI_EXT_TIME_SET_TIMER)
+		return -EINVAL;
+
+#if __riscv_xlen == 32
+	next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+	next_cycle = (u64)cp->a0;
+#endif
+	kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = {
+	.extid_start = SBI_EXT_TIME,
+	.extid_end = SBI_EXT_TIME,
+	.handler = kvm_sbi_ext_time_handler,
+};
+
+static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				   unsigned long *out_val,
+				   struct kvm_cpu_trap *utrap, bool *exit)
+{
+	int ret = 0;
+	unsigned long i;
+	struct kvm_vcpu *tmp;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long hmask = cp->a0;
+	unsigned long hbase = cp->a1;
+
+	if (cp->a6 != SBI_EXT_IPI_SEND_IPI)
+		return -EINVAL;
+
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		if (hbase != -1UL) {
+			if (tmp->vcpu_id < hbase)
+				continue;
+			if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+				continue;
+		}
+		ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = {
+	.extid_start = SBI_EXT_IPI,
+	.extid_end = SBI_EXT_IPI,
+	.handler = kvm_sbi_ext_ipi_handler,
+};
+
+static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				      unsigned long *out_val,
+				      struct kvm_cpu_trap *utrap, bool *exit)
+{
+	int ret = 0;
+	unsigned long i;
+	struct cpumask cm, hm;
+	struct kvm_vcpu *tmp;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long hmask = cp->a0;
+	unsigned long hbase = cp->a1;
+	unsigned long funcid = cp->a6;
+
+	cpumask_clear(&cm);
+	cpumask_clear(&hm);
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		if (hbase != -1UL) {
+			if (tmp->vcpu_id < hbase)
+				continue;
+			if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+				continue;
+		}
+		if (tmp->cpu < 0)
+			continue;
+		cpumask_set_cpu(tmp->cpu, &cm);
+	}
+
+	riscv_cpuid_to_hartid_mask(&cm, &hm);
+
+	switch (funcid) {
+	case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+		ret = sbi_remote_fence_i(cpumask_bits(&hm));
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+		ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+		ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2,
+						  cp->a3, cp->a4);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+		/* TODO: implement for nested hypervisor case */
+	default:
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
+	.extid_start = SBI_EXT_RFENCE,
+	.extid_end = SBI_EXT_RFENCE,
+	.handler = kvm_sbi_ext_rfence_handler,
+};
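The kvm_for_each_vcpu() loops above implement the SBI v0.2 hart-mask convention: hart hbase + i is targeted when bit i of hmask is set, and hbase == -1UL means every hart. A standalone rendering of that selection rule, with an explicit bound check that the in-kernel loops get implicitly from the vcpu_id comparison:

#include <stdio.h>

/* SBI v0.2 hart-mask convention: hart (hbase + i) is selected when bit i
 * of hmask is set; hbase == -1UL is the wildcard meaning "all harts". */
static int hart_selected(unsigned long id, unsigned long hmask,
			 unsigned long hbase)
{
	if (hbase == -1UL)
		return 1;
	if (id < hbase || id - hbase >= 8 * sizeof(unsigned long))
		return 0;	/* avoid an out-of-range shift */
	return !!(hmask & (1UL << (id - hbase)));
}

int main(void)
{
	/* Select harts 2 and 4: base 2, mask 0b101. */
	for (unsigned long id = 0; id < 6; id++)
		if (hart_selected(id, 0x5, 2))
			printf("IPI/fence targets hart %lu\n", id);
	return 0;
}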
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
new file mode 100644
index 000000000000..4c7e13ec9ccc
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *     Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+				    struct kvm_run *run, u32 type)
+{
+	unsigned long i;
+	struct kvm_vcpu *tmp;
+
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+		tmp->arch.power_off = true;
+	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+	memset(&run->system_event, 0, sizeof(run->system_event));
+	run->system_event.type = type;
+	run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				   unsigned long *out_val,
+				   struct kvm_cpu_trap *utrap,
+				   bool *exit)
+{
+	ulong hmask;
+	int i, ret = 0;
+	u64 next_cycle;
+	struct kvm_vcpu *rvcpu;
+	struct cpumask cm, hm;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+	switch (cp->a7) {
+	case SBI_EXT_0_1_CONSOLE_GETCHAR:
+	case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+		/*
+		 * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+		 * handled in kernel so we forward these to user-space
+		 */
+		kvm_riscv_vcpu_sbi_forward(vcpu, run);
+		*exit = true;
+		break;
+	case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+		next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+		next_cycle = (u64)cp->a0;
+#endif
+		ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+		break;
+	case SBI_EXT_0_1_CLEAR_IPI:
+		ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+		break;
+	case SBI_EXT_0_1_SEND_IPI:
+		if (cp->a0)
+			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+							   utrap);
+		else
+			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+		if (utrap->scause)
+			break;
+
+		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+			ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+			if (ret < 0)
+				break;
+		}
+		break;
+	case SBI_EXT_0_1_SHUTDOWN:
+		kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+		*exit = true;
+		break;
+	case SBI_EXT_0_1_REMOTE_FENCE_I:
+	case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+	case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+		if (cp->a0)
+			hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+							   utrap);
+		else
+			hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+		if (utrap->scause)
+			break;
+
+		cpumask_clear(&cm);
+		for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+			rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+			if (rvcpu->cpu < 0)
+				continue;
+			cpumask_set_cpu(rvcpu->cpu, &cm);
+		}
+		riscv_cpuid_to_hartid_mask(&cm, &hm);
+		if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+			ret = sbi_remote_fence_i(cpumask_bits(&hm));
+		else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
+			ret = sbi_remote_hfence_vvma(cpumask_bits(&hm),
+						     cp->a1, cp->a2);
+		else
+			ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
+							  cp->a1, cp->a2,
+							  cp->a3);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+	.extid_start = SBI_EXT_0_1_SET_TIMER,
+	.extid_end = SBI_EXT_0_1_SHUTDOWN,
+	.handler = kvm_sbi_ext_v01_handler,
+};
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index fb18af34a4b5..c768f75279ef 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -46,15 +46,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-	int i;
-
-	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (kvm->vcpus[i]) {
-			kvm_vcpu_destroy(kvm->vcpus[i]);
-			kvm->vcpus[i] = NULL;
-		}
-	}
-	atomic_set(&kvm->online_vcpus, 0);
+	kvm_destroy_vcpus(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -82,6 +74,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_NR_MEMSLOTS:
 		r = KVM_USER_MEM_SLOTS;
 		break;
+	case KVM_CAP_VM_GPA_BITS:
+		r = kvm_riscv_stage2_gpa_bits();
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 2c6253b293bc..807228f8f409 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -65,7 +65,7 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
 
 void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
 {
-	int i;
+	unsigned long i;
 	struct kvm_vcpu *v;
 	struct cpumask hmask;
 	struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
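The new KVM_CAP_VM_GPA_BITS capability, wired to kvm_riscv_stage2_gpa_bits() above, tells userspace how wide a guest physical address may be, so a VMM can lay out its memory map before creating memslots that kvm_arch_prepare_memory_region() would otherwise reject. A hedged sketch of the userspace side of that query (standard KVM ioctls from <linux/kvm.h>; error handling omitted for brevity):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	/* On riscv this returns the usable guest physical address width. */
	int gpa_bits = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_VM_GPA_BITS);

	printf("guest physical address bits: %d\n", gpa_bits);
	return 0;
}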