diff options
Diffstat (limited to 'arch/riscv')
-rw-r--r-- | arch/riscv/Kconfig | 2 | ||||
-rw-r--r-- | arch/riscv/Makefile | 1 | ||||
-rw-r--r-- | arch/riscv/include/asm/csr.h | 87 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_host.h | 264 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_types.h | 7 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_vcpu_fp.h | 59 | ||||
-rw-r--r-- | arch/riscv/include/asm/kvm_vcpu_timer.h | 44 | ||||
-rw-r--r-- | arch/riscv/include/uapi/asm/kvm.h | 128 | ||||
-rw-r--r-- | arch/riscv/kernel/asm-offsets.c | 156 | ||||
-rw-r--r-- | arch/riscv/kvm/Kconfig | 35 | ||||
-rw-r--r-- | arch/riscv/kvm/Makefile | 26 | ||||
-rw-r--r-- | arch/riscv/kvm/main.c | 118 | ||||
-rw-r--r-- | arch/riscv/kvm/mmu.c | 802 | ||||
-rw-r--r-- | arch/riscv/kvm/tlb.S | 74 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu.c | 825 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_exit.c | 701 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_fp.c | 167 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi.c | 185 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_switch.S | 400 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_timer.c | 225 | ||||
-rw-r--r-- | arch/riscv/kvm/vm.c | 97 | ||||
-rw-r--r-- | arch/riscv/kvm/vmid.c | 120 |
22 files changed, 4523 insertions, 0 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c28b743eba57..a34c531be4e7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -566,3 +566,5 @@ menu "Power management options" source "kernel/power/Kconfig" endmenu + +source "arch/riscv/kvm/Kconfig" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0eb4568fbd29..58c1a28e20bb 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -100,6 +100,7 @@ endif head-y := arch/riscv/kernel/head.o core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/ +core-$(CONFIG_KVM) += arch/riscv/kvm/ libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 87ac65696871..5046f431645c 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -58,22 +58,32 @@ /* Interrupt causes (minus the high bit) */ #define IRQ_S_SOFT 1 +#define IRQ_VS_SOFT 2 #define IRQ_M_SOFT 3 #define IRQ_S_TIMER 5 +#define IRQ_VS_TIMER 6 #define IRQ_M_TIMER 7 #define IRQ_S_EXT 9 +#define IRQ_VS_EXT 10 #define IRQ_M_EXT 11 /* Exception causes */ #define EXC_INST_MISALIGNED 0 #define EXC_INST_ACCESS 1 +#define EXC_INST_ILLEGAL 2 #define EXC_BREAKPOINT 3 #define EXC_LOAD_ACCESS 5 #define EXC_STORE_ACCESS 7 #define EXC_SYSCALL 8 +#define EXC_HYPERVISOR_SYSCALL 9 +#define EXC_SUPERVISOR_SYSCALL 10 #define EXC_INST_PAGE_FAULT 12 #define EXC_LOAD_PAGE_FAULT 13 #define EXC_STORE_PAGE_FAULT 15 +#define EXC_INST_GUEST_PAGE_FAULT 20 +#define EXC_LOAD_GUEST_PAGE_FAULT 21 +#define EXC_VIRTUAL_INST_FAULT 22 +#define EXC_STORE_GUEST_PAGE_FAULT 23 /* PMP configuration */ #define PMP_R 0x01 @@ -85,6 +95,58 @@ #define PMP_A_NAPOT 0x18 #define PMP_L 0x80 +/* HSTATUS flags */ +#ifdef CONFIG_64BIT +#define HSTATUS_VSXL _AC(0x300000000, UL) +#define HSTATUS_VSXL_SHIFT 32 +#endif +#define HSTATUS_VTSR _AC(0x00400000, UL) +#define HSTATUS_VTW _AC(0x00200000, UL) +#define HSTATUS_VTVM _AC(0x00100000, UL) +#define HSTATUS_VGEIN _AC(0x0003f000, UL) +#define HSTATUS_VGEIN_SHIFT 12 +#define HSTATUS_HU _AC(0x00000200, UL) +#define HSTATUS_SPVP _AC(0x00000100, UL) +#define HSTATUS_SPV _AC(0x00000080, UL) +#define HSTATUS_GVA _AC(0x00000040, UL) +#define HSTATUS_VSBE _AC(0x00000020, UL) + +/* HGATP flags */ +#define HGATP_MODE_OFF _AC(0, UL) +#define HGATP_MODE_SV32X4 _AC(1, UL) +#define HGATP_MODE_SV39X4 _AC(8, UL) +#define HGATP_MODE_SV48X4 _AC(9, UL) + +#define HGATP32_MODE_SHIFT 31 +#define HGATP32_VMID_SHIFT 22 +#define HGATP32_VMID_MASK _AC(0x1FC00000, UL) +#define HGATP32_PPN _AC(0x003FFFFF, UL) + +#define HGATP64_MODE_SHIFT 60 +#define HGATP64_VMID_SHIFT 44 +#define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL) +#define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL) + +#define HGATP_PAGE_SHIFT 12 + +#ifdef CONFIG_64BIT +#define HGATP_PPN HGATP64_PPN +#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT +#define HGATP_VMID_MASK HGATP64_VMID_MASK +#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT +#else +#define HGATP_PPN HGATP32_PPN +#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT +#define HGATP_VMID_MASK HGATP32_VMID_MASK +#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT +#endif + +/* VSIP & HVIP relation */ +#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT) +#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ + (_AC(1, UL) << IRQ_S_TIMER) | \ + (_AC(1, UL) << IRQ_S_EXT)) + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 @@ -104,6 +166,31 @@ #define CSR_SIP 0x144 #define CSR_SATP 0x180 +#define CSR_VSSTATUS 0x200 +#define CSR_VSIE 0x204 +#define CSR_VSTVEC 0x205 +#define CSR_VSSCRATCH 0x240 +#define CSR_VSEPC 0x241 +#define CSR_VSCAUSE 0x242 +#define CSR_VSTVAL 0x243 +#define CSR_VSIP 0x244 +#define CSR_VSATP 0x280 + +#define CSR_HSTATUS 0x600 +#define CSR_HEDELEG 0x602 +#define CSR_HIDELEG 0x603 +#define CSR_HIE 0x604 +#define CSR_HTIMEDELTA 0x605 +#define CSR_HCOUNTEREN 0x606 +#define CSR_HGEIE 0x607 +#define CSR_HTIMEDELTAH 0x615 +#define CSR_HTVAL 0x643 +#define CSR_HIP 0x644 +#define CSR_HVIP 0x645 +#define CSR_HTINST 0x64a +#define CSR_HGATP 0x680 +#define CSR_HGEIP 0xe12 + #define CSR_MSTATUS 0x300 #define CSR_MISA 0x301 #define CSR_MIE 0x304 diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h new file mode 100644 index 000000000000..25ba21f98504 --- /dev/null +++ b/arch/riscv/include/asm/kvm_host.h @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#ifndef __RISCV_KVM_HOST_H__ +#define __RISCV_KVM_HOST_H__ + +#include <linux/types.h> +#include <linux/kvm.h> +#include <linux/kvm_types.h> +#include <asm/kvm_vcpu_fp.h> +#include <asm/kvm_vcpu_timer.h> + +#ifdef CONFIG_64BIT +#define KVM_MAX_VCPUS (1U << 16) +#else +#define KVM_MAX_VCPUS (1U << 9) +#endif + +#define KVM_HALT_POLL_NS_DEFAULT 500000 + +#define KVM_VCPU_MAX_FEATURES 0 + +#define KVM_REQ_SLEEP \ + KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1) +#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2) + +struct kvm_vm_stat { + struct kvm_vm_stat_generic generic; +}; + +struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; + u64 ecall_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; +}; + +struct kvm_arch_memory_slot { +}; + +struct kvm_vmid { + /* + * Writes to vmid_version and vmid happen with vmid_lock held + * whereas reads happen without any lock held. + */ + unsigned long vmid_version; + unsigned long vmid; +}; + +struct kvm_arch { + /* stage2 vmid */ + struct kvm_vmid vmid; + + /* stage2 page table */ + pgd_t *pgd; + phys_addr_t pgd_phys; + + /* Guest Timer */ + struct kvm_guest_timer timer; +}; + +struct kvm_mmio_decode { + unsigned long insn; + int insn_len; + int len; + int shift; + int return_handled; +}; + +struct kvm_sbi_context { + int return_handled; +}; + +#define KVM_MMU_PAGE_CACHE_NR_OBJS 32 + +struct kvm_mmu_page_cache { + int nobjs; + void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS]; +}; + +struct kvm_cpu_trap { + unsigned long sepc; + unsigned long scause; + unsigned long stval; + unsigned long htval; + unsigned long htinst; +}; + +struct kvm_cpu_context { + unsigned long zero; + unsigned long ra; + unsigned long sp; + unsigned long gp; + unsigned long tp; + unsigned long t0; + unsigned long t1; + unsigned long t2; + unsigned long s0; + unsigned long s1; + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + unsigned long s2; + unsigned long s3; + unsigned long s4; + unsigned long s5; + unsigned long s6; + unsigned long s7; + unsigned long s8; + unsigned long s9; + unsigned long s10; + unsigned long s11; + unsigned long t3; + unsigned long t4; + unsigned long t5; + unsigned long t6; + unsigned long sepc; + unsigned long sstatus; + unsigned long hstatus; + union __riscv_fp_state fp; +}; + +struct kvm_vcpu_csr { + unsigned long vsstatus; + unsigned long vsie; + unsigned long vstvec; + unsigned long vsscratch; + unsigned long vsepc; + unsigned long vscause; + unsigned long vstval; + unsigned long hvip; + unsigned long vsatp; + unsigned long scounteren; +}; + +struct kvm_vcpu_arch { + /* VCPU ran at least once */ + bool ran_atleast_once; + + /* ISA feature bits (similar to MISA) */ + unsigned long isa; + + /* SSCRATCH, STVEC, and SCOUNTEREN of Host */ + unsigned long host_sscratch; + unsigned long host_stvec; + unsigned long host_scounteren; + + /* CPU context of Host */ + struct kvm_cpu_context host_context; + + /* CPU context of Guest VCPU */ + struct kvm_cpu_context guest_context; + + /* CPU CSR context of Guest VCPU */ + struct kvm_vcpu_csr guest_csr; + + /* CPU context upon Guest VCPU reset */ + struct kvm_cpu_context guest_reset_context; + + /* CPU CSR context upon Guest VCPU reset */ + struct kvm_vcpu_csr guest_reset_csr; + + /* + * VCPU interrupts + * + * We have a lockless approach for tracking pending VCPU interrupts + * implemented using atomic bitops. The irqs_pending bitmap represent + * pending interrupts whereas irqs_pending_mask represent bits changed + * in irqs_pending. Our approach is modeled around multiple producer + * and single consumer problem where the consumer is the VCPU itself. + */ + unsigned long irqs_pending; + unsigned long irqs_pending_mask; + + /* VCPU Timer */ + struct kvm_vcpu_timer timer; + + /* MMIO instruction details */ + struct kvm_mmio_decode mmio_decode; + + /* SBI context */ + struct kvm_sbi_context sbi_context; + + /* Cache pages needed to program page tables with spinlock held */ + struct kvm_mmu_page_cache mmu_page_cache; + + /* VCPU power-off state */ + bool power_off; + + /* Don't run the VCPU (blocked) */ + bool pause; + + /* SRCU lock index for in-kernel run loop */ + int srcu_idx; +}; + +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} + +#define KVM_ARCH_WANT_MMU_NOTIFIER + +void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa_divby_4, + unsigned long vmid); +void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid); +void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa_divby_4); +void __kvm_riscv_hfence_gvma_all(void); + +int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *memslot, + gpa_t gpa, unsigned long hva, bool is_write); +void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu); +int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm); +void kvm_riscv_stage2_free_pgd(struct kvm *kvm); +void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu); +void kvm_riscv_stage2_mode_detect(void); +unsigned long kvm_riscv_stage2_mode(void); + +void kvm_riscv_stage2_vmid_detect(void); +unsigned long kvm_riscv_stage2_vmid_bits(void); +int kvm_riscv_stage2_vmid_init(struct kvm *kvm); +bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid); +void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu); + +void __kvm_riscv_unpriv_trap(void); + +unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, + bool read_insn, + unsigned long guest_addr, + struct kvm_cpu_trap *trap); +void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, + struct kvm_cpu_trap *trap); +int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap); + +void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch); + +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask); +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); + +int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#endif /* __RISCV_KVM_HOST_H__ */ diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h new file mode 100644 index 000000000000..e476b404eb67 --- /dev/null +++ b/arch/riscv/include/asm/kvm_types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_KVM_TYPES_H +#define _ASM_RISCV_KVM_TYPES_H + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_RISCV_KVM_TYPES_H */ diff --git a/arch/riscv/include/asm/kvm_vcpu_fp.h b/arch/riscv/include/asm/kvm_vcpu_fp.h new file mode 100644 index 000000000000..4da9b8e0f050 --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_fp.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + * Anup Patel <anup.patel@wdc.com> + */ + +#ifndef __KVM_VCPU_RISCV_FP_H +#define __KVM_VCPU_RISCV_FP_H + +#include <linux/types.h> + +struct kvm_cpu_context; + +#ifdef CONFIG_FPU +void __kvm_riscv_fp_f_save(struct kvm_cpu_context *context); +void __kvm_riscv_fp_f_restore(struct kvm_cpu_context *context); +void __kvm_riscv_fp_d_save(struct kvm_cpu_context *context); +void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context); + +void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, + unsigned long isa); +void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx, + unsigned long isa); +void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx); +void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx); +#else +static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) +{ +} +static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, + unsigned long isa) +{ +} +static inline void kvm_riscv_vcpu_guest_fp_restore( + struct kvm_cpu_context *cntx, + unsigned long isa) +{ +} +static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx) +{ +} +static inline void kvm_riscv_vcpu_host_fp_restore( + struct kvm_cpu_context *cntx) +{ +} +#endif + +int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype); +int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype); + +#endif diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h new file mode 100644 index 000000000000..375281eb49e0 --- /dev/null +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#ifndef __KVM_VCPU_RISCV_TIMER_H +#define __KVM_VCPU_RISCV_TIMER_H + +#include <linux/hrtimer.h> + +struct kvm_guest_timer { + /* Mult & Shift values to get nanoseconds from cycles */ + u32 nsec_mult; + u32 nsec_shift; + /* Time delta value */ + u64 time_delta; +}; + +struct kvm_vcpu_timer { + /* Flag for whether init is done */ + bool init_done; + /* Flag for whether timer event is configured */ + bool next_set; + /* Next timer event cycles */ + u64 next_cycles; + /* Underlying hrtimer instance */ + struct hrtimer hrt; +}; + +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles); +int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu); +int kvm_riscv_guest_timer_init(struct kvm *kvm); + +#endif diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..f808ad1ce500 --- /dev/null +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#ifndef __LINUX_KVM_RISCV_H +#define __LINUX_KVM_RISCV_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <asm/ptrace.h> + +#define __KVM_HAVE_READONLY_MEM + +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#define KVM_INTERRUPT_SET -1U +#define KVM_INTERRUPT_UNSET -2U + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { +}; + +/* KVM Debug exit structure */ +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { +}; + +/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_config { + unsigned long isa; +}; + +/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_core { + struct user_regs_struct regs; + unsigned long mode; +}; + +/* Possible privilege modes for kvm_riscv_core */ +#define KVM_RISCV_MODE_S 1 +#define KVM_RISCV_MODE_U 0 + +/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_csr { + unsigned long sstatus; + unsigned long sie; + unsigned long stvec; + unsigned long sscratch; + unsigned long sepc; + unsigned long scause; + unsigned long stval; + unsigned long sip; + unsigned long satp; + unsigned long scounteren; +}; + +/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_timer { + __u64 frequency; + __u64 time; + __u64 compare; + __u64 state; +}; + +/* Possible states for kvm_riscv_timer */ +#define KVM_RISCV_TIMER_STATE_OFF 0 +#define KVM_RISCV_TIMER_STATE_ON 1 + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 +#define KVM_REG_RISCV_TYPE_SHIFT 24 + +/* Config registers are mapped as type 1 */ +#define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_CONFIG_REG(name) \ + (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long)) + +/* Core registers are mapped as type 2 */ +#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_CORE_REG(name) \ + (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long)) + +/* Control and status registers are mapped as type 3 */ +#define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_CSR_REG(name) \ + (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) + +/* Timer registers are mapped as type 4 */ +#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_TIMER_REG(name) \ + (offsetof(struct kvm_riscv_timer, name) / sizeof(__u64)) + +/* F extension registers are mapped as type 5 */ +#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_FP_F_REG(name) \ + (offsetof(struct __riscv_f_ext_state, name) / sizeof(__u32)) + +/* D extension registers are mapped as type 6 */ +#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_FP_D_REG(name) \ + (offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64)) + +#endif + +#endif /* __LINUX_KVM_RISCV_H */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 478d9f02dab5..253126e4beef 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -7,7 +7,9 @@ #define GENERATING_ASM_OFFSETS #include <linux/kbuild.h> +#include <linux/mm.h> #include <linux/sched.h> +#include <asm/kvm_host.h> #include <asm/thread_info.h> #include <asm/ptrace.h> @@ -110,6 +112,160 @@ void asm_offsets(void) OFFSET(PT_BADADDR, pt_regs, badaddr); OFFSET(PT_CAUSE, pt_regs, cause); + OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero); + OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra); + OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp); + OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp); + OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp); + OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0); + OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1); + OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2); + OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0); + OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1); + OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0); + OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1); + OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2); + OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3); + OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4); + OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5); + OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6); + OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7); + OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2); + OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3); + OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4); + OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5); + OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6); + OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7); + OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8); + OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9); + OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10); + OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11); + OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3); + OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4); + OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5); + OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6); + OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc); + OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus); + OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus); + OFFSET(KVM_ARCH_GUEST_SCOUNTEREN, kvm_vcpu_arch, guest_csr.scounteren); + + OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero); + OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra); + OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp); + OFFSET(KVM_ARCH_HOST_GP, kvm_vcpu_arch, host_context.gp); + OFFSET(KVM_ARCH_HOST_TP, kvm_vcpu_arch, host_context.tp); + OFFSET(KVM_ARCH_HOST_T0, kvm_vcpu_arch, host_context.t0); + OFFSET(KVM_ARCH_HOST_T1, kvm_vcpu_arch, host_context.t1); + OFFSET(KVM_ARCH_HOST_T2, kvm_vcpu_arch, host_context.t2); + OFFSET(KVM_ARCH_HOST_S0, kvm_vcpu_arch, host_context.s0); + OFFSET(KVM_ARCH_HOST_S1, kvm_vcpu_arch, host_context.s1); + OFFSET(KVM_ARCH_HOST_A0, kvm_vcpu_arch, host_context.a0); + OFFSET(KVM_ARCH_HOST_A1, kvm_vcpu_arch, host_context.a1); + OFFSET(KVM_ARCH_HOST_A2, kvm_vcpu_arch, host_context.a2); + OFFSET(KVM_ARCH_HOST_A3, kvm_vcpu_arch, host_context.a3); + OFFSET(KVM_ARCH_HOST_A4, kvm_vcpu_arch, host_context.a4); + OFFSET(KVM_ARCH_HOST_A5, kvm_vcpu_arch, host_context.a5); + OFFSET(KVM_ARCH_HOST_A6, kvm_vcpu_arch, host_context.a6); + OFFSET(KVM_ARCH_HOST_A7, kvm_vcpu_arch, host_context.a7); + OFFSET(KVM_ARCH_HOST_S2, kvm_vcpu_arch, host_context.s2); + OFFSET(KVM_ARCH_HOST_S3, kvm_vcpu_arch, host_context.s3); + OFFSET(KVM_ARCH_HOST_S4, kvm_vcpu_arch, host_context.s4); + OFFSET(KVM_ARCH_HOST_S5, kvm_vcpu_arch, host_context.s5); + OFFSET(KVM_ARCH_HOST_S6, kvm_vcpu_arch, host_context.s6); + OFFSET(KVM_ARCH_HOST_S7, kvm_vcpu_arch, host_context.s7); + OFFSET(KVM_ARCH_HOST_S8, kvm_vcpu_arch, host_context.s8); + OFFSET(KVM_ARCH_HOST_S9, kvm_vcpu_arch, host_context.s9); + OFFSET(KVM_ARCH_HOST_S10, kvm_vcpu_arch, host_context.s10); + OFFSET(KVM_ARCH_HOST_S11, kvm_vcpu_arch, host_context.s11); + OFFSET(KVM_ARCH_HOST_T3, kvm_vcpu_arch, host_context.t3); + OFFSET(KVM_ARCH_HOST_T4, kvm_vcpu_arch, host_context.t4); + OFFSET(KVM_ARCH_HOST_T5, kvm_vcpu_arch, host_context.t5); + OFFSET(KVM_ARCH_HOST_T6, kvm_vcpu_arch, host_context.t6); + OFFSET(KVM_ARCH_HOST_SEPC, kvm_vcpu_arch, host_context.sepc); + OFFSET(KVM_ARCH_HOST_SSTATUS, kvm_vcpu_arch, host_context.sstatus); + OFFSET(KVM_ARCH_HOST_HSTATUS, kvm_vcpu_arch, host_context.hstatus); + OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch); + OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec); + OFFSET(KVM_ARCH_HOST_SCOUNTEREN, kvm_vcpu_arch, host_scounteren); + + OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc); + OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause); + OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval); + OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval); + OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst); + + /* F extension */ + + OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]); + OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]); + OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]); + OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]); + OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]); + OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]); + OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]); + OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]); + OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]); + OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]); + OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]); + OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]); + OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]); + OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]); + OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]); + OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]); + OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]); + OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]); + OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]); + OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]); + OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]); + OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]); + OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]); + OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]); + OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]); + OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]); + OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]); + OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]); + OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]); + OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]); + OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]); + OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]); + OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr); + + /* D extension */ + + OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]); + OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]); + OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]); + OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]); + OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]); + OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]); + OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]); + OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]); + OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]); + OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]); + OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]); + OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]); + OFFSET(KVM_ARCH_FP_D_F12, kvm_cpu_context, fp.d.f[12]); + OFFSET(KVM_ARCH_FP_D_F13, kvm_cpu_context, fp.d.f[13]); + OFFSET(KVM_ARCH_FP_D_F14, kvm_cpu_context, fp.d.f[14]); + OFFSET(KVM_ARCH_FP_D_F15, kvm_cpu_context, fp.d.f[15]); + OFFSET(KVM_ARCH_FP_D_F16, kvm_cpu_context, fp.d.f[16]); + OFFSET(KVM_ARCH_FP_D_F17, kvm_cpu_context, fp.d.f[17]); + OFFSET(KVM_ARCH_FP_D_F18, kvm_cpu_context, fp.d.f[18]); + OFFSET(KVM_ARCH_FP_D_F19, kvm_cpu_context, fp.d.f[19]); + OFFSET(KVM_ARCH_FP_D_F20, kvm_cpu_context, fp.d.f[20]); + OFFSET(KVM_ARCH_FP_D_F21, kvm_cpu_context, fp.d.f[21]); + OFFSET(KVM_ARCH_FP_D_F22, kvm_cpu_context, fp.d.f[22]); + OFFSET(KVM_ARCH_FP_D_F23, kvm_cpu_context, fp.d.f[23]); + OFFSET(KVM_ARCH_FP_D_F24, kvm_cpu_context, fp.d.f[24]); + OFFSET(KVM_ARCH_FP_D_F25, kvm_cpu_context, fp.d.f[25]); + OFFSET(KVM_ARCH_FP_D_F26, kvm_cpu_context, fp.d.f[26]); + OFFSET(KVM_ARCH_FP_D_F27, kvm_cpu_context, fp.d.f[27]); + OFFSET(KVM_ARCH_FP_D_F28, kvm_cpu_context, fp.d.f[28]); + OFFSET(KVM_ARCH_FP_D_F29, kvm_cpu_context, fp.d.f[29]); + OFFSET(KVM_ARCH_FP_D_F30, kvm_cpu_context, fp.d.f[30]); + OFFSET(KVM_ARCH_FP_D_F31, kvm_cpu_context, fp.d.f[31]); + OFFSET(KVM_ARCH_FP_D_FCSR, kvm_cpu_context, fp.d.fcsr); + /* * THREAD_{F,X}* might be larger than a S-type offset can handle, but * these are used in performance-sensitive assembly so we can't resort diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig new file mode 100644 index 000000000000..f5a342fa1b1d --- /dev/null +++ b/arch/riscv/kvm/Kconfig @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + help + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" + depends on RISCV_SBI && MMU + select MMU_NOTIFIER + select PREEMPT_NOTIFIERS + select KVM_MMIO + select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select HAVE_KVM_VCPU_ASYNC_IOCTL + select HAVE_KVM_EVENTFD + select SRCU + help + Support hosting virtualized guest machines. + + If unsure, say N. + +endif # VIRTUALIZATION diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile new file mode 100644 index 000000000000..30cdd1df0098 --- /dev/null +++ b/arch/riscv/kvm/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for RISC-V KVM support +# + +ccflags-y += -I $(srctree)/$(src) + +KVM := ../../../virt/kvm + +obj-$(CONFIG_KVM) += kvm.o + +kvm-y += $(KVM)/kvm_main.o +kvm-y += $(KVM)/coalesced_mmio.o +kvm-y += $(KVM)/binary_stats.o +kvm-y += $(KVM)/eventfd.o +kvm-y += main.o +kvm-y += vm.o +kvm-y += vmid.o +kvm-y += tlb.o +kvm-y += mmu.o +kvm-y += vcpu.o +kvm-y += vcpu_exit.o +kvm-y += vcpu_fp.o +kvm-y += vcpu_switch.o +kvm-y += vcpu_sbi.o +kvm-y += vcpu_timer.o diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c new file mode 100644 index 000000000000..421ecf4e6360 --- /dev/null +++ b/arch/riscv/kvm/main.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/sbi.h> + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_check_processor_compat(void *opaque) +{ + return 0; +} + +int kvm_arch_hardware_setup(void *opaque) +{ + return 0; +} + +int kvm_arch_hardware_enable(void) +{ + unsigned long hideleg, hedeleg; + + hedeleg = 0; + hedeleg |= (1UL << EXC_INST_MISALIGNED); + hedeleg |= (1UL << EXC_BREAKPOINT); + hedeleg |= (1UL << EXC_SYSCALL); + hedeleg |= (1UL << EXC_INST_PAGE_FAULT); + hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); + hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); + csr_write(CSR_HEDELEG, hedeleg); + + hideleg = 0; + hideleg |= (1UL << IRQ_VS_SOFT); + hideleg |= (1UL << IRQ_VS_TIMER); + hideleg |= (1UL << IRQ_VS_EXT); + csr_write(CSR_HIDELEG, hideleg); + + csr_write(CSR_HCOUNTEREN, -1UL); + + csr_write(CSR_HVIP, 0); + + return 0; +} + +void kvm_arch_hardware_disable(void) +{ + csr_write(CSR_HEDELEG, 0); + csr_write(CSR_HIDELEG, 0); +} + +int kvm_arch_init(void *opaque) +{ + const char *str; + + if (!riscv_isa_extension_available(NULL, h)) { + kvm_info("hypervisor extension not available\n"); + return -ENODEV; + } + + if (sbi_spec_is_0_1()) { + kvm_info("require SBI v0.2 or higher\n"); + return -ENODEV; + } + + if (sbi_probe_extension(SBI_EXT_RFENCE) <= 0) { + kvm_info("require SBI RFENCE extension\n"); + return -ENODEV; + } + + kvm_riscv_stage2_mode_detect(); + + kvm_riscv_stage2_vmid_detect(); + + kvm_info("hypervisor extension available\n"); + + switch (kvm_riscv_stage2_mode()) { + case HGATP_MODE_SV32X4: + str = "Sv32x4"; + break; + case HGATP_MODE_SV39X4: + str = "Sv39x4"; + break; + case HGATP_MODE_SV48X4: + str = "Sv48x4"; + break; + default: + return -ENODEV; + } + kvm_info("using %s G-stage page table format\n", str); + + kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits()); + + return 0; +} + +void kvm_arch_exit(void) +{ +} + +static int riscv_kvm_init(void) +{ + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); +} +module_init(riscv_kvm_init); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c new file mode 100644 index 000000000000..d81bae8eb55e --- /dev/null +++ b/arch/riscv/kvm/mmu.c @@ -0,0 +1,802 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/hugetlb.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/kvm_host.h> +#include <linux/sched/signal.h> +#include <asm/csr.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/sbi.h> + +#ifdef CONFIG_64BIT +static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); +static unsigned long stage2_pgd_levels = 3; +#define stage2_index_bits 9 +#else +static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); +static unsigned long stage2_pgd_levels = 2; +#define stage2_index_bits 10 +#endif + +#define stage2_pgd_xbits 2 +#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits)) +#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \ + (stage2_pgd_levels * stage2_index_bits) + \ + stage2_pgd_xbits) +#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits)) + +#define stage2_pte_leaf(__ptep) \ + (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) + +static inline unsigned long stage2_pte_index(gpa_t addr, u32 level) +{ + unsigned long mask; + unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level); + + if (level == (stage2_pgd_levels - 1)) + mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1; + else + mask = PTRS_PER_PTE - 1; + + return (addr >> shift) & mask; +} + +static inline unsigned long stage2_pte_page_vaddr(pte_t pte) +{ + return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT); +} + +static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level) +{ + u32 i; + unsigned long psz = 1UL << 12; + + for (i = 0; i < stage2_pgd_levels; i++) { + if (page_size == (psz << (i * stage2_index_bits))) { + *out_level = i; + return 0; + } + } + + return -EINVAL; +} + +static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize) +{ + if (stage2_pgd_levels < level) + return -EINVAL; + + *out_pgsize = 1UL << (12 + (level * stage2_index_bits)); + + return 0; +} + +static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache, + int min, int max) +{ + void *page; + + BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS); + if (pcache->nobjs >= min) + return 0; + while (pcache->nobjs < max) { + page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return -ENOMEM; + pcache->objects[pcache->nobjs++] = page; + } + + return 0; +} + +static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache) +{ + while (pcache && pcache->nobjs) + free_page((unsigned long)pcache->objects[--pcache->nobjs]); +} + +static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache) +{ + void *p; + + if (!pcache) + return NULL; + + BUG_ON(!pcache->nobjs); + p = pcache->objects[--pcache->nobjs]; + + return p; +} + +static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, + pte_t **ptepp, u32 *ptep_level) +{ + pte_t *ptep; + u32 current_level = stage2_pgd_levels - 1; + + *ptep_level = current_level; + ptep = (pte_t *)kvm->arch.pgd; + ptep = &ptep[stage2_pte_index(addr, current_level)]; + while (ptep && pte_val(*ptep)) { + if (stage2_pte_leaf(ptep)) { + *ptep_level = current_level; + *ptepp = ptep; + return true; + } + + if (current_level) { + current_level--; + *ptep_level = current_level; + ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); + ptep = &ptep[stage2_pte_index(addr, current_level)]; + } else { + ptep = NULL; + } + } + + return false; +} + +static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) +{ + struct cpumask hmask; + unsigned long size = PAGE_SIZE; + struct kvm_vmid *vmid = &kvm->arch.vmid; + + if (stage2_level_to_page_size(level, &size)) + return; + addr &= ~(size - 1); + + /* + * TODO: Instead of cpu_online_mask, we should only target CPUs + * where the Guest/VM is running. + */ + preempt_disable(); + riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); + sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size, + READ_ONCE(vmid->vmid)); + preempt_enable(); +} + +static int stage2_set_pte(struct kvm *kvm, u32 level, + struct kvm_mmu_page_cache *pcache, + gpa_t addr, const pte_t *new_pte) +{ + u32 current_level = stage2_pgd_levels - 1; + pte_t *next_ptep = (pte_t *)kvm->arch.pgd; + pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)]; + + if (current_level < level) + return -EINVAL; + + while (current_level != level) { + if (stage2_pte_leaf(ptep)) + return -EEXIST; + + if (!pte_val(*ptep)) { + next_ptep = stage2_cache_alloc(pcache); + if (!next_ptep) + return -ENOMEM; + *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)), + __pgprot(_PAGE_TABLE)); + } else { + if (stage2_pte_leaf(ptep)) + return -EEXIST; + next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); + } + + current_level--; + ptep = &next_ptep[stage2_pte_index(addr, current_level)]; + } + + *ptep = *new_pte; + if (stage2_pte_leaf(ptep)) + stage2_remote_tlb_flush(kvm, current_level, addr); + + return 0; +} + +static int stage2_map_page(struct kvm *kvm, + struct kvm_mmu_page_cache *pcache, + gpa_t gpa, phys_addr_t hpa, + unsigned long page_size, + bool page_rdonly, bool page_exec) +{ + int ret; + u32 level = 0; + pte_t new_pte; + pgprot_t prot; + + ret = stage2_page_size_to_level(page_size, &level); + if (ret) + return ret; + + /* + * A RISC-V implementation can choose to either: + * 1) Update 'A' and 'D' PTE bits in hardware + * 2) Generate page fault when 'A' and/or 'D' bits are not set + * PTE so that software can update these bits. + * + * We support both options mentioned above. To achieve this, we + * always set 'A' and 'D' PTE bits at time of creating stage2 + * mapping. To support KVM dirty page logging with both options + * mentioned above, we will write-protect stage2 PTEs to track + * dirty pages. + */ + + if (page_exec) { + if (page_rdonly) + prot = PAGE_READ_EXEC; + else + prot = PAGE_WRITE_EXEC; + } else { + if (page_rdonly) + prot = PAGE_READ; + else + prot = PAGE_WRITE; + } + new_pte = pfn_pte(PFN_DOWN(hpa), prot); + new_pte = pte_mkdirty(new_pte); + + return stage2_set_pte(kvm, level, pcache, gpa, &new_pte); +} + +enum stage2_op { + STAGE2_OP_NOP = 0, /* Nothing */ + STAGE2_OP_CLEAR, /* Clear/Unmap */ + STAGE2_OP_WP, /* Write-protect */ +}; + +static void stage2_op_pte(struct kvm *kvm, gpa_t addr, + pte_t *ptep, u32 ptep_level, enum stage2_op op) +{ + int i, ret; + pte_t *next_ptep; + u32 next_ptep_level; + unsigned long next_page_size, page_size; + + ret = stage2_level_to_page_size(ptep_level, &page_size); + if (ret) + return; + + BUG_ON(addr & (page_size - 1)); + + if (!pte_val(*ptep)) + return; + + if (ptep_level && !stage2_pte_leaf(ptep)) { + next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep); + next_ptep_level = ptep_level - 1; + ret = stage2_level_to_page_size(next_ptep_level, + &next_page_size); + if (ret) + return; + + if (op == STAGE2_OP_CLEAR) + set_pte(ptep, __pte(0)); + for (i = 0; i < PTRS_PER_PTE; i++) + stage2_op_pte(kvm, addr + i * next_page_size, + &next_ptep[i], next_ptep_level, op); + if (op == STAGE2_OP_CLEAR) + put_page(virt_to_page(next_ptep)); + } else { + if (op == STAGE2_OP_CLEAR) + set_pte(ptep, __pte(0)); + else if (op == STAGE2_OP_WP) + set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE)); + stage2_remote_tlb_flush(kvm, ptep_level, addr); + } +} + +static void stage2_unmap_range(struct kvm *kvm, gpa_t start, + gpa_t size, bool may_block) +{ + int ret; + pte_t *ptep; + u32 ptep_level; + bool found_leaf; + unsigned long page_size; + gpa_t addr = start, end = start + size; + + while (addr < end) { + found_leaf = stage2_get_leaf_entry(kvm, addr, + &ptep, &ptep_level); + ret = stage2_level_to_page_size(ptep_level, &page_size); + if (ret) + break; + + if (!found_leaf) + goto next; + + if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) + stage2_op_pte(kvm, addr, ptep, + ptep_level, STAGE2_OP_CLEAR); + +next: + addr += page_size; + + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. + */ + if (may_block && addr < end) + cond_resched_lock(&kvm->mmu_lock); + } +} + +static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end) +{ + int ret; + pte_t *ptep; + u32 ptep_level; + bool found_leaf; + gpa_t addr = start; + unsigned long page_size; + + while (addr < end) { + found_leaf = stage2_get_leaf_entry(kvm, addr, + &ptep, &ptep_level); + ret = stage2_level_to_page_size(ptep_level, &page_size); + if (ret) + break; + + if (!found_leaf) + goto next; + + if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) + stage2_op_pte(kvm, addr, ptep, + ptep_level, STAGE2_OP_WP); + +next: + addr += page_size; + } +} + +static void stage2_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); +} + +static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, + unsigned long size, bool writable) +{ + pte_t pte; + int ret = 0; + unsigned long pfn; + phys_addr_t addr, end; + struct kvm_mmu_page_cache pcache = { 0, }; + + end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; + pfn = __phys_to_pfn(hpa); + + for (addr = gpa; addr < end; addr += PAGE_SIZE) { + pte = pfn_pte(pfn, PAGE_KERNEL); + + if (!writable) + pte = pte_wrprotect(pte); + + ret = stage2_cache_topup(&pcache, + stage2_pgd_levels, + KVM_MMU_PAGE_CACHE_NR_OBJS); + if (ret) + goto out; + + spin_lock(&kvm->mmu_lock); + ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte); + spin_unlock(&kvm->mmu_lock); + if (ret) + goto out; + + pfn++; + } + +out: + stage2_cache_flush(&pcache); + return ret; +} + +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, + unsigned long mask) +{ + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + stage2_wp_range(kvm, start, end); +} + +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ +} + +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + kvm_flush_remote_tlbs(kvm); +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free) +{ +} + +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + kvm_riscv_stage2_free_pgd(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be tracked while + * the memory slot is write protected. + */ + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) + stage2_wp_memory_region(kvm, mem->slot); +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + hva_t hva = mem->userspace_addr; + hva_t reg_end = hva + mem->memory_size; + bool writable = !(mem->flags & KVM_MEM_READONLY); + int ret = 0; + + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && + change != KVM_MR_FLAGS_ONLY) + return 0; + + /* + * Prevent userspace from creating a memory region outside of the GPA + * space addressable by the KVM guest GPA space. + */ + if ((memslot->base_gfn + memslot->npages) >= + (stage2_gpa_size >> PAGE_SHIFT)) + return -EFAULT; + + mmap_read_lock(current->mm); + + /* + * A memory region could potentially cover multiple VMAs, and + * any holes between them, so iterate over all of them to find + * out if we can map any of them right now. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Mapping a read-only VMA is only allowed if the + * memory region is configured as read-only. + */ + if (writable && !(vma->vm_flags & VM_WRITE)) { + ret = -EPERM; + break; + } + + /* Take the intersection of this VMA with the memory region */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (vma->vm_flags & VM_PFNMAP) { + gpa_t gpa = mem->guest_phys_addr + + (vm_start - mem->userspace_addr); + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; + + /* IO region dirty page logging not allowed */ + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } + + ret = stage2_ioremap(kvm, gpa, pa, + vm_end - vm_start, writable); + if (ret) + break; + } + hva = vm_end; + } while (hva < reg_end); + + if (change == KVM_MR_FLAGS_ONLY) + goto out; + + spin_lock(&kvm->mmu_lock); + if (ret) + stage2_unmap_range(kvm, mem->guest_phys_addr, + mem->memory_size, false); + spin_unlock(&kvm->mmu_lock); + +out: + mmap_read_unlock(current->mm); + return ret; +} + +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) +{ + if (!kvm->arch.pgd) + return false; + + stage2_unmap_range(kvm, range->start << PAGE_SHIFT, + (range->end - range->start) << PAGE_SHIFT, + range->may_block); + return false; +} + +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + int ret; + kvm_pfn_t pfn = pte_pfn(range->pte); + + if (!kvm->arch.pgd) + return false; + + WARN_ON(range->end - range->start != 1); + + ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT, + __pfn_to_phys(pfn), PAGE_SIZE, true, true); + if (ret) { + kvm_debug("Failed to map stage2 page (error %d)\n", ret); + return true; + } + + return false; +} + +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + pte_t *ptep; + u32 ptep_level = 0; + u64 size = (range->end - range->start) << PAGE_SHIFT; + + if (!kvm->arch.pgd) + return false; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE); + + if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT, + &ptep, &ptep_level)) + return false; + + return ptep_test_and_clear_young(NULL, 0, ptep); +} + +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + pte_t *ptep; + u32 ptep_level = 0; + u64 size = (range->end - range->start) << PAGE_SHIFT; + + if (!kvm->arch.pgd) + return false; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE); + + if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT, + &ptep, &ptep_level)) + return false; + + return pte_young(*ptep); +} + +int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *memslot, + gpa_t gpa, unsigned long hva, bool is_write) +{ + int ret; + kvm_pfn_t hfn; + bool writeable; + short vma_pageshift; + gfn_t gfn = gpa >> PAGE_SHIFT; + struct vm_area_struct *vma; + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache; + bool logging = (memslot->dirty_bitmap && + !(memslot->flags & KVM_MEM_READONLY)) ? true : false; + unsigned long vma_pagesize, mmu_seq; + + mmap_read_lock(current->mm); + + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (unlikely(!vma)) { + kvm_err("Failed to find VMA for hva 0x%lx\n", hva); + mmap_read_unlock(current->mm); + return -EFAULT; + } + + if (is_vm_hugetlb_page(vma)) + vma_pageshift = huge_page_shift(hstate_vma(vma)); + else + vma_pageshift = PAGE_SHIFT; + vma_pagesize = 1ULL << vma_pageshift; + if (logging || (vma->vm_flags & VM_PFNMAP)) + vma_pagesize = PAGE_SIZE; + + if (vma_pagesize == PMD_SIZE || vma_pagesize == PGDIR_SIZE) + gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; + + mmap_read_unlock(current->mm); + + if (vma_pagesize != PGDIR_SIZE && + vma_pagesize != PMD_SIZE && + vma_pagesize != PAGE_SIZE) { + kvm_err("Invalid VMA page size 0x%lx\n", vma_pagesize); + return -EFAULT; + } + + /* We need minimum second+third level pages */ + ret = stage2_cache_topup(pcache, stage2_pgd_levels, + KVM_MMU_PAGE_CACHE_NR_OBJS); + if (ret) { + kvm_err("Failed to topup stage2 cache\n"); + return ret; + } + + mmu_seq = kvm->mmu_notifier_seq; + + hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable); + if (hfn == KVM_PFN_ERR_HWPOISON) { + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, + vma_pageshift, current); + return 0; + } + if (is_error_noslot_pfn(hfn)) + return -EFAULT; + + /* + * If logging is active then we allow writable pages only + * for write faults. + */ + if (logging && !is_write) + writeable = false; + + spin_lock(&kvm->mmu_lock); + + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + + if (writeable) { + kvm_set_pfn_dirty(hfn); + mark_page_dirty(kvm, gfn); + ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, + vma_pagesize, false, true); + } else { + ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, + vma_pagesize, true, true); + } + + if (ret) + kvm_err("Failed to map in stage2\n"); + +out_unlock: + spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(hfn); + kvm_release_pfn_clean(hfn); + return ret; +} + +void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu) +{ + stage2_cache_flush(&vcpu->arch.mmu_page_cache); +} + +int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm) +{ + struct page *pgd_page; + + if (kvm->arch.pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(stage2_pgd_size)); + if (!pgd_page) + return -ENOMEM; + kvm->arch.pgd = page_to_virt(pgd_page); + kvm->arch.pgd_phys = page_to_phys(pgd_page); + + return 0; +} + +void kvm_riscv_stage2_free_pgd(struct kvm *kvm) +{ + void *pgd = NULL; + + spin_lock(&kvm->mmu_lock); + if (kvm->arch.pgd) { + stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false); + pgd = READ_ONCE(kvm->arch.pgd); + kvm->arch.pgd = NULL; + kvm->arch.pgd_phys = 0; + } + spin_unlock(&kvm->mmu_lock); + + if (pgd) + free_pages((unsigned long)pgd, get_order(stage2_pgd_size)); +} + +void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu) +{ + unsigned long hgatp = stage2_mode; + struct kvm_arch *k = &vcpu->kvm->arch; + + hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & + HGATP_VMID_MASK; + hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; + + csr_write(CSR_HGATP, hgatp); + + if (!kvm_riscv_stage2_vmid_bits()) + __kvm_riscv_hfence_gvma_all(); +} + +void kvm_riscv_stage2_mode_detect(void) +{ +#ifdef CONFIG_64BIT + /* Try Sv48x4 stage2 mode */ + csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); + if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { + stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); + stage2_pgd_levels = 4; + } + csr_write(CSR_HGATP, 0); + + __kvm_riscv_hfence_gvma_all(); +#endif +} + +unsigned long kvm_riscv_stage2_mode(void) +{ + return stage2_mode >> HGATP_MODE_SHIFT; +} diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S new file mode 100644 index 000000000000..899f75d60bad --- /dev/null +++ b/arch/riscv/kvm/tlb.S @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + .altmacro + .option norelax + + /* + * Instruction encoding of hfence.gvma is: + * HFENCE.GVMA rs1, rs2 + * HFENCE.GVMA zero, rs2 + * HFENCE.GVMA rs1 + * HFENCE.GVMA + * + * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 + * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 + * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 + * rs1==zero and rs2==zero ==> HFENCE.GVMA + * + * Instruction encoding of HFENCE.GVMA is: + * 0110001 rs2(5) rs1(5) 000 00000 1110011 + */ + +ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa) + /* + * rs1 = a0 (GPA >> 2) + * rs2 = a1 (VMID) + * HFENCE.GVMA a0, a1 + * 0110001 01011 01010 000 00000 1110011 + */ + .word 0x62b50073 + ret +ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa) + +ENTRY(__kvm_riscv_hfence_gvma_vmid) + /* + * rs1 = zero + * rs2 = a0 (VMID) + * HFENCE.GVMA zero, a0 + * 0110001 01010 00000 000 00000 1110011 + */ + .word 0x62a00073 + ret +ENDPROC(__kvm_riscv_hfence_gvma_vmid) + +ENTRY(__kvm_riscv_hfence_gvma_gpa) + /* + * rs1 = a0 (GPA >> 2) + * rs2 = zero + * HFENCE.GVMA a0 + * 0110001 00000 01010 000 00000 1110011 + */ + .word 0x62050073 + ret +ENDPROC(__kvm_riscv_hfence_gvma_gpa) + +ENTRY(__kvm_riscv_hfence_gvma_all) + /* + * rs1 = zero + * rs2 = zero + * HFENCE.GVMA + * 0110001 00000 00000 000 00000 1110011 + */ + .word 0x62000073 + ret +ENDPROC(__kvm_riscv_hfence_gvma_all) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c new file mode 100644 index 000000000000..e3d3aed46184 --- /dev/null +++ b/arch/riscv/kvm/vcpu.c @@ -0,0 +1,825 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/hwcap.h> + +const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { + KVM_GENERIC_VCPU_STATS(), + STATS_DESC_COUNTER(VCPU, ecall_exit_stat), + STATS_DESC_COUNTER(VCPU, wfi_exit_stat), + STATS_DESC_COUNTER(VCPU, mmio_exit_user), + STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), + STATS_DESC_COUNTER(VCPU, exits) +}; + +const struct kvm_stats_header kvm_vcpu_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vcpu_stats_desc), +}; + +#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \ + riscv_isa_extension_mask(c) | \ + riscv_isa_extension_mask(d) | \ + riscv_isa_extension_mask(f) | \ + riscv_isa_extension_mask(i) | \ + riscv_isa_extension_mask(m) | \ + riscv_isa_extension_mask(s) | \ + riscv_isa_extension_mask(u)) + +static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context; + + memcpy(csr, reset_csr, sizeof(*csr)); + + memcpy(cntx, reset_cntx, sizeof(*cntx)); + + kvm_riscv_vcpu_fp_reset(vcpu); + + kvm_riscv_vcpu_timer_reset(vcpu); + + WRITE_ONCE(vcpu->arch.irqs_pending, 0); + WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); +} + +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *cntx; + + /* Mark this VCPU never ran */ + vcpu->arch.ran_atleast_once = false; + + /* Setup ISA features available to VCPU */ + vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED; + + /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + cntx = &vcpu->arch.guest_reset_context; + cntx->sstatus = SR_SPP | SR_SPIE; + cntx->hstatus = 0; + cntx->hstatus |= HSTATUS_VTW; + cntx->hstatus |= HSTATUS_SPVP; + cntx->hstatus |= HSTATUS_SPV; + + /* Setup VCPU timer */ + kvm_riscv_vcpu_timer_init(vcpu); + + /* Reset VCPU */ + kvm_riscv_reset_vcpu(vcpu); + + return 0; +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + /* Cleanup VCPU timer */ + kvm_riscv_vcpu_timer_deinit(vcpu); + + /* Flush the pages pre-allocated for Stage2 page table mappings */ + kvm_riscv_stage2_flush_cache(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER); +} + +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + !vcpu->arch.power_off && !vcpu->arch.pause); +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CONFIG); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + switch (reg_num) { + case KVM_REG_RISCV_CONFIG_REG(isa): + reg_val = vcpu->arch.isa; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CONFIG); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg_num) { + case KVM_REG_RISCV_CONFIG_REG(isa): + if (!vcpu->arch.ran_atleast_once) { + vcpu->arch.isa = reg_val; + vcpu->arch.isa &= riscv_isa_extension_base(NULL); + vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED; + kvm_riscv_vcpu_fp_reset(vcpu); + } else { + return -EOPNOTSUPP; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CORE); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + return -EINVAL; + + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) + reg_val = cntx->sepc; + else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && + reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) + reg_val = ((unsigned long *)cntx)[reg_num]; + else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) + reg_val = (cntx->sstatus & SR_SPP) ? + KVM_RISCV_MODE_S : KVM_RISCV_MODE_U; + else + return -EINVAL; + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CORE); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) + cntx->sepc = reg_val; + else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num && + reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) + ((unsigned long *)cntx)[reg_num] = reg_val; + else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) { + if (reg_val == KVM_RISCV_MODE_S) + cntx->sstatus |= SR_SPP; + else + cntx->sstatus &= ~SR_SPP; + } else + return -EINVAL; + + return 0; +} + +static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CSR); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -EINVAL; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + kvm_riscv_vcpu_flush_interrupts(vcpu); + reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; + } else + reg_val = ((unsigned long *)csr)[reg_num]; + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_CSR); + unsigned long reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { + reg_val &= VSIP_VALID_MASK; + reg_val <<= VSIP_TO_HVIP_SHIFT; + } + + ((unsigned long *)csr)[reg_num] = reg_val; + + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) + WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); + + return 0; +} + +static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG) + return kvm_riscv_vcpu_set_reg_config(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE) + return kvm_riscv_vcpu_set_reg_core(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR) + return kvm_riscv_vcpu_set_reg_csr(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER) + return kvm_riscv_vcpu_set_reg_timer(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F) + return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_F); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D) + return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_D); + + return -EINVAL; +} + +static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG) + return kvm_riscv_vcpu_get_reg_config(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE) + return kvm_riscv_vcpu_get_reg_core(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR) + return kvm_riscv_vcpu_get_reg_csr(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER) + return kvm_riscv_vcpu_get_reg_timer(vcpu, reg); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F) + return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_F); + else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D) + return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, + KVM_REG_RISCV_FP_D); + + return -EINVAL; +} + +long kvm_arch_vcpu_async_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_INTERRUPT) { + struct kvm_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + + if (irq.irq == KVM_INTERRUPT_SET) + return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); + else + return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); + } + + return -ENOIOCTLCMD; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r = -EINVAL; + + switch (ioctl) { + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + + if (ioctl == KVM_SET_ONE_REG) + r = kvm_riscv_vcpu_set_reg(vcpu, ®); + else + r = kvm_riscv_vcpu_get_reg(vcpu, ®); + break; + } + default: + break; + } + + return r; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long mask, val; + + if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); + val = READ_ONCE(vcpu->arch.irqs_pending) & mask; + + csr->hvip &= ~mask; + csr->hvip |= val; + } +} + +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) +{ + unsigned long hvip; + struct kvm_vcpu_arch *v = &vcpu->arch; + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + /* Read current HVIP and VSIE CSRs */ + csr->vsie = csr_read(CSR_VSIE); + + /* Sync-up HVIP.VSSIP bit changes does by Guest */ + hvip = csr_read(CSR_HVIP); + if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { + if (hvip & (1UL << IRQ_VS_SOFT)) { + if (!test_and_set_bit(IRQ_VS_SOFT, + &v->irqs_pending_mask)) + set_bit(IRQ_VS_SOFT, &v->irqs_pending); + } else { + if (!test_and_set_bit(IRQ_VS_SOFT, + &v->irqs_pending_mask)) + clear_bit(IRQ_VS_SOFT, &v->irqs_pending); + } + } +} + +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_VS_SOFT && + irq != IRQ_VS_TIMER && + irq != IRQ_VS_EXT) + return -EINVAL; + + set_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_VS_SOFT && + irq != IRQ_VS_TIMER && + irq != IRQ_VS_EXT) + return -EINVAL; + + clear_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + return 0; +} + +bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask) +{ + unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) + << VSIP_TO_HVIP_SHIFT) & mask; + + return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false; +} + +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + if (vcpu->arch.power_off) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + int ret = 0; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.power_off = false; + break; + case KVM_MP_STATE_STOPPED: + kvm_riscv_vcpu_power_off(vcpu); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + /* TODO; To be implemented later. */ + return -EINVAL; +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + csr_write(CSR_VSSTATUS, csr->vsstatus); + csr_write(CSR_VSIE, csr->vsie); + csr_write(CSR_VSTVEC, csr->vstvec); + csr_write(CSR_VSSCRATCH, csr->vsscratch); + csr_write(CSR_VSEPC, csr->vsepc); + csr_write(CSR_VSCAUSE, csr->vscause); + csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HVIP, csr->hvip); + csr_write(CSR_VSATP, csr->vsatp); + + kvm_riscv_stage2_update_hgatp(vcpu); + + kvm_riscv_vcpu_timer_restore(vcpu); + + kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context); + kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, + vcpu->arch.isa); + + vcpu->cpu = cpu; +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + vcpu->cpu = -1; + + kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context, + vcpu->arch.isa); + kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); + + csr_write(CSR_HGATP, 0); + + csr->vsstatus = csr_read(CSR_VSSTATUS); + csr->vsie = csr_read(CSR_VSIE); + csr->vstvec = csr_read(CSR_VSTVEC); + csr->vsscratch = csr_read(CSR_VSSCRATCH); + csr->vsepc = csr_read(CSR_VSEPC); + csr->vscause = csr_read(CSR_VSCAUSE); + csr->vstval = csr_read(CSR_VSTVAL); + csr->hvip = csr_read(CSR_HVIP); + csr->vsatp = csr_read(CSR_VSATP); +} + +static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + rcuwait_wait_event(wait, + (!vcpu->arch.power_off) && (!vcpu->arch.pause), + TASK_INTERRUPTIBLE); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* + * Awaken to handle a signal, request to + * sleep again later. + */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } + } + + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_riscv_reset_vcpu(vcpu); + + if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) + kvm_riscv_stage2_update_hgatp(vcpu); + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + __kvm_riscv_hfence_gvma_all(); + } +} + +static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + + csr_write(CSR_HVIP, csr->hvip); +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + int ret; + struct kvm_cpu_trap trap; + struct kvm_run *run = vcpu->run; + + /* Mark this VCPU ran at least once */ + vcpu->arch.ran_atleast_once = true; + + vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + /* Process MMIO value returned from user-space */ + if (run->exit_reason == KVM_EXIT_MMIO) { + ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run); + if (ret) { + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + return ret; + } + } + + /* Process SBI value returned from user-space */ + if (run->exit_reason == KVM_EXIT_RISCV_SBI) { + ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run); + if (ret) { + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + return ret; + } + } + + if (run->immediate_exit) { + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + return -EINTR; + } + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + ret = 1; + run->exit_reason = KVM_EXIT_UNKNOWN; + while (ret > 0) { + /* Check conditions before entering the guest */ + cond_resched(); + + kvm_riscv_stage2_vmid_update(vcpu); + + kvm_riscv_check_vcpu_requests(vcpu); + + preempt_disable(); + + local_irq_disable(); + + /* + * Exit if we have a signal pending so that we can deliver + * the signal to user space. + */ + if (signal_pending(current)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. + * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virtual/kvm/vcpu-requests.rst + */ + vcpu->mode = IN_GUEST_MODE; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + smp_mb__after_srcu_read_unlock(); + + /* + * We might have got VCPU interrupts updated asynchronously + * so update it in HW. + */ + kvm_riscv_vcpu_flush_interrupts(vcpu); + + /* Update HVIP CSR for current CPU */ + kvm_riscv_update_hvip(vcpu); + + if (ret <= 0 || + kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) || + kvm_request_pending(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; + local_irq_enable(); + preempt_enable(); + vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + continue; + } + + guest_enter_irqoff(); + + __kvm_riscv_switch_to(&vcpu->arch); + + vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; + + /* + * Save SCAUSE, STVAL, HTVAL, and HTINST because we might + * get an interrupt between __kvm_riscv_switch_to() and + * local_irq_enable() which can potentially change CSRs. + */ + trap.sepc = vcpu->arch.guest_context.sepc; + trap.scause = csr_read(CSR_SCAUSE); + trap.stval = csr_read(CSR_STVAL); + trap.htval = csr_read(CSR_HTVAL); + trap.htinst = csr_read(CSR_HTINST); + + /* Syncup interrupts state with HW */ + kvm_riscv_vcpu_sync_interrupts(vcpu); + + /* + * We may have taken a host interrupt in VS/VU-mode (i.e. + * while executing the guest). This interrupt is still + * pending, as we haven't serviced it yet! + * + * We're now back in HS-mode with interrupts disabled + * so enabling the interrupts now will have the effect + * of taking the interrupt again, in HS-mode this time. + */ + local_irq_enable(); + + /* + * We do local_irq_enable() before calling guest_exit() so + * that if a timer interrupt hits while running the guest + * we account that tick as being spent in the guest. We + * enable preemption after calling guest_exit() so that if + * we get preempted we make sure ticks after that is not + * counted as guest time. + */ + guest_exit(); + + preempt_enable(); + + vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = kvm_riscv_vcpu_exit(vcpu, run, &trap); + } + + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + + return ret; +} diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c new file mode 100644 index 000000000000..7f2d742ae4c6 --- /dev/null +++ b/arch/riscv/kvm/vcpu_exit.c @@ -0,0 +1,701 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> + +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_WFI 0xffffffff +#define INSN_MATCH_WFI 0x10500073 + +#define INSN_MATCH_LB 0x3 +#define INSN_MASK_LB 0x707f +#define INSN_MATCH_LH 0x1003 +#define INSN_MASK_LH 0x707f +#define INSN_MATCH_LW 0x2003 +#define INSN_MASK_LW 0x707f +#define INSN_MATCH_LD 0x3003 +#define INSN_MASK_LD 0x707f +#define INSN_MATCH_LBU 0x4003 +#define INSN_MASK_LBU 0x707f +#define INSN_MATCH_LHU 0x5003 +#define INSN_MASK_LHU 0x707f +#define INSN_MATCH_LWU 0x6003 +#define INSN_MASK_LWU 0x707f +#define INSN_MATCH_SB 0x23 +#define INSN_MASK_SB 0x707f +#define INSN_MATCH_SH 0x1023 +#define INSN_MASK_SH 0x707f +#define INSN_MATCH_SW 0x2023 +#define INSN_MASK_SW 0x707f +#define INSN_MATCH_SD 0x3023 +#define INSN_MASK_SD 0x707f + +#define INSN_MATCH_C_LD 0x6000 +#define INSN_MASK_C_LD 0xe003 +#define INSN_MATCH_C_SD 0xe000 +#define INSN_MASK_C_SD 0xe003 +#define INSN_MATCH_C_LW 0x4000 +#define INSN_MASK_C_LW 0xe003 +#define INSN_MATCH_C_SW 0xc000 +#define INSN_MASK_C_SW 0xe003 +#define INSN_MATCH_C_LDSP 0x6002 +#define INSN_MASK_C_LDSP 0xe003 +#define INSN_MATCH_C_SDSP 0xe002 +#define INSN_MASK_C_SDSP 0xe003 +#define INSN_MATCH_C_LWSP 0x4002 +#define INSN_MASK_C_LWSP 0xe003 +#define INSN_MATCH_C_SWSP 0xc002 +#define INSN_MASK_C_SWSP 0xe003 + +#define INSN_16BIT_MASK 0x3 + +#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK) + +#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4) + +#ifdef CONFIG_64BIT +#define LOG_REGBYTES 3 +#else +#define LOG_REGBYTES 2 +#endif +#define REGBYTES (1 << LOG_REGBYTES) + +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ + (RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ + (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) \ + ((y) < 0 ? ((x) << -(y)) : ((x) >> (y))) + +#define REG_MASK \ + ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) + +#define REG_OFFSET(insn, pos) \ + (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) + +#define REG_PTR(insn, pos, regs) \ + ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))) + +#define GET_RM(insn) (((insn) >> 12) & 7) + +#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*REG_PTR(2, 0, regs)) +#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) +#define IMM_I(insn) ((s32)(insn) >> 20) +#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ + (s32)(((insn) >> 7) & 0x1f)) +#define MASK_FUNCT3 0x7000 + +static int truly_illegal_insn(struct kvm_vcpu *vcpu, + struct kvm_run *run, + ulong insn) +{ + struct kvm_cpu_trap utrap = { 0 }; + + /* Redirect trap to Guest VCPU */ + utrap.sepc = vcpu->arch.guest_context.sepc; + utrap.scause = EXC_INST_ILLEGAL; + utrap.stval = insn; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + + return 1; +} + +static int system_opcode_insn(struct kvm_vcpu *vcpu, + struct kvm_run *run, + ulong insn) +{ + if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) { + vcpu->stat.wfi_exit_stat++; + if (!kvm_arch_vcpu_runnable(vcpu)) { + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); + kvm_vcpu_block(vcpu); + vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + } + vcpu->arch.guest_context.sepc += INSN_LEN(insn); + return 1; + } + + return truly_illegal_insn(vcpu, run, insn); +} + +static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap) +{ + unsigned long insn = trap->stval; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct; + + if (unlikely(INSN_IS_16BIT(insn))) { + if (insn == 0) { + ct = &vcpu->arch.guest_context; + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, + ct->sepc, + &utrap); + if (utrap.scause) { + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + } + if (INSN_IS_16BIT(insn)) + return truly_illegal_insn(vcpu, run, insn); + } + + switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) { + case INSN_OPCODE_SYSTEM: + return system_opcode_insn(vcpu, run, insn); + default: + return truly_illegal_insn(vcpu, run, insn); + } +} + +static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, unsigned long htinst) +{ + u8 data_buf[8]; + unsigned long insn; + int shift = 0, len = 0, insn_len = 0; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct = &vcpu->arch.guest_context; + + /* Determine trapped instruction */ + if (htinst & 0x1) { + /* + * Bit[0] == 1 implies trapped instruction value is + * transformed instruction or custom instruction. + */ + insn = htinst | INSN_16BIT_MASK; + insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; + } else { + /* + * Bit[0] == 0 implies trapped instruction value is + * zero or special value. + */ + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, + &utrap); + if (utrap.scause) { + /* Redirect trap if we failed to read instruction */ + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + insn_len = INSN_LEN(insn); + } + + /* Decode length of MMIO and shift */ + if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) { + len = 1; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) { + len = 1; + shift = 8 * (sizeof(ulong) - len); +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { + len = 8; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { + len = 4; +#endif + } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { + len = 2; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { + len = 8; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + shift = 8 * (sizeof(ulong) - len); +#endif + } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + shift = 8 * (sizeof(ulong) - len); + } else { + return -EOPNOTSUPP; + } + + /* Fault address should be aligned to length of MMIO */ + if (fault_addr & (len - 1)) + return -EIO; + + /* Save instruction decode info */ + vcpu->arch.mmio_decode.insn = insn; + vcpu->arch.mmio_decode.insn_len = insn_len; + vcpu->arch.mmio_decode.shift = shift; + vcpu->arch.mmio_decode.len = len; + vcpu->arch.mmio_decode.return_handled = 0; + + /* Update MMIO details in kvm_run struct */ + run->mmio.is_write = false; + run->mmio.phys_addr = fault_addr; + run->mmio.len = len; + + /* Try to handle MMIO access in the kernel */ + if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) { + /* Successfully handled MMIO access in the kernel so resume */ + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_kernel++; + kvm_riscv_vcpu_mmio_return(vcpu, run); + return 1; + } + + /* Exit to userspace for MMIO emulation */ + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + + return 0; +} + +static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long fault_addr, unsigned long htinst) +{ + u8 data8; + u16 data16; + u32 data32; + u64 data64; + ulong data; + unsigned long insn; + int len = 0, insn_len = 0; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *ct = &vcpu->arch.guest_context; + + /* Determine trapped instruction */ + if (htinst & 0x1) { + /* + * Bit[0] == 1 implies trapped instruction value is + * transformed instruction or custom instruction. + */ + insn = htinst | INSN_16BIT_MASK; + insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2; + } else { + /* + * Bit[0] == 0 implies trapped instruction value is + * zero or special value. + */ + insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc, + &utrap); + if (utrap.scause) { + /* Redirect trap if we failed to read instruction */ + utrap.sepc = ct->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + return 1; + } + insn_len = INSN_LEN(insn); + } + + data = GET_RS2(insn, &vcpu->arch.guest_context); + data8 = data16 = data32 = data64 = data; + + if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { + len = 4; + } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) { + len = 1; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { + len = 8; +#endif + } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { + len = 2; +#ifdef CONFIG_64BIT + } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { + len = 8; + data64 = GET_RS2S(insn, &vcpu->arch.guest_context); + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + data64 = GET_RS2C(insn, &vcpu->arch.guest_context); +#endif + } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { + len = 4; + data32 = GET_RS2S(insn, &vcpu->arch.guest_context); + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + data32 = GET_RS2C(insn, &vcpu->arch.guest_context); + } else { + return -EOPNOTSUPP; + } + + /* Fault address should be aligned to length of MMIO */ + if (fault_addr & (len - 1)) + return -EIO; + + /* Save instruction decode info */ + vcpu->arch.mmio_decode.insn = insn; + vcpu->arch.mmio_decode.insn_len = insn_len; + vcpu->arch.mmio_decode.shift = 0; + vcpu->arch.mmio_decode.len = len; + vcpu->arch.mmio_decode.return_handled = 0; + + /* Copy data to kvm_run instance */ + switch (len) { + case 1: + *((u8 *)run->mmio.data) = data8; + break; + case 2: + *((u16 *)run->mmio.data) = data16; + break; + case 4: + *((u32 *)run->mmio.data) = data32; + break; + case 8: + *((u64 *)run->mmio.data) = data64; + break; + default: + return -EOPNOTSUPP; + } + + /* Update MMIO details in kvm_run struct */ + run->mmio.is_write = true; + run->mmio.phys_addr = fault_addr; + run->mmio.len = len; + + /* Try to handle MMIO access in the kernel */ + if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS, + fault_addr, len, run->mmio.data)) { + /* Successfully handled MMIO access in the kernel so resume */ + vcpu->stat.mmio_exit_kernel++; + kvm_riscv_vcpu_mmio_return(vcpu, run); + return 1; + } + + /* Exit to userspace for MMIO emulation */ + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + + return 0; +} + +static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap) +{ + struct kvm_memory_slot *memslot; + unsigned long hva, fault_addr; + bool writeable; + gfn_t gfn; + int ret; + + fault_addr = (trap->htval << 2) | (trap->stval & 0x3); + gfn = fault_addr >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable); + + if (kvm_is_error_hva(hva) || + (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) { + switch (trap->scause) { + case EXC_LOAD_GUEST_PAGE_FAULT: + return emulate_load(vcpu, run, fault_addr, + trap->htinst); + case EXC_STORE_GUEST_PAGE_FAULT: + return emulate_store(vcpu, run, fault_addr, + trap->htinst); + default: + return -EOPNOTSUPP; + }; + } + + ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva, + (trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false); + if (ret < 0) + return ret; + + return 1; +} + +/** + * kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory + * + * @vcpu: The VCPU pointer + * @read_insn: Flag representing whether we are reading instruction + * @guest_addr: Guest address to read + * @trap: Output pointer to trap details + */ +unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, + bool read_insn, + unsigned long guest_addr, + struct kvm_cpu_trap *trap) +{ + register unsigned long taddr asm("a0") = (unsigned long)trap; + register unsigned long ttmp asm("a1"); + register unsigned long val asm("t0"); + register unsigned long tmp asm("t1"); + register unsigned long addr asm("t2") = guest_addr; + unsigned long flags; + unsigned long old_stvec, old_hstatus; + + local_irq_save(flags); + + old_hstatus = csr_swap(CSR_HSTATUS, vcpu->arch.guest_context.hstatus); + old_stvec = csr_swap(CSR_STVEC, (ulong)&__kvm_riscv_unpriv_trap); + + if (read_insn) { + /* + * HLVX.HU instruction + * 0110010 00011 rs1 100 rd 1110011 + */ + asm volatile ("\n" + ".option push\n" + ".option norvc\n" + "add %[ttmp], %[taddr], 0\n" + /* + * HLVX.HU %[val], (%[addr]) + * HLVX.HU t0, (t2) + * 0110010 00011 00111 100 00101 1110011 + */ + ".word 0x6433c2f3\n" + "andi %[tmp], %[val], 3\n" + "addi %[tmp], %[tmp], -3\n" + "bne %[tmp], zero, 2f\n" + "addi %[addr], %[addr], 2\n" + /* + * HLVX.HU %[tmp], (%[addr]) + * HLVX.HU t1, (t2) + * 0110010 00011 00111 100 00110 1110011 + */ + ".word 0x6433c373\n" + "sll %[tmp], %[tmp], 16\n" + "add %[val], %[val], %[tmp]\n" + "2:\n" + ".option pop" + : [val] "=&r" (val), [tmp] "=&r" (tmp), + [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp), + [addr] "+&r" (addr) : : "memory"); + + if (trap->scause == EXC_LOAD_PAGE_FAULT) + trap->scause = EXC_INST_PAGE_FAULT; + } else { + /* + * HLV.D instruction + * 0110110 00000 rs1 100 rd 1110011 + * + * HLV.W instruction + * 0110100 00000 rs1 100 rd 1110011 + */ + asm volatile ("\n" + ".option push\n" + ".option norvc\n" + "add %[ttmp], %[taddr], 0\n" +#ifdef CONFIG_64BIT + /* + * HLV.D %[val], (%[addr]) + * HLV.D t0, (t2) + * 0110110 00000 00111 100 00101 1110011 + */ + ".word 0x6c03c2f3\n" +#else + /* + * HLV.W %[val], (%[addr]) + * HLV.W t0, (t2) + * 0110100 00000 00111 100 00101 1110011 + */ + ".word 0x6803c2f3\n" +#endif + ".option pop" + : [val] "=&r" (val), + [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp) + : [addr] "r" (addr) : "memory"); + } + + csr_write(CSR_STVEC, old_stvec); + csr_write(CSR_HSTATUS, old_hstatus); + + local_irq_restore(flags); + + return val; +} + +/** + * kvm_riscv_vcpu_trap_redirect -- Redirect trap to Guest + * + * @vcpu: The VCPU pointer + * @trap: Trap details + */ +void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, + struct kvm_cpu_trap *trap) +{ + unsigned long vsstatus = csr_read(CSR_VSSTATUS); + + /* Change Guest SSTATUS.SPP bit */ + vsstatus &= ~SR_SPP; + if (vcpu->arch.guest_context.sstatus & SR_SPP) + vsstatus |= SR_SPP; + + /* Change Guest SSTATUS.SPIE bit */ + vsstatus &= ~SR_SPIE; + if (vsstatus & SR_SIE) + vsstatus |= SR_SPIE; + + /* Clear Guest SSTATUS.SIE bit */ + vsstatus &= ~SR_SIE; + + /* Update Guest SSTATUS */ + csr_write(CSR_VSSTATUS, vsstatus); + + /* Update Guest SCAUSE, STVAL, and SEPC */ + csr_write(CSR_VSCAUSE, trap->scause); + csr_write(CSR_VSTVAL, trap->stval); + csr_write(CSR_VSEPC, trap->sepc); + + /* Set Guest PC to Guest exception vector */ + vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC); +} + +/** + * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation + * or in-kernel IO emulation + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + */ +int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u8 data8; + u16 data16; + u32 data32; + u64 data64; + ulong insn; + int len, shift; + + if (vcpu->arch.mmio_decode.return_handled) + return 0; + + vcpu->arch.mmio_decode.return_handled = 1; + insn = vcpu->arch.mmio_decode.insn; + + if (run->mmio.is_write) + goto done; + + len = vcpu->arch.mmio_decode.len; + shift = vcpu->arch.mmio_decode.shift; + + switch (len) { + case 1: + data8 = *((u8 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data8 << shift >> shift); + break; + case 2: + data16 = *((u16 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data16 << shift >> shift); + break; + case 4: + data32 = *((u32 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data32 << shift >> shift); + break; + case 8: + data64 = *((u64 *)run->mmio.data); + SET_RD(insn, &vcpu->arch.guest_context, + (ulong)data64 << shift >> shift); + break; + default: + return -EOPNOTSUPP; + } + +done: + /* Move to next instruction */ + vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len; + + return 0; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_cpu_trap *trap) +{ + int ret; + + /* If we got host interrupt then do nothing */ + if (trap->scause & CAUSE_IRQ_FLAG) + return 1; + + /* Handle guest traps */ + ret = -EFAULT; + run->exit_reason = KVM_EXIT_UNKNOWN; + switch (trap->scause) { + case EXC_VIRTUAL_INST_FAULT: + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) + ret = virtual_inst_fault(vcpu, run, trap); + break; + case EXC_INST_GUEST_PAGE_FAULT: + case EXC_LOAD_GUEST_PAGE_FAULT: + case EXC_STORE_GUEST_PAGE_FAULT: + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) + ret = stage2_page_fault(vcpu, run, trap); + break; + case EXC_SUPERVISOR_SYSCALL: + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) + ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); + break; + default: + break; + } + + /* Print details in-case of error */ + if (ret < 0) { + kvm_err("VCPU exit error %d\n", ret); + kvm_err("SEPC=0x%lx SSTATUS=0x%lx HSTATUS=0x%lx\n", + vcpu->arch.guest_context.sepc, + vcpu->arch.guest_context.sstatus, + vcpu->arch.guest_context.hstatus); + kvm_err("SCAUSE=0x%lx STVAL=0x%lx HTVAL=0x%lx HTINST=0x%lx\n", + trap->scause, trap->stval, trap->htval, trap->htinst); + } + + return ret; +} diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c new file mode 100644 index 000000000000..1b070152578f --- /dev/null +++ b/arch/riscv/kvm/vcpu_fp.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> + +#ifdef CONFIG_FPU +void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) +{ + unsigned long isa = vcpu->arch.isa; + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + + cntx->sstatus &= ~SR_FS; + if (riscv_isa_extension_available(&isa, f) || + riscv_isa_extension_available(&isa, d)) + cntx->sstatus |= SR_FS_INITIAL; + else + cntx->sstatus |= SR_FS_OFF; +} + +void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx) +{ + cntx->sstatus &= ~SR_FS; + cntx->sstatus |= SR_FS_CLEAN; +} + +void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx, + unsigned long isa) +{ + if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) { + if (riscv_isa_extension_available(&isa, d)) + __kvm_riscv_fp_d_save(cntx); + else if (riscv_isa_extension_available(&isa, f)) + __kvm_riscv_fp_f_save(cntx); + kvm_riscv_vcpu_fp_clean(cntx); + } +} + +void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx, + unsigned long isa) +{ + if ((cntx->sstatus & SR_FS) != SR_FS_OFF) { + if (riscv_isa_extension_available(&isa, d)) + __kvm_riscv_fp_d_restore(cntx); + else if (riscv_isa_extension_available(&isa, f)) + __kvm_riscv_fp_f_restore(cntx); + kvm_riscv_vcpu_fp_clean(cntx); + } +} + +void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx) +{ + /* No need to check host sstatus as it can be modified outside */ + if (riscv_isa_extension_available(NULL, d)) + __kvm_riscv_fp_d_save(cntx); + else if (riscv_isa_extension_available(NULL, f)) + __kvm_riscv_fp_f_save(cntx); +} + +void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx) +{ + if (riscv_isa_extension_available(NULL, d)) + __kvm_riscv_fp_d_restore(cntx); + else if (riscv_isa_extension_available(NULL, f)) + __kvm_riscv_fp_f_restore(cntx); +} +#endif + +int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long isa = vcpu->arch.isa; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + rtype); + void *reg_val; + + if ((rtype == KVM_REG_RISCV_FP_F) && + riscv_isa_extension_available(&isa, f)) { + if (KVM_REG_SIZE(reg->id) != sizeof(u32)) + return -EINVAL; + if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) + reg_val = &cntx->fp.f.fcsr; + else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_val = &cntx->fp.f.f[reg_num]; + else + return -EINVAL; + } else if ((rtype == KVM_REG_RISCV_FP_D) && + riscv_isa_extension_available(&isa, d)) { + if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { + if (KVM_REG_SIZE(reg->id) != sizeof(u32)) + return -EINVAL; + reg_val = &cntx->fp.d.fcsr; + } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) && + reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { + if (KVM_REG_SIZE(reg->id) != sizeof(u64)) + return -EINVAL; + reg_val = &cntx->fp.d.f[reg_num]; + } else + return -EINVAL; + } else + return -EINVAL; + + if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + unsigned long rtype) +{ + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long isa = vcpu->arch.isa; + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + rtype); + void *reg_val; + + if ((rtype == KVM_REG_RISCV_FP_F) && + riscv_isa_extension_available(&isa, f)) { + if (KVM_REG_SIZE(reg->id) != sizeof(u32)) + return -EINVAL; + if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr)) + reg_val = &cntx->fp.f.fcsr; + else if ((KVM_REG_RISCV_FP_F_REG(f[0]) <= reg_num) && + reg_num <= KVM_REG_RISCV_FP_F_REG(f[31])) + reg_val = &cntx->fp.f.f[reg_num]; + else + return -EINVAL; + } else if ((rtype == KVM_REG_RISCV_FP_D) && + riscv_isa_extension_available(&isa, d)) { + if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) { + if (KVM_REG_SIZE(reg->id) != sizeof(u32)) + return -EINVAL; + reg_val = &cntx->fp.d.fcsr; + } else if ((KVM_REG_RISCV_FP_D_REG(f[0]) <= reg_num) && + reg_num <= KVM_REG_RISCV_FP_D_REG(f[31])) { + if (KVM_REG_SIZE(reg->id) != sizeof(u64)) + return -EINVAL; + reg_val = &cntx->fp.d.f[reg_num]; + } else + return -EINVAL; + } else + return -EINVAL; + + if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c new file mode 100644 index 000000000000..eb3c045edf11 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * Copyright (c) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_timer.h> + +#define SBI_VERSION_MAJOR 0 +#define SBI_VERSION_MINOR 1 + +static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + + vcpu->arch.sbi_context.return_handled = 0; + vcpu->stat.ecall_exit_stat++; + run->exit_reason = KVM_EXIT_RISCV_SBI; + run->riscv_sbi.extension_id = cp->a7; + run->riscv_sbi.function_id = cp->a6; + run->riscv_sbi.args[0] = cp->a0; + run->riscv_sbi.args[1] = cp->a1; + run->riscv_sbi.args[2] = cp->a2; + run->riscv_sbi.args[3] = cp->a3; + run->riscv_sbi.args[4] = cp->a4; + run->riscv_sbi.args[5] = cp->a5; + run->riscv_sbi.ret[0] = cp->a0; + run->riscv_sbi.ret[1] = cp->a1; +} + +int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + + /* Handle SBI return only once */ + if (vcpu->arch.sbi_context.return_handled) + return 0; + vcpu->arch.sbi_context.return_handled = 1; + + /* Update return values */ + cp->a0 = run->riscv_sbi.ret[0]; + cp->a1 = run->riscv_sbi.ret[1]; + + /* Move to next instruction */ + vcpu->arch.guest_context.sepc += 4; + + return 0; +} + +#ifdef CONFIG_RISCV_SBI_V01 + +static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *run, u32 type) +{ + int i; + struct kvm_vcpu *tmp; + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) + tmp->arch.power_off = true; + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); + + memset(&run->system_event, 0, sizeof(run->system_event)); + run->system_event.type = type; + run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + ulong hmask; + int i, ret = 1; + u64 next_cycle; + struct kvm_vcpu *rvcpu; + bool next_sepc = true; + struct cpumask cm, hm; + struct kvm *kvm = vcpu->kvm; + struct kvm_cpu_trap utrap = { 0 }; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + + if (!cp) + return -EINVAL; + + switch (cp->a7) { + case SBI_EXT_0_1_CONSOLE_GETCHAR: + case SBI_EXT_0_1_CONSOLE_PUTCHAR: + /* + * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be + * handled in kernel so we forward these to user-space + */ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + next_sepc = false; + ret = 0; + break; + case SBI_EXT_0_1_SET_TIMER: +#if __riscv_xlen == 32 + next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0; +#else + next_cycle = (u64)cp->a0; +#endif + kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle); + break; + case SBI_EXT_0_1_CLEAR_IPI: + kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT); + break; + case SBI_EXT_0_1_SEND_IPI: + if (cp->a0) + hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0, + &utrap); + else + hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1; + if (utrap.scause) { + utrap.sepc = cp->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + next_sepc = false; + break; + } + for_each_set_bit(i, &hmask, BITS_PER_LONG) { + rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i); + kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT); + } + break; + case SBI_EXT_0_1_SHUTDOWN: + kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN); + next_sepc = false; + ret = 0; + break; + case SBI_EXT_0_1_REMOTE_FENCE_I: + case SBI_EXT_0_1_REMOTE_SFENCE_VMA: + case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID: + if (cp->a0) + hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0, + &utrap); + else + hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1; + if (utrap.scause) { + utrap.sepc = cp->sepc; + kvm_riscv_vcpu_trap_redirect(vcpu, &utrap); + next_sepc = false; + break; + } + cpumask_clear(&cm); + for_each_set_bit(i, &hmask, BITS_PER_LONG) { + rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i); + if (rvcpu->cpu < 0) + continue; + cpumask_set_cpu(rvcpu->cpu, &cm); + } + riscv_cpuid_to_hartid_mask(&cm, &hm); + if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I) + sbi_remote_fence_i(cpumask_bits(&hm)); + else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) + sbi_remote_hfence_vvma(cpumask_bits(&hm), + cp->a1, cp->a2); + else + sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), + cp->a1, cp->a2, cp->a3); + break; + default: + /* Return error for unsupported SBI calls */ + cp->a0 = SBI_ERR_NOT_SUPPORTED; + break; + } + + if (next_sepc) + cp->sepc += 4; + + return ret; +} + +#else + +int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + return 0; +} + +#endif diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S new file mode 100644 index 000000000000..029a28a195c6 --- /dev/null +++ b/arch/riscv/kvm/vcpu_switch.S @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/csr.h> + + .text + .altmacro + .option norelax + +ENTRY(__kvm_riscv_switch_to) + /* Save Host GPRs (except A0 and T0-T6) */ + REG_S ra, (KVM_ARCH_HOST_RA)(a0) + REG_S sp, (KVM_ARCH_HOST_SP)(a0) + REG_S gp, (KVM_ARCH_HOST_GP)(a0) + REG_S tp, (KVM_ARCH_HOST_TP)(a0) + REG_S s0, (KVM_ARCH_HOST_S0)(a0) + REG_S s1, (KVM_ARCH_HOST_S1)(a0) + REG_S a1, (KVM_ARCH_HOST_A1)(a0) + REG_S a2, (KVM_ARCH_HOST_A2)(a0) + REG_S a3, (KVM_ARCH_HOST_A3)(a0) + REG_S a4, (KVM_ARCH_HOST_A4)(a0) + REG_S a5, (KVM_ARCH_HOST_A5)(a0) + REG_S a6, (KVM_ARCH_HOST_A6)(a0) + REG_S a7, (KVM_ARCH_HOST_A7)(a0) + REG_S s2, (KVM_ARCH_HOST_S2)(a0) + REG_S s3, (KVM_ARCH_HOST_S3)(a0) + REG_S s4, (KVM_ARCH_HOST_S4)(a0) + REG_S s5, (KVM_ARCH_HOST_S5)(a0) + REG_S s6, (KVM_ARCH_HOST_S6)(a0) + REG_S s7, (KVM_ARCH_HOST_S7)(a0) + REG_S s8, (KVM_ARCH_HOST_S8)(a0) + REG_S s9, (KVM_ARCH_HOST_S9)(a0) + REG_S s10, (KVM_ARCH_HOST_S10)(a0) + REG_S s11, (KVM_ARCH_HOST_S11)(a0) + + /* Save Host and Restore Guest SSTATUS */ + REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) + csrrw t0, CSR_SSTATUS, t0 + REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) + + /* Save Host and Restore Guest HSTATUS */ + REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) + csrrw t1, CSR_HSTATUS, t1 + REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) + + /* Save Host and Restore Guest SCOUNTEREN */ + REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) + csrrw t2, CSR_SCOUNTEREN, t2 + REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + + /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ + csrrw t3, CSR_SSCRATCH, a0 + REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) + + /* Save Host STVEC and change it to return path */ + la t4, __kvm_switch_return + csrrw t4, CSR_STVEC, t4 + REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) + + /* Restore Guest SEPC */ + REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0) + csrw CSR_SEPC, t0 + + /* Restore Guest GPRs (except A0) */ + REG_L ra, (KVM_ARCH_GUEST_RA)(a0) + REG_L sp, (KVM_ARCH_GUEST_SP)(a0) + REG_L gp, (KVM_ARCH_GUEST_GP)(a0) + REG_L tp, (KVM_ARCH_GUEST_TP)(a0) + REG_L t0, (KVM_ARCH_GUEST_T0)(a0) + REG_L t1, (KVM_ARCH_GUEST_T1)(a0) + REG_L t2, (KVM_ARCH_GUEST_T2)(a0) + REG_L s0, (KVM_ARCH_GUEST_S0)(a0) + REG_L s1, (KVM_ARCH_GUEST_S1)(a0) + REG_L a1, (KVM_ARCH_GUEST_A1)(a0) + REG_L a2, (KVM_ARCH_GUEST_A2)(a0) + REG_L a3, (KVM_ARCH_GUEST_A3)(a0) + REG_L a4, (KVM_ARCH_GUEST_A4)(a0) + REG_L a5, (KVM_ARCH_GUEST_A5)(a0) + REG_L a6, (KVM_ARCH_GUEST_A6)(a0) + REG_L a7, (KVM_ARCH_GUEST_A7)(a0) + REG_L s2, (KVM_ARCH_GUEST_S2)(a0) + REG_L s3, (KVM_ARCH_GUEST_S3)(a0) + REG_L s4, (KVM_ARCH_GUEST_S4)(a0) + REG_L s5, (KVM_ARCH_GUEST_S5)(a0) + REG_L s6, (KVM_ARCH_GUEST_S6)(a0) + REG_L s7, (KVM_ARCH_GUEST_S7)(a0) + REG_L s8, (KVM_ARCH_GUEST_S8)(a0) + REG_L s9, (KVM_ARCH_GUEST_S9)(a0) + REG_L s10, (KVM_ARCH_GUEST_S10)(a0) + REG_L s11, (KVM_ARCH_GUEST_S11)(a0) + REG_L t3, (KVM_ARCH_GUEST_T3)(a0) + REG_L t4, (KVM_ARCH_GUEST_T4)(a0) + REG_L t5, (KVM_ARCH_GUEST_T5)(a0) + REG_L t6, (KVM_ARCH_GUEST_T6)(a0) + + /* Restore Guest A0 */ + REG_L a0, (KVM_ARCH_GUEST_A0)(a0) + + /* Resume Guest */ + sret + + /* Back to Host */ + .align 2 +__kvm_switch_return: + /* Swap Guest A0 with SSCRATCH */ + csrrw a0, CSR_SSCRATCH, a0 + + /* Save Guest GPRs (except A0) */ + REG_S ra, (KVM_ARCH_GUEST_RA)(a0) + REG_S sp, (KVM_ARCH_GUEST_SP)(a0) + REG_S gp, (KVM_ARCH_GUEST_GP)(a0) + REG_S tp, (KVM_ARCH_GUEST_TP)(a0) + REG_S t0, (KVM_ARCH_GUEST_T0)(a0) + REG_S t1, (KVM_ARCH_GUEST_T1)(a0) + REG_S t2, (KVM_ARCH_GUEST_T2)(a0) + REG_S s0, (KVM_ARCH_GUEST_S0)(a0) + REG_S s1, (KVM_ARCH_GUEST_S1)(a0) + REG_S a1, (KVM_ARCH_GUEST_A1)(a0) + REG_S a2, (KVM_ARCH_GUEST_A2)(a0) + REG_S a3, (KVM_ARCH_GUEST_A3)(a0) + REG_S a4, (KVM_ARCH_GUEST_A4)(a0) + REG_S a5, (KVM_ARCH_GUEST_A5)(a0) + REG_S a6, (KVM_ARCH_GUEST_A6)(a0) + REG_S a7, (KVM_ARCH_GUEST_A7)(a0) + REG_S s2, (KVM_ARCH_GUEST_S2)(a0) + REG_S s3, (KVM_ARCH_GUEST_S3)(a0) + REG_S s4, (KVM_ARCH_GUEST_S4)(a0) + REG_S s5, (KVM_ARCH_GUEST_S5)(a0) + REG_S s6, (KVM_ARCH_GUEST_S6)(a0) + REG_S s7, (KVM_ARCH_GUEST_S7)(a0) + REG_S s8, (KVM_ARCH_GUEST_S8)(a0) + REG_S s9, (KVM_ARCH_GUEST_S9)(a0) + REG_S s10, (KVM_ARCH_GUEST_S10)(a0) + REG_S s11, (KVM_ARCH_GUEST_S11)(a0) + REG_S t3, (KVM_ARCH_GUEST_T3)(a0) + REG_S t4, (KVM_ARCH_GUEST_T4)(a0) + REG_S t5, (KVM_ARCH_GUEST_T5)(a0) + REG_S t6, (KVM_ARCH_GUEST_T6)(a0) + + /* Save Guest SEPC */ + csrr t0, CSR_SEPC + REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) + + /* Restore Host STVEC */ + REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) + csrw CSR_STVEC, t1 + + /* Save Guest A0 and Restore Host SSCRATCH */ + REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) + csrrw t2, CSR_SSCRATCH, t2 + REG_S t2, (KVM_ARCH_GUEST_A0)(a0) + + /* Save Guest and Restore Host SCOUNTEREN */ + REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + csrrw t3, CSR_SCOUNTEREN, t3 + REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) + + /* Save Guest and Restore Host HSTATUS */ + REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) + csrrw t4, CSR_HSTATUS, t4 + REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) + + /* Save Guest and Restore Host SSTATUS */ + REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) + csrrw t5, CSR_SSTATUS, t5 + REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0) + + /* Restore Host GPRs (except A0 and T0-T6) */ + REG_L ra, (KVM_ARCH_HOST_RA)(a0) + REG_L sp, (KVM_ARCH_HOST_SP)(a0) + REG_L gp, (KVM_ARCH_HOST_GP)(a0) + REG_L tp, (KVM_ARCH_HOST_TP)(a0) + REG_L s0, (KVM_ARCH_HOST_S0)(a0) + REG_L s1, (KVM_ARCH_HOST_S1)(a0) + REG_L a1, (KVM_ARCH_HOST_A1)(a0) + REG_L a2, (KVM_ARCH_HOST_A2)(a0) + REG_L a3, (KVM_ARCH_HOST_A3)(a0) + REG_L a4, (KVM_ARCH_HOST_A4)(a0) + REG_L a5, (KVM_ARCH_HOST_A5)(a0) + REG_L a6, (KVM_ARCH_HOST_A6)(a0) + REG_L a7, (KVM_ARCH_HOST_A7)(a0) + REG_L s2, (KVM_ARCH_HOST_S2)(a0) + REG_L s3, (KVM_ARCH_HOST_S3)(a0) + REG_L s4, (KVM_ARCH_HOST_S4)(a0) + REG_L s5, (KVM_ARCH_HOST_S5)(a0) + REG_L s6, (KVM_ARCH_HOST_S6)(a0) + REG_L s7, (KVM_ARCH_HOST_S7)(a0) + REG_L s8, (KVM_ARCH_HOST_S8)(a0) + REG_L s9, (KVM_ARCH_HOST_S9)(a0) + REG_L s10, (KVM_ARCH_HOST_S10)(a0) + REG_L s11, (KVM_ARCH_HOST_S11)(a0) + + /* Return to C code */ + ret +ENDPROC(__kvm_riscv_switch_to) + +ENTRY(__kvm_riscv_unpriv_trap) + /* + * We assume that faulting unpriv load/store instruction is + * 4-byte long and blindly increment SEPC by 4. + * + * The trap details will be saved at address pointed by 'A0' + * register and we use 'A1' register as temporary. + */ + csrr a1, CSR_SEPC + REG_S a1, (KVM_ARCH_TRAP_SEPC)(a0) + addi a1, a1, 4 + csrw CSR_SEPC, a1 + csrr a1, CSR_SCAUSE + REG_S a1, (KVM_ARCH_TRAP_SCAUSE)(a0) + csrr a1, CSR_STVAL + REG_S a1, (KVM_ARCH_TRAP_STVAL)(a0) + csrr a1, CSR_HTVAL + REG_S a1, (KVM_ARCH_TRAP_HTVAL)(a0) + csrr a1, CSR_HTINST + REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0) + sret +ENDPROC(__kvm_riscv_unpriv_trap) + +#ifdef CONFIG_FPU + .align 3 + .global __kvm_riscv_fp_f_save +__kvm_riscv_fp_f_save: + csrr t2, CSR_SSTATUS + li t1, SR_FS + csrs CSR_SSTATUS, t1 + frcsr t0 + fsw f0, KVM_ARCH_FP_F_F0(a0) + fsw f1, KVM_ARCH_FP_F_F1(a0) + fsw f2, KVM_ARCH_FP_F_F2(a0) + fsw f3, KVM_ARCH_FP_F_F3(a0) + fsw f4, KVM_ARCH_FP_F_F4(a0) + fsw f5, KVM_ARCH_FP_F_F5(a0) + fsw f6, KVM_ARCH_FP_F_F6(a0) + fsw f7, KVM_ARCH_FP_F_F7(a0) + fsw f8, KVM_ARCH_FP_F_F8(a0) + fsw f9, KVM_ARCH_FP_F_F9(a0) + fsw f10, KVM_ARCH_FP_F_F10(a0) + fsw f11, KVM_ARCH_FP_F_F11(a0) + fsw f12, KVM_ARCH_FP_F_F12(a0) + fsw f13, KVM_ARCH_FP_F_F13(a0) + fsw f14, KVM_ARCH_FP_F_F14(a0) + fsw f15, KVM_ARCH_FP_F_F15(a0) + fsw f16, KVM_ARCH_FP_F_F16(a0) + fsw f17, KVM_ARCH_FP_F_F17(a0) + fsw f18, KVM_ARCH_FP_F_F18(a0) + fsw f19, KVM_ARCH_FP_F_F19(a0) + fsw f20, KVM_ARCH_FP_F_F20(a0) + fsw f21, KVM_ARCH_FP_F_F21(a0) + fsw f22, KVM_ARCH_FP_F_F22(a0) + fsw f23, KVM_ARCH_FP_F_F23(a0) + fsw f24, KVM_ARCH_FP_F_F24(a0) + fsw f25, KVM_ARCH_FP_F_F25(a0) + fsw f26, KVM_ARCH_FP_F_F26(a0) + fsw f27, KVM_ARCH_FP_F_F27(a0) + fsw f28, KVM_ARCH_FP_F_F28(a0) + fsw f29, KVM_ARCH_FP_F_F29(a0) + fsw f30, KVM_ARCH_FP_F_F30(a0) + fsw f31, KVM_ARCH_FP_F_F31(a0) + sw t0, KVM_ARCH_FP_F_FCSR(a0) + csrw CSR_SSTATUS, t2 + ret + + .align 3 + .global __kvm_riscv_fp_d_save +__kvm_riscv_fp_d_save: + csrr t2, CSR_SSTATUS + li t1, SR_FS + csrs CSR_SSTATUS, t1 + frcsr t0 + fsd f0, KVM_ARCH_FP_D_F0(a0) + fsd f1, KVM_ARCH_FP_D_F1(a0) + fsd f2, KVM_ARCH_FP_D_F2(a0) + fsd f3, KVM_ARCH_FP_D_F3(a0) + fsd f4, KVM_ARCH_FP_D_F4(a0) + fsd f5, KVM_ARCH_FP_D_F5(a0) + fsd f6, KVM_ARCH_FP_D_F6(a0) + fsd f7, KVM_ARCH_FP_D_F7(a0) + fsd f8, KVM_ARCH_FP_D_F8(a0) + fsd f9, KVM_ARCH_FP_D_F9(a0) + fsd f10, KVM_ARCH_FP_D_F10(a0) + fsd f11, KVM_ARCH_FP_D_F11(a0) + fsd f12, KVM_ARCH_FP_D_F12(a0) + fsd f13, KVM_ARCH_FP_D_F13(a0) + fsd f14, KVM_ARCH_FP_D_F14(a0) + fsd f15, KVM_ARCH_FP_D_F15(a0) + fsd f16, KVM_ARCH_FP_D_F16(a0) + fsd f17, KVM_ARCH_FP_D_F17(a0) + fsd f18, KVM_ARCH_FP_D_F18(a0) + fsd f19, KVM_ARCH_FP_D_F19(a0) + fsd f20, KVM_ARCH_FP_D_F20(a0) + fsd f21, KVM_ARCH_FP_D_F21(a0) + fsd f22, KVM_ARCH_FP_D_F22(a0) + fsd f23, KVM_ARCH_FP_D_F23(a0) + fsd f24, KVM_ARCH_FP_D_F24(a0) + fsd f25, KVM_ARCH_FP_D_F25(a0) + fsd f26, KVM_ARCH_FP_D_F26(a0) + fsd f27, KVM_ARCH_FP_D_F27(a0) + fsd f28, KVM_ARCH_FP_D_F28(a0) + fsd f29, KVM_ARCH_FP_D_F29(a0) + fsd f30, KVM_ARCH_FP_D_F30(a0) + fsd f31, KVM_ARCH_FP_D_F31(a0) + sw t0, KVM_ARCH_FP_D_FCSR(a0) + csrw CSR_SSTATUS, t2 + ret + + .align 3 + .global __kvm_riscv_fp_f_restore +__kvm_riscv_fp_f_restore: + csrr t2, CSR_SSTATUS + li t1, SR_FS + lw t0, KVM_ARCH_FP_F_FCSR(a0) + csrs CSR_SSTATUS, t1 + flw f0, KVM_ARCH_FP_F_F0(a0) + flw f1, KVM_ARCH_FP_F_F1(a0) + flw f2, KVM_ARCH_FP_F_F2(a0) + flw f3, KVM_ARCH_FP_F_F3(a0) + flw f4, KVM_ARCH_FP_F_F4(a0) + flw f5, KVM_ARCH_FP_F_F5(a0) + flw f6, KVM_ARCH_FP_F_F6(a0) + flw f7, KVM_ARCH_FP_F_F7(a0) + flw f8, KVM_ARCH_FP_F_F8(a0) + flw f9, KVM_ARCH_FP_F_F9(a0) + flw f10, KVM_ARCH_FP_F_F10(a0) + flw f11, KVM_ARCH_FP_F_F11(a0) + flw f12, KVM_ARCH_FP_F_F12(a0) + flw f13, KVM_ARCH_FP_F_F13(a0) + flw f14, KVM_ARCH_FP_F_F14(a0) + flw f15, KVM_ARCH_FP_F_F15(a0) + flw f16, KVM_ARCH_FP_F_F16(a0) + flw f17, KVM_ARCH_FP_F_F17(a0) + flw f18, KVM_ARCH_FP_F_F18(a0) + flw f19, KVM_ARCH_FP_F_F19(a0) + flw f20, KVM_ARCH_FP_F_F20(a0) + flw f21, KVM_ARCH_FP_F_F21(a0) + flw f22, KVM_ARCH_FP_F_F22(a0) + flw f23, KVM_ARCH_FP_F_F23(a0) + flw f24, KVM_ARCH_FP_F_F24(a0) + flw f25, KVM_ARCH_FP_F_F25(a0) + flw f26, KVM_ARCH_FP_F_F26(a0) + flw f27, KVM_ARCH_FP_F_F27(a0) + flw f28, KVM_ARCH_FP_F_F28(a0) + flw f29, KVM_ARCH_FP_F_F29(a0) + flw f30, KVM_ARCH_FP_F_F30(a0) + flw f31, KVM_ARCH_FP_F_F31(a0) + fscsr t0 + csrw CSR_SSTATUS, t2 + ret + + .align 3 + .global __kvm_riscv_fp_d_restore +__kvm_riscv_fp_d_restore: + csrr t2, CSR_SSTATUS + li t1, SR_FS + lw t0, KVM_ARCH_FP_D_FCSR(a0) + csrs CSR_SSTATUS, t1 + fld f0, KVM_ARCH_FP_D_F0(a0) + fld f1, KVM_ARCH_FP_D_F1(a0) + fld f2, KVM_ARCH_FP_D_F2(a0) + fld f3, KVM_ARCH_FP_D_F3(a0) + fld f4, KVM_ARCH_FP_D_F4(a0) + fld f5, KVM_ARCH_FP_D_F5(a0) + fld f6, KVM_ARCH_FP_D_F6(a0) + fld f7, KVM_ARCH_FP_D_F7(a0) + fld f8, KVM_ARCH_FP_D_F8(a0) + fld f9, KVM_ARCH_FP_D_F9(a0) + fld f10, KVM_ARCH_FP_D_F10(a0) + fld f11, KVM_ARCH_FP_D_F11(a0) + fld f12, KVM_ARCH_FP_D_F12(a0) + fld f13, KVM_ARCH_FP_D_F13(a0) + fld f14, KVM_ARCH_FP_D_F14(a0) + fld f15, KVM_ARCH_FP_D_F15(a0) + fld f16, KVM_ARCH_FP_D_F16(a0) + fld f17, KVM_ARCH_FP_D_F17(a0) + fld f18, KVM_ARCH_FP_D_F18(a0) + fld f19, KVM_ARCH_FP_D_F19(a0) + fld f20, KVM_ARCH_FP_D_F20(a0) + fld f21, KVM_ARCH_FP_D_F21(a0) + fld f22, KVM_ARCH_FP_D_F22(a0) + fld f23, KVM_ARCH_FP_D_F23(a0) + fld f24, KVM_ARCH_FP_D_F24(a0) + fld f25, KVM_ARCH_FP_D_F25(a0) + fld f26, KVM_ARCH_FP_D_F26(a0) + fld f27, KVM_ARCH_FP_D_F27(a0) + fld f28, KVM_ARCH_FP_D_F28(a0) + fld f29, KVM_ARCH_FP_D_F29(a0) + fld f30, KVM_ARCH_FP_D_F30(a0) + fld f31, KVM_ARCH_FP_D_F31(a0) + fscsr t0 + csrw CSR_SSTATUS, t2 + ret +#endif diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c new file mode 100644 index 000000000000..5c4c37ff2d48 --- /dev/null +++ b/arch/riscv/kvm/vcpu_timer.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Atish Patra <atish.patra@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <clocksource/timer-riscv.h> +#include <asm/csr.h> +#include <asm/delay.h> +#include <asm/kvm_vcpu_timer.h> + +static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt) +{ + return get_cycles64() + gt->time_delta; +} + +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, + struct kvm_guest_timer *gt, + struct kvm_vcpu_timer *t) +{ + unsigned long flags; + u64 cycles_now, cycles_delta, delta_ns; + + local_irq_save(flags); + cycles_now = kvm_riscv_current_cycles(gt); + if (cycles_now < cycles) + cycles_delta = cycles - cycles_now; + else + cycles_delta = 0; + delta_ns = (cycles_delta * gt->nsec_mult) >> gt->nsec_shift; + local_irq_restore(flags); + + return delta_ns; +} + +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer *h) +{ + u64 delta_ns; + struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt); + struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer); + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + + if (kvm_riscv_current_cycles(gt) < t->next_cycles) { + delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t); + hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns)); + return HRTIMER_RESTART; + } + + t->next_set = false; + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_TIMER); + + return HRTIMER_NORESTART; +} + +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) +{ + if (!t->init_done || !t->next_set) + return -EINVAL; + + hrtimer_cancel(&t->hrt); + t->next_set = false; + + return 0; +} + +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + u64 delta_ns; + + if (!t->init_done) + return -EINVAL; + + kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_TIMER); + + delta_ns = kvm_riscv_delta_cycles2ns(ncycles, gt, t); + t->next_cycles = ncycles; + hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL); + t->next_set = true; + + return 0; +} + +int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_TIMER); + u64 reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(u64)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64)) + return -EINVAL; + + switch (reg_num) { + case KVM_REG_RISCV_TIMER_REG(frequency): + reg_val = riscv_timebase; + break; + case KVM_REG_RISCV_TIMER_REG(time): + reg_val = kvm_riscv_current_cycles(gt); + break; + case KVM_REG_RISCV_TIMER_REG(compare): + reg_val = t->next_cycles; + break; + case KVM_REG_RISCV_TIMER_REG(state): + reg_val = (t->next_set) ? KVM_RISCV_TIMER_STATE_ON : + KVM_RISCV_TIMER_STATE_OFF; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_TIMER); + u64 reg_val; + int ret = 0; + + if (KVM_REG_SIZE(reg->id) != sizeof(u64)) + return -EINVAL; + if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg_num) { + case KVM_REG_RISCV_TIMER_REG(frequency): + ret = -EOPNOTSUPP; + break; + case KVM_REG_RISCV_TIMER_REG(time): + gt->time_delta = reg_val - get_cycles64(); + break; + case KVM_REG_RISCV_TIMER_REG(compare): + t->next_cycles = reg_val; + break; + case KVM_REG_RISCV_TIMER_REG(state): + if (reg_val == KVM_RISCV_TIMER_STATE_ON) + ret = kvm_riscv_vcpu_timer_next_event(vcpu, reg_val); + else + ret = kvm_riscv_vcpu_timer_cancel(t); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_timer *t = &vcpu->arch.timer; + + if (t->init_done) + return -EINVAL; + + hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + t->init_done = true; + t->next_set = false; + + return 0; +} + +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu) +{ + int ret; + + ret = kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); + vcpu->arch.timer.init_done = false; + + return ret; +} + +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu) +{ + return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer); +} + +void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; + +#ifdef CONFIG_64BIT + csr_write(CSR_HTIMEDELTA, gt->time_delta); +#else + csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); + csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); +#endif +} + +int kvm_riscv_guest_timer_init(struct kvm *kvm) +{ + struct kvm_guest_timer *gt = &kvm->arch.timer; + + riscv_cs_get_mult_shift(>->nsec_mult, >->nsec_shift); + gt->time_delta = -get_cycles64(); + + return 0; +} diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c new file mode 100644 index 000000000000..26399df15b63 --- /dev/null +++ b/arch/riscv/kvm/vm.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/kvm_host.h> + +const struct _kvm_stats_desc kvm_vm_stats_desc[] = { + KVM_GENERIC_VM_STATS() +}; +static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == + sizeof(struct kvm_vm_stat) / sizeof(u64)); + +const struct kvm_stats_header kvm_vm_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vm_stats_desc), +}; + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + int r; + + r = kvm_riscv_stage2_alloc_pgd(kvm); + if (r) + return r; + + r = kvm_riscv_stage2_vmid_init(kvm); + if (r) { + kvm_riscv_stage2_free_pgd(kvm); + return r; + } + + return kvm_riscv_guest_timer_init(kvm); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + atomic_set(&kvm->online_vcpus, 0); +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; + default: + r = 0; + break; + } + + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c new file mode 100644 index 000000000000..2c6253b293bc --- /dev/null +++ b/arch/riscv/kvm/vmid.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * + * Authors: + * Anup Patel <anup.patel@wdc.com> + */ + +#include <linux/bitops.h> +#include <linux/cpumask.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> + +static unsigned long vmid_version = 1; +static unsigned long vmid_next; +static unsigned long vmid_bits; +static DEFINE_SPINLOCK(vmid_lock); + +void kvm_riscv_stage2_vmid_detect(void) +{ + unsigned long old; + + /* Figure-out number of VMID bits in HW */ + old = csr_read(CSR_HGATP); + csr_write(CSR_HGATP, old | HGATP_VMID_MASK); + vmid_bits = csr_read(CSR_HGATP); + vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT; + vmid_bits = fls_long(vmid_bits); + csr_write(CSR_HGATP, old); + + /* We polluted local TLB so flush all guest TLB */ + __kvm_riscv_hfence_gvma_all(); + + /* We don't use VMID bits if they are not sufficient */ + if ((1UL << vmid_bits) < num_possible_cpus()) + vmid_bits = 0; +} + +unsigned long kvm_riscv_stage2_vmid_bits(void) +{ + return vmid_bits; +} + +int kvm_riscv_stage2_vmid_init(struct kvm *kvm) +{ + /* Mark the initial VMID and VMID version invalid */ + kvm->arch.vmid.vmid_version = 0; + kvm->arch.vmid.vmid = 0; + + return 0; +} + +bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid) +{ + if (!vmid_bits) + return false; + + return unlikely(READ_ONCE(vmid->vmid_version) != + READ_ONCE(vmid_version)); +} + +void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_vcpu *v; + struct cpumask hmask; + struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid; + + if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) + return; + + spin_lock(&vmid_lock); + + /* + * We need to re-check the vmid_version here to ensure that if + * another vcpu already allocated a valid vmid for this vm. + */ + if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) { + spin_unlock(&vmid_lock); + return; + } + + /* First user of a new VMID version? */ + if (unlikely(vmid_next == 0)) { + WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1); + vmid_next = 1; + + /* + * We ran out of VMIDs so we increment vmid_version and + * start assigning VMIDs from 1. + * + * This also means existing VMIDs assignement to all Guest + * instances is invalid and we have force VMID re-assignement + * for all Guest instances. The Guest instances that were not + * running will automatically pick-up new VMIDs because will + * call kvm_riscv_stage2_vmid_update() whenever they enter + * in-kernel run loop. For Guest instances that are already + * running, we force VM exits on all host CPUs using IPI and + * flush all Guest TLBs. + */ + riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); + sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0); + } + + vmid->vmid = vmid_next; + vmid_next++; + vmid_next &= (1 << vmid_bits) - 1; + + WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version)); + + spin_unlock(&vmid_lock); + + /* Request stage2 page table update for all VCPUs */ + kvm_for_each_vcpu(i, v, vcpu->kvm) + kvm_make_request(KVM_REQ_UPDATE_HGATP, v); +} |