summaryrefslogtreecommitdiff
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/Makefile7
-rwxr-xr-x[-rw-r--r--]arch/riscv/boot/install.sh21
-rw-r--r--arch/riscv/include/asm/Kbuild4
-rw-r--r--arch/riscv/include/asm/csr.h1
-rw-r--r--arch/riscv/include/asm/kvm_host.h124
-rw-r--r--arch/riscv/include/asm/pgtable-64.h5
-rw-r--r--arch/riscv/include/asm/pgtable.h66
-rw-r--r--arch/riscv/include/asm/spinlock.h135
-rw-r--r--arch/riscv/include/asm/spinlock_types.h25
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h20
-rw-r--r--arch/riscv/kernel/crash_dump.c26
-rw-r--r--arch/riscv/kernel/ftrace.c6
-rw-r--r--arch/riscv/kvm/main.c11
-rw-r--r--arch/riscv/kvm/mmu.c264
-rw-r--r--arch/riscv/kvm/tlb.S74
-rw-r--r--arch/riscv/kvm/tlb.c461
-rw-r--r--arch/riscv/kvm/vcpu.c144
-rw-r--r--arch/riscv/kvm/vcpu_exit.c6
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c40
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c35
-rw-r--r--arch/riscv/kvm/vm.c8
-rw-r--r--arch/riscv/kvm/vmid.c30
-rw-r--r--arch/riscv/mm/init.c8
-rw-r--r--arch/riscv/net/bpf_jit.h67
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c110
26 files changed, 1165 insertions, 535 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3ac2a81a55eb..c0853f1474a7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -38,7 +38,9 @@ config RISCV
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_MEMTEST
+ select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 7d81102cffd4..2b93ca9f4fc3 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -139,11 +139,10 @@ $(BOOT_TARGETS): vmlinux
Image.%: Image
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-install: install-image = Image
-zinstall: install-image = Image.gz
+install: KBUILD_IMAGE := $(boot)/Image
+zinstall: KBUILD_IMAGE := $(boot)/Image.gz
install zinstall:
- $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
- $(boot)/$(install-image) System.map "$(INSTALL_PATH)"
+ $(call cmd,install)
PHONY += rv32_randconfig
rv32_randconfig:
diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh
index 18c39159c0ff..4c63f3f0643d 100644..100755
--- a/arch/riscv/boot/install.sh
+++ b/arch/riscv/boot/install.sh
@@ -1,7 +1,5 @@
#!/bin/sh
#
-# arch/riscv/boot/install.sh
-#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
# for more details.
@@ -18,25 +16,6 @@
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-#
-
-verify () {
- if [ ! -f "$1" ]; then
- echo "" 1>&2
- echo " *** Missing file: $1" 1>&2
- echo ' *** You need to run "make" before "make install".' 1>&2
- echo "" 1>&2
- exit 1
- fi
-}
-
-# Make sure the files actually exist
-verify "$2"
-verify "$3"
-
-# User may have a custom install script
-if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
-if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
if [ "$(basename $2)" = "Image.gz" ]; then
# Compressed install
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 5edf5b8587e7..504f8b7e72d4 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -3,5 +3,9 @@ generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += parport.h
+generic-y += spinlock.h
+generic-y += spinlock_types.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index e935f27b10fd..cc40521e438b 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -117,6 +117,7 @@
#define HGATP_MODE_SV32X4 _AC(1, UL)
#define HGATP_MODE_SV39X4 _AC(8, UL)
#define HGATP_MODE_SV48X4 _AC(9, UL)
+#define HGATP_MODE_SV57X4 _AC(10, UL)
#define HGATP32_MODE_SHIFT 31
#define HGATP32_VMID_SHIFT 22
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index cd4bbcecb0fb..319c8aeb42af 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,12 +12,12 @@
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
+#include <linux/spinlock.h>
#include <asm/csr.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_timer.h>
-#define KVM_MAX_VCPUS \
- ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
+#define KVM_MAX_VCPUS 1024
#define KVM_HALT_POLL_NS_DEFAULT 500000
@@ -27,6 +27,31 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)
+#define KVM_REQ_FENCE_I \
+ KVM_ARCH_REQ_FLAGS(3, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HFENCE_GVMA_VMID_ALL KVM_REQ_TLB_FLUSH
+#define KVM_REQ_HFENCE_VVMA_ALL \
+ KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HFENCE \
+ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
+enum kvm_riscv_hfence_type {
+ KVM_RISCV_HFENCE_UNKNOWN = 0,
+ KVM_RISCV_HFENCE_GVMA_VMID_GPA,
+ KVM_RISCV_HFENCE_VVMA_ASID_GVA,
+ KVM_RISCV_HFENCE_VVMA_ASID_ALL,
+ KVM_RISCV_HFENCE_VVMA_GVA,
+};
+
+struct kvm_riscv_hfence {
+ enum kvm_riscv_hfence_type type;
+ unsigned long asid;
+ unsigned long order;
+ gpa_t addr;
+ gpa_t size;
+};
+
+#define KVM_RISCV_VCPU_MAX_HFENCE 64
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
@@ -54,10 +79,10 @@ struct kvm_vmid {
};
struct kvm_arch {
- /* stage2 vmid */
+ /* G-stage vmid */
struct kvm_vmid vmid;
- /* stage2 page table */
+ /* G-stage page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
@@ -141,6 +166,9 @@ struct kvm_vcpu_arch {
/* VCPU ran at least once */
bool ran_atleast_once;
+ /* Last Host CPU on which Guest VCPU exited */
+ int last_exit_cpu;
+
/* ISA feature bits (similar to MISA) */
unsigned long isa;
@@ -179,6 +207,12 @@ struct kvm_vcpu_arch {
/* VCPU Timer */
struct kvm_vcpu_timer timer;
+ /* HFENCE request queue */
+ spinlock_t hfence_lock;
+ unsigned long hfence_head;
+ unsigned long hfence_tail;
+ struct kvm_riscv_hfence hfence_queue[KVM_RISCV_VCPU_MAX_HFENCE];
+
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
@@ -201,27 +235,71 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#define KVM_ARCH_WANT_MMU_NOTIFIER
-void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa_divby_4,
- unsigned long vmid);
-void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
-void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa_divby_4);
-void __kvm_riscv_hfence_gvma_all(void);
-
-int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
+
+void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid);
+void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_gvma_all(void);
+void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
+ unsigned long asid,
+ unsigned long gva,
+ unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
+ unsigned long asid);
+void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_local_hfence_vvma_all(unsigned long vmid);
+
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu);
+
+void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu);
+void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu);
+
+void kvm_riscv_fence_i(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order);
+void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order, unsigned long asid);
+void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long asid);
+void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order);
+void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask);
+
+int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write);
-int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
-void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
-void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
-void kvm_riscv_stage2_mode_detect(void);
-unsigned long kvm_riscv_stage2_mode(void);
-int kvm_riscv_stage2_gpa_bits(void);
-
-void kvm_riscv_stage2_vmid_detect(void);
-unsigned long kvm_riscv_stage2_vmid_bits(void);
-int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
-bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
-void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm);
+void kvm_riscv_gstage_free_pgd(struct kvm *kvm);
+void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu);
+void kvm_riscv_gstage_mode_detect(void);
+unsigned long kvm_riscv_gstage_mode(void);
+int kvm_riscv_gstage_gpa_bits(void);
+
+void kvm_riscv_gstage_vmid_detect(void);
+unsigned long kvm_riscv_gstage_vmid_bits(void);
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
void __kvm_riscv_unpriv_trap(void);
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 7e246e9f8d70..ba2494cbc24f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -86,6 +86,11 @@ static inline int pud_leaf(pud_t pud)
return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF);
}
+static inline int pud_user(pud_t pud)
+{
+ return pud_val(pud) & _PAGE_USER;
+}
+
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 046b44225623..4200ddede880 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -114,6 +114,8 @@
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
+#include <linux/page_table_check.h>
+
#ifdef CONFIG_XIP_KERNEL
#define XIP_FIXUP(addr) ({ \
uintptr_t __a = (uintptr_t)(addr); \
@@ -315,6 +317,11 @@ static inline int pte_exec(pte_t pte)
return pte_val(pte) & _PAGE_EXEC;
}
+static inline int pte_user(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_USER;
+}
+
static inline int pte_huge(pte_t pte)
{
return pte_present(pte) && (pte_val(pte) & _PAGE_LEAF);
@@ -446,7 +453,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
void flush_icache_pte(pte_t pte);
-static inline void set_pte_at(struct mm_struct *mm,
+static inline void __set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
@@ -455,10 +462,17 @@ static inline void set_pte_at(struct mm_struct *mm,
set_pte(ptep, pteval);
}
+static inline void set_pte_at(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+ page_table_check_pte_set(mm, addr, ptep, pteval);
+ __set_pte_at(mm, addr, ptep, pteval);
+}
+
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- set_pte_at(mm, addr, ptep, __pte(0));
+ __set_pte_at(mm, addr, ptep, __pte(0));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
@@ -479,7 +493,11 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- return __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+ pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
+
+ page_table_check_pte_clear(mm, address, pte);
+
+ return pte;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@ -546,6 +564,13 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
}
+#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
+}
+
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
@@ -567,6 +592,11 @@ static inline int pmd_young(pmd_t pmd)
return pte_young(pmd_pte(pmd));
}
+static inline int pmd_user(pmd_t pmd)
+{
+ return pte_user(pmd_pte(pmd));
+}
+
static inline pmd_t pmd_mkold(pmd_t pmd)
{
return pte_pmd(pte_mkold(pmd_pte(pmd)));
@@ -600,14 +630,33 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
- return set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+ page_table_check_pmd_set(mm, addr, pmdp, pmd);
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
- return set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+ page_table_check_pud_set(mm, addr, pudp, pud);
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+}
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && pud_user(pud);
}
+#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
@@ -634,7 +683,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+ pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0));
+
+ page_table_check_pmd_clear(mm, address, pmd);
+
+ return pmd;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -648,6 +701,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd)));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
deleted file mode 100644
index f4f7fa1b7ca8..000000000000
--- a/arch/riscv/include/asm/spinlock.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Regents of the University of California
- * Copyright (C) 2017 SiFive
- */
-
-#ifndef _ASM_RISCV_SPINLOCK_H
-#define _ASM_RISCV_SPINLOCK_H
-
-#include <linux/kernel.h>
-#include <asm/current.h>
-#include <asm/fence.h>
-
-/*
- * Simple spin lock operations. These provide no fairness guarantees.
- */
-
-/* FIXME: Replace this with a ticket lock, like MIPS. */
-
-#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- smp_store_release(&lock->lock, 0);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- int tmp = 1, busy;
-
- __asm__ __volatile__ (
- " amoswap.w %0, %2, %1\n"
- RISCV_ACQUIRE_BARRIER
- : "=r" (busy), "+A" (lock->lock)
- : "r" (tmp)
- : "memory");
-
- return !busy;
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- while (1) {
- if (arch_spin_is_locked(lock))
- continue;
-
- if (arch_spin_trylock(lock))
- break;
- }
-}
-
-/***********************************************************/
-
-static inline void arch_read_lock(arch_rwlock_t *lock)
-{
- int tmp;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bltz %1, 1b\n"
- " addi %1, %1, 1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- : "+A" (lock->lock), "=&r" (tmp)
- :: "memory");
-}
-
-static inline void arch_write_lock(arch_rwlock_t *lock)
-{
- int tmp;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bnez %1, 1b\n"
- " li %1, -1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- : "+A" (lock->lock), "=&r" (tmp)
- :: "memory");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *lock)
-{
- int busy;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bltz %1, 1f\n"
- " addi %1, %1, 1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- "1:\n"
- : "+A" (lock->lock), "=&r" (busy)
- :: "memory");
-
- return !busy;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *lock)
-{
- int busy;
-
- __asm__ __volatile__(
- "1: lr.w %1, %0\n"
- " bnez %1, 1f\n"
- " li %1, -1\n"
- " sc.w %1, %1, %0\n"
- " bnez %1, 1b\n"
- RISCV_ACQUIRE_BARRIER
- "1:\n"
- : "+A" (lock->lock), "=&r" (busy)
- :: "memory");
-
- return !busy;
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *lock)
-{
- __asm__ __volatile__(
- RISCV_RELEASE_BARRIER
- " amoadd.w x0, %1, %0\n"
- : "+A" (lock->lock)
- : "r" (-1)
- : "memory");
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *lock)
-{
- smp_store_release(&lock->lock, 0);
-}
-
-#endif /* _ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
deleted file mode 100644
index 5a35a49505da..000000000000
--- a/arch/riscv/include/asm/spinlock_types.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 Regents of the University of California
- */
-
-#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
-#define _ASM_RISCV_SPINLOCK_TYPES_H
-
-#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
-# error "please don't include this file directly"
-#endif
-
-typedef struct {
- volatile unsigned int lock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
-
-typedef struct {
- volatile unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
-
-#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index f808ad1ce500..6119368ba6d5 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -82,6 +82,23 @@ struct kvm_riscv_timer {
__u64 state;
};
+/*
+ * ISA extension IDs specific to KVM. This is not the same as the host ISA
+ * extension IDs as that is internal to the host and should not be exposed
+ * to the guest. This should always be contiguous to keep the mapping simple
+ * in KVM implementation.
+ */
+enum KVM_RISCV_ISA_EXT_ID {
+ KVM_RISCV_ISA_EXT_A = 0,
+ KVM_RISCV_ISA_EXT_C,
+ KVM_RISCV_ISA_EXT_D,
+ KVM_RISCV_ISA_EXT_F,
+ KVM_RISCV_ISA_EXT_H,
+ KVM_RISCV_ISA_EXT_I,
+ KVM_RISCV_ISA_EXT_M,
+ KVM_RISCV_ISA_EXT_MAX,
+};
+
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
@@ -123,6 +140,9 @@ struct kvm_riscv_timer {
#define KVM_REG_RISCV_FP_D_REG(name) \
(offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64))
+/* ISA Extension registers are mapped as type 7 */
+#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+
#endif
#endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c
index 86cc0ada5752..ea2158cee97b 100644
--- a/arch/riscv/kernel/crash_dump.c
+++ b/arch/riscv/kernel/crash_dump.c
@@ -7,22 +7,10 @@
#include <linux/crash_dump.h>
#include <linux/io.h>
+#include <linux/uio.h>
-/**
- * copy_oldmem_page() - copy one page from old kernel memory
- * @pfn: page frame number to be copied
- * @buf: buffer where the copied page is placed
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page
- * @userbuf: if set, @buf is in a user address space
- *
- * This function copies one page from old kernel memory into buffer pointed by
- * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
- * copied or negative error in case of failure.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset,
- int userbuf)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
{
void *vaddr;
@@ -33,13 +21,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!vaddr)
return -ENOMEM;
- if (userbuf) {
- if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
- memunmap(vaddr);
- return -EFAULT;
- }
- } else
- memcpy(buf, vaddr + offset, csize);
+ csize = copy_to_iter(vaddr + offset, csize, iter);
memunmap(vaddr);
return csize;
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 4716f4cdc038..2086f6585773 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -12,16 +12,14 @@
#include <asm/patch.h>
#ifdef CONFIG_DYNAMIC_FTRACE
-int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
{
mutex_lock(&text_mutex);
- return 0;
}
-int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
{
mutex_unlock(&text_mutex);
- return 0;
}
static int ftrace_check_current_call(unsigned long hook_pos,
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 2e5ca43c8c49..1549205fe5fe 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -89,13 +89,13 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
- kvm_riscv_stage2_mode_detect();
+ kvm_riscv_gstage_mode_detect();
- kvm_riscv_stage2_vmid_detect();
+ kvm_riscv_gstage_vmid_detect();
kvm_info("hypervisor extension available\n");
- switch (kvm_riscv_stage2_mode()) {
+ switch (kvm_riscv_gstage_mode()) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";
break;
@@ -105,12 +105,15 @@ int kvm_arch_init(void *opaque)
case HGATP_MODE_SV48X4:
str = "Sv48x4";
break;
+ case HGATP_MODE_SV57X4:
+ str = "Sv57x4";
+ break;
default:
return -ENODEV;
}
kvm_info("using %s G-stage page table format\n", str);
- kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits());
+ kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
return 0;
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index f80a34fbf102..1c00695ebee7 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -18,53 +18,52 @@
#include <asm/csr.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/sbi.h>
#ifdef CONFIG_64BIT
-static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
-static unsigned long stage2_pgd_levels = 3;
-#define stage2_index_bits 9
+static unsigned long gstage_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 3;
+#define gstage_index_bits 9
#else
-static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
-static unsigned long stage2_pgd_levels = 2;
-#define stage2_index_bits 10
+static unsigned long gstage_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 2;
+#define gstage_index_bits 10
#endif
-#define stage2_pgd_xbits 2
-#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits))
-#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \
- (stage2_pgd_levels * stage2_index_bits) + \
- stage2_pgd_xbits)
-#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits))
+#define gstage_pgd_xbits 2
+#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits))
+#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \
+ (gstage_pgd_levels * gstage_index_bits) + \
+ gstage_pgd_xbits)
+#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits))
-#define stage2_pte_leaf(__ptep) \
+#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
-static inline unsigned long stage2_pte_index(gpa_t addr, u32 level)
+static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
{
unsigned long mask;
- unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level);
+ unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level);
- if (level == (stage2_pgd_levels - 1))
- mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1;
+ if (level == (gstage_pgd_levels - 1))
+ mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
return (addr >> shift) & mask;
}
-static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
+static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
{
return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
}
-static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
+static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;
- for (i = 0; i < stage2_pgd_levels; i++) {
- if (page_size == (psz << (i * stage2_index_bits))) {
+ for (i = 0; i < gstage_pgd_levels; i++) {
+ if (page_size == (psz << (i * gstage_index_bits))) {
*out_level = i;
return 0;
}
@@ -73,27 +72,39 @@ static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}
-static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
+static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
{
- if (stage2_pgd_levels < level)
+ if (gstage_pgd_levels < level)
return -EINVAL;
- *out_pgsize = 1UL << (12 + (level * stage2_index_bits));
+ *out_pgorder = 12 + (level * gstage_index_bits);
+ return 0;
+}
+
+static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
+{
+ int rc;
+ unsigned long page_order = PAGE_SHIFT;
+
+ rc = gstage_level_to_page_order(level, &page_order);
+ if (rc)
+ return rc;
+ *out_pgsize = BIT(page_order);
return 0;
}
-static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
- u32 current_level = stage2_pgd_levels - 1;
+ u32 current_level = gstage_pgd_levels - 1;
*ptep_level = current_level;
ptep = (pte_t *)kvm->arch.pgd;
- ptep = &ptep[stage2_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
while (ptep && pte_val(*ptep)) {
- if (stage2_pte_leaf(ptep)) {
+ if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
*ptepp = ptep;
return true;
@@ -102,8 +113,8 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
if (current_level) {
current_level--;
*ptep_level = current_level;
- ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
- ptep = &ptep[stage2_pte_index(addr, current_level)];
+ ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
} else {
ptep = NULL;
}
@@ -112,38 +123,30 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
return false;
}
-static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
+static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
- unsigned long size = PAGE_SIZE;
- struct kvm_vmid *vmid = &kvm->arch.vmid;
+ unsigned long order = PAGE_SHIFT;
- if (stage2_level_to_page_size(level, &size))
+ if (gstage_level_to_page_order(level, &order))
return;
- addr &= ~(size - 1);
+ addr &= ~(BIT(order) - 1);
- /*
- * TODO: Instead of cpu_online_mask, we should only target CPUs
- * where the Guest/VM is running.
- */
- preempt_disable();
- sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
- READ_ONCE(vmid->vmid));
- preempt_enable();
+ kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order);
}
-static int stage2_set_pte(struct kvm *kvm, u32 level,
+static int gstage_set_pte(struct kvm *kvm, u32 level,
struct kvm_mmu_memory_cache *pcache,
gpa_t addr, const pte_t *new_pte)
{
- u32 current_level = stage2_pgd_levels - 1;
+ u32 current_level = gstage_pgd_levels - 1;
pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
- pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+ pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)];
if (current_level < level)
return -EINVAL;
while (current_level != level) {
- if (stage2_pte_leaf(ptep))
+ if (gstage_pte_leaf(ptep))
return -EEXIST;
if (!pte_val(*ptep)) {
@@ -155,23 +158,23 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
__pgprot(_PAGE_TABLE));
} else {
- if (stage2_pte_leaf(ptep))
+ if (gstage_pte_leaf(ptep))
return -EEXIST;
- next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
}
current_level--;
- ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+ ptep = &next_ptep[gstage_pte_index(addr, current_level)];
}
*ptep = *new_pte;
- if (stage2_pte_leaf(ptep))
- stage2_remote_tlb_flush(kvm, current_level, addr);
+ if (gstage_pte_leaf(ptep))
+ gstage_remote_tlb_flush(kvm, current_level, addr);
return 0;
}
-static int stage2_map_page(struct kvm *kvm,
+static int gstage_map_page(struct kvm *kvm,
struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa,
unsigned long page_size,
@@ -182,7 +185,7 @@ static int stage2_map_page(struct kvm *kvm,
pte_t new_pte;
pgprot_t prot;
- ret = stage2_page_size_to_level(page_size, &level);
+ ret = gstage_page_size_to_level(page_size, &level);
if (ret)
return ret;
@@ -193,9 +196,9 @@ static int stage2_map_page(struct kvm *kvm,
* PTE so that software can update these bits.
*
* We support both options mentioned above. To achieve this, we
- * always set 'A' and 'D' PTE bits at time of creating stage2
+ * always set 'A' and 'D' PTE bits at time of creating G-stage
* mapping. To support KVM dirty page logging with both options
- * mentioned above, we will write-protect stage2 PTEs to track
+ * mentioned above, we will write-protect G-stage PTEs to track
* dirty pages.
*/
@@ -213,24 +216,24 @@ static int stage2_map_page(struct kvm *kvm,
new_pte = pfn_pte(PFN_DOWN(hpa), prot);
new_pte = pte_mkdirty(new_pte);
- return stage2_set_pte(kvm, level, pcache, gpa, &new_pte);
+ return gstage_set_pte(kvm, level, pcache, gpa, &new_pte);
}
-enum stage2_op {
- STAGE2_OP_NOP = 0, /* Nothing */
- STAGE2_OP_CLEAR, /* Clear/Unmap */
- STAGE2_OP_WP, /* Write-protect */
+enum gstage_op {
+ GSTAGE_OP_NOP = 0, /* Nothing */
+ GSTAGE_OP_CLEAR, /* Clear/Unmap */
+ GSTAGE_OP_WP, /* Write-protect */
};
-static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
- pte_t *ptep, u32 ptep_level, enum stage2_op op)
+static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
+ pte_t *ptep, u32 ptep_level, enum gstage_op op)
{
int i, ret;
pte_t *next_ptep;
u32 next_ptep_level;
unsigned long next_page_size, page_size;
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
return;
@@ -239,31 +242,31 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
if (!pte_val(*ptep))
return;
- if (ptep_level && !stage2_pte_leaf(ptep)) {
- next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ if (ptep_level && !gstage_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
next_ptep_level = ptep_level - 1;
- ret = stage2_level_to_page_size(next_ptep_level,
+ ret = gstage_level_to_page_size(next_ptep_level,
&next_page_size);
if (ret)
return;
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
for (i = 0; i < PTRS_PER_PTE; i++)
- stage2_op_pte(kvm, addr + i * next_page_size,
+ gstage_op_pte(kvm, addr + i * next_page_size,
&next_ptep[i], next_ptep_level, op);
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
put_page(virt_to_page(next_ptep));
} else {
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
- else if (op == STAGE2_OP_WP)
+ else if (op == GSTAGE_OP_WP)
set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
- stage2_remote_tlb_flush(kvm, ptep_level, addr);
+ gstage_remote_tlb_flush(kvm, ptep_level, addr);
}
}
-static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+static void gstage_unmap_range(struct kvm *kvm, gpa_t start,
gpa_t size, bool may_block)
{
int ret;
@@ -274,9 +277,9 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
gpa_t addr = start, end = start + size;
while (addr < end) {
- found_leaf = stage2_get_leaf_entry(kvm, addr,
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
&ptep, &ptep_level);
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
break;
@@ -284,8 +287,8 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
goto next;
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- stage2_op_pte(kvm, addr, ptep,
- ptep_level, STAGE2_OP_CLEAR);
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_CLEAR);
next:
addr += page_size;
@@ -299,7 +302,7 @@ next:
}
}
-static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
+static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
{
int ret;
pte_t *ptep;
@@ -309,9 +312,9 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
unsigned long page_size;
while (addr < end) {
- found_leaf = stage2_get_leaf_entry(kvm, addr,
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
&ptep, &ptep_level);
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
break;
@@ -319,15 +322,15 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
goto next;
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- stage2_op_pte(kvm, addr, ptep,
- ptep_level, STAGE2_OP_WP);
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_WP);
next:
addr += page_size;
}
}
-static void stage2_wp_memory_region(struct kvm *kvm, int slot)
+static void gstage_wp_memory_region(struct kvm *kvm, int slot)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
@@ -335,12 +338,12 @@ static void stage2_wp_memory_region(struct kvm *kvm, int slot)
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_wp_range(kvm, start, end);
+ gstage_wp_range(kvm, start, end);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
-static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
+static int gstage_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
unsigned long size, bool writable)
{
pte_t pte;
@@ -361,12 +364,12 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
pte = pte_wrprotect(pte);
- ret = kvm_mmu_topup_memory_cache(&pcache, stage2_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte);
+ ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -388,7 +391,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
- stage2_wp_range(kvm, start, end);
+ gstage_wp_range(kvm, start, end);
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -411,7 +414,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_riscv_stage2_free_pgd(kvm);
+ kvm_riscv_gstage_free_pgd(kvm);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -421,7 +424,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_unmap_range(kvm, gpa, size, false);
+ gstage_unmap_range(kvm, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
@@ -436,7 +439,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* the memory slot is write protected.
*/
if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
- stage2_wp_memory_region(kvm, new->id);
+ gstage_wp_memory_region(kvm, new->id);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -458,7 +461,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* space addressable by the KVM guest GPA space.
*/
if ((new->base_gfn + new->npages) >=
- (stage2_gpa_size >> PAGE_SHIFT))
+ (gstage_gpa_size >> PAGE_SHIFT))
return -EFAULT;
hva = new->userspace_addr;
@@ -514,7 +517,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
goto out;
}
- ret = stage2_ioremap(kvm, gpa, pa,
+ ret = gstage_ioremap(kvm, gpa, pa,
vm_end - vm_start, writable);
if (ret)
break;
@@ -527,7 +530,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- stage2_unmap_range(kvm, base_gpa, size, false);
+ gstage_unmap_range(kvm, base_gpa, size, false);
spin_unlock(&kvm->mmu_lock);
out:
@@ -540,7 +543,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.pgd)
return false;
- stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+ gstage_unmap_range(kvm, range->start << PAGE_SHIFT,
(range->end - range->start) << PAGE_SHIFT,
range->may_block);
return false;
@@ -556,10 +559,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(range->end - range->start != 1);
- ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
__pfn_to_phys(pfn), PAGE_SIZE, true, true);
if (ret) {
- kvm_debug("Failed to map stage2 page (error %d)\n", ret);
+ kvm_debug("Failed to map G-stage page (error %d)\n", ret);
return true;
}
@@ -577,7 +580,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
- if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@@ -595,14 +598,14 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
- if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
return pte_young(*ptep);
}
-int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write)
{
@@ -648,9 +651,9 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
}
/* We need minimum second+third level pages */
- ret = kvm_mmu_topup_memory_cache(pcache, stage2_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
if (ret) {
- kvm_err("Failed to topup stage2 cache\n");
+ kvm_err("Failed to topup G-stage cache\n");
return ret;
}
@@ -680,15 +683,15 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
if (writeable) {
kvm_set_pfn_dirty(hfn);
mark_page_dirty(kvm, gfn);
- ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
vma_pagesize, false, true);
} else {
- ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
vma_pagesize, true, true);
}
if (ret)
- kvm_err("Failed to map in stage2\n");
+ kvm_err("Failed to map in G-stage\n");
out_unlock:
spin_unlock(&kvm->mmu_lock);
@@ -697,7 +700,7 @@ out_unlock:
return ret;
}
-int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
+int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
{
struct page *pgd_page;
@@ -707,7 +710,7 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
}
pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(stage2_pgd_size));
+ get_order(gstage_pgd_size));
if (!pgd_page)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
@@ -716,13 +719,13 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
return 0;
}
-void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
+void kvm_riscv_gstage_free_pgd(struct kvm *kvm)
{
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
- stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
+ gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
@@ -730,12 +733,12 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
spin_unlock(&kvm->mmu_lock);
if (pgd)
- free_pages((unsigned long)pgd, get_order(stage2_pgd_size));
+ free_pages((unsigned long)pgd, get_order(gstage_pgd_size));
}
-void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
+void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
{
- unsigned long hgatp = stage2_mode;
+ unsigned long hgatp = gstage_mode;
struct kvm_arch *k = &vcpu->kvm->arch;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) &
@@ -744,31 +747,40 @@ void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
csr_write(CSR_HGATP, hgatp);
- if (!kvm_riscv_stage2_vmid_bits())
- __kvm_riscv_hfence_gvma_all();
+ if (!kvm_riscv_gstage_vmid_bits())
+ kvm_riscv_local_hfence_gvma_all();
}
-void kvm_riscv_stage2_mode_detect(void)
+void kvm_riscv_gstage_mode_detect(void)
{
#ifdef CONFIG_64BIT
- /* Try Sv48x4 stage2 mode */
+ /* Try Sv57x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
+ gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 5;
+ goto skip_sv48x4_test;
+ }
+
+ /* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
- stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
- stage2_pgd_levels = 4;
+ gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 4;
}
- csr_write(CSR_HGATP, 0);
+skip_sv48x4_test:
- __kvm_riscv_hfence_gvma_all();
+ csr_write(CSR_HGATP, 0);
+ kvm_riscv_local_hfence_gvma_all();
#endif
}
-unsigned long kvm_riscv_stage2_mode(void)
+unsigned long kvm_riscv_gstage_mode(void)
{
- return stage2_mode >> HGATP_MODE_SHIFT;
+ return gstage_mode >> HGATP_MODE_SHIFT;
}
-int kvm_riscv_stage2_gpa_bits(void)
+int kvm_riscv_gstage_gpa_bits(void)
{
- return stage2_gpa_bits;
+ return gstage_gpa_bits;
}
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
deleted file mode 100644
index 899f75d60bad..000000000000
--- a/arch/riscv/kvm/tlb.S
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019 Western Digital Corporation or its affiliates.
- *
- * Authors:
- * Anup Patel <anup.patel@wdc.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm.h>
-
- .text
- .altmacro
- .option norelax
-
- /*
- * Instruction encoding of hfence.gvma is:
- * HFENCE.GVMA rs1, rs2
- * HFENCE.GVMA zero, rs2
- * HFENCE.GVMA rs1
- * HFENCE.GVMA
- *
- * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
- * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
- * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
- * rs1==zero and rs2==zero ==> HFENCE.GVMA
- *
- * Instruction encoding of HFENCE.GVMA is:
- * 0110001 rs2(5) rs1(5) 000 00000 1110011
- */
-
-ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
- /*
- * rs1 = a0 (GPA >> 2)
- * rs2 = a1 (VMID)
- * HFENCE.GVMA a0, a1
- * 0110001 01011 01010 000 00000 1110011
- */
- .word 0x62b50073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
-
-ENTRY(__kvm_riscv_hfence_gvma_vmid)
- /*
- * rs1 = zero
- * rs2 = a0 (VMID)
- * HFENCE.GVMA zero, a0
- * 0110001 01010 00000 000 00000 1110011
- */
- .word 0x62a00073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_vmid)
-
-ENTRY(__kvm_riscv_hfence_gvma_gpa)
- /*
- * rs1 = a0 (GPA >> 2)
- * rs2 = zero
- * HFENCE.GVMA a0
- * 0110001 00000 01010 000 00000 1110011
- */
- .word 0x62050073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_gpa)
-
-ENTRY(__kvm_riscv_hfence_gvma_all)
- /*
- * rs1 = zero
- * rs2 = zero
- * HFENCE.GVMA
- * 0110001 00000 00000 000 00000 1110011
- */
- .word 0x62000073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_all)
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
new file mode 100644
index 000000000000..1a76d0b1907d
--- /dev/null
+++ b/arch/riscv/kvm/tlb.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/csr.h>
+
+/*
+ * Instruction encoding of hfence.gvma is:
+ * HFENCE.GVMA rs1, rs2
+ * HFENCE.GVMA zero, rs2
+ * HFENCE.GVMA rs1
+ * HFENCE.GVMA
+ *
+ * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
+ * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
+ * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
+ * rs1==zero and rs2==zero ==> HFENCE.GVMA
+ *
+ * Instruction encoding of HFENCE.GVMA is:
+ * 0110001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+ return;
+ }
+
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = a1 (VMID)
+ * HFENCE.GVMA a0, a1
+ * 0110001 01011 01010 000 00000 1110011
+ */
+ asm volatile ("srli a0, %0, 2\n"
+ "add a1, %1, zero\n"
+ ".word 0x62b50073\n"
+ :: "r" (pos), "r" (vmid)
+ : "a0", "a1", "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid)
+{
+ /*
+ * rs1 = zero
+ * rs2 = a0 (VMID)
+ * HFENCE.GVMA zero, a0
+ * 0110001 01010 00000 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x62a00073\n"
+ :: "r" (vmid) : "a0", "memory");
+}
+
+void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_all();
+ return;
+ }
+
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GPA >> 2)
+ * rs2 = zero
+ * HFENCE.GVMA a0
+ * 0110001 00000 01010 000 00000 1110011
+ */
+ asm volatile ("srli a0, %0, 2\n"
+ ".word 0x62050073\n"
+ :: "r" (pos) : "a0", "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_all(void)
+{
+ /*
+ * rs1 = zero
+ * rs2 = zero
+ * HFENCE.GVMA
+ * 0110001 00000 00000 000 00000 1110011
+ */
+ asm volatile (".word 0x62000073" ::: "memory");
+}
+
+/*
+ * Instruction encoding of hfence.gvma is:
+ * HFENCE.VVMA rs1, rs2
+ * HFENCE.VVMA zero, rs2
+ * HFENCE.VVMA rs1
+ * HFENCE.VVMA
+ *
+ * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2
+ * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2
+ * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1
+ * rs1==zero and rs2==zero ==> HFENCE.VVMA
+ *
+ * Instruction encoding of HFENCE.VVMA is:
+ * 0010001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
+ unsigned long asid,
+ unsigned long gva,
+ unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_asid_all(vmid, asid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GVA)
+ * rs2 = a1 (ASID)
+ * HFENCE.VVMA a0, a1
+ * 0010001 01011 01010 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ "add a1, %1, zero\n"
+ ".word 0x22b50073\n"
+ :: "r" (pos), "r" (asid)
+ : "a0", "a1", "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
+ unsigned long asid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ /*
+ * rs1 = zero
+ * rs2 = a0 (ASID)
+ * HFENCE.VVMA zero, a0
+ * 0010001 01010 00000 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x22a00073\n"
+ :: "r" (asid) : "a0", "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_all(vmid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
+ /*
+ * rs1 = a0 (GVA)
+ * rs2 = zero
+ * HFENCE.VVMA a0
+ * 0010001 00000 01010 000 00000 1110011
+ */
+ asm volatile ("add a0, %0, zero\n"
+ ".word 0x22050073\n"
+ :: "r" (pos) : "a0", "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ /*
+ * rs1 = zero
+ * rs2 = zero
+ * HFENCE.VVMA
+ * 0010001 00000 00000 000 00000 1110011
+ */
+ asm volatile (".word 0x22000073" ::: "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
+{
+ unsigned long vmid;
+
+ if (!kvm_riscv_gstage_vmid_bits() ||
+ vcpu->arch.last_exit_cpu == vcpu->cpu)
+ return;
+
+ /*
+ * On RISC-V platforms with hardware VMID support, we share same
+ * VMID for all VCPUs of a particular Guest/VM. This means we might
+ * have stale G-stage TLB entries on the current Host CPU due to
+ * some other VCPU of the same Guest which ran previously on the
+ * current Host CPU.
+ *
+ * To cleanup stale TLB entries, we simply flush all G-stage TLB
+ * entries by VMID whenever underlying Host CPU changes for a VCPU.
+ */
+
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+}
+
+void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
+{
+ local_flush_icache_all();
+}
+
+void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid));
+}
+
+void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid));
+}
+
+static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu,
+ struct kvm_riscv_hfence *out_data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (varch->hfence_queue[varch->hfence_head].type) {
+ memcpy(out_data, &varch->hfence_queue[varch->hfence_head],
+ sizeof(*out_data));
+ varch->hfence_queue[varch->hfence_head].type = 0;
+
+ varch->hfence_head++;
+ if (varch->hfence_head == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_head = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu,
+ const struct kvm_riscv_hfence *data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (!varch->hfence_queue[varch->hfence_tail].type) {
+ memcpy(&varch->hfence_queue[varch->hfence_tail],
+ data, sizeof(*data));
+
+ varch->hfence_tail++;
+ if (varch->hfence_tail == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_tail = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_riscv_hfence d = { 0 };
+ struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
+
+ while (vcpu_hfence_dequeue(vcpu, &d)) {
+ switch (d.type) {
+ case KVM_RISCV_HFENCE_UNKNOWN:
+ break;
+ case KVM_RISCV_HFENCE_GVMA_VMID_GPA:
+ kvm_riscv_local_hfence_gvma_vmid_gpa(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
+ kvm_riscv_local_hfence_vvma_asid_gva(
+ READ_ONCE(v->vmid), d.asid,
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
+ kvm_riscv_local_hfence_vvma_asid_all(
+ READ_ONCE(v->vmid), d.asid);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_GVA:
+ kvm_riscv_local_hfence_vvma_gva(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void make_xfence_request(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned int req, unsigned int fallback_req,
+ const struct kvm_riscv_hfence *data)
+{
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+ unsigned int actual_req = req;
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+
+ bitmap_clear(vcpu_mask, 0, KVM_MAX_VCPUS);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (hbase != -1UL) {
+ if (vcpu->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (vcpu->vcpu_id - hbase))))
+ continue;
+ }
+
+ bitmap_set(vcpu_mask, i, 1);
+
+ if (!data || !data->type)
+ continue;
+
+ /*
+ * Enqueue hfence data to VCPU hfence queue. If we don't
+ * have space in the VCPU hfence queue then fallback to
+ * a more conservative hfence request.
+ */
+ if (!vcpu_hfence_enqueue(vcpu, data))
+ actual_req = fallback_req;
+ }
+
+ kvm_make_vcpus_request_mask(kvm, actual_req, vcpu_mask);
+}
+
+void kvm_riscv_fence_i(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
+ KVM_REQ_FENCE_I, NULL);
+}
+
+void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA;
+ data.asid = 0;
+ data.addr = gpa;
+ data.size = gpsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, &data);
+}
+
+void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL);
+}
+
+void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order, unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA;
+ data.asid = asid;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL;
+ data.asid = asid;
+ data.addr = data.size = data.order = 0;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_GVA;
+ data.asid = 0;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL,
+ KVM_REQ_HFENCE_VVMA_ALL, NULL);
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 7461f964d20a..7f4ad5e4373a 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -67,6 +67,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
+ vcpu->arch.last_exit_cpu = -1;
+
memcpy(csr, reset_csr, sizeof(*csr));
memcpy(cntx, reset_cntx, sizeof(*cntx));
@@ -78,6 +80,10 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+ vcpu->arch.hfence_head = 0;
+ vcpu->arch.hfence_tail = 0;
+ memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
+
/* Reset the guest CSRs for hotplug usecase */
if (loaded)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
@@ -101,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Setup ISA features available to VCPU */
vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+ /* Setup VCPU hfence queue */
+ spin_lock_init(&vcpu->arch.hfence_lock);
+
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
cntx = &vcpu->arch.guest_reset_context;
cntx->sstatus = SR_SPP | SR_SPIE;
@@ -137,7 +146,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Cleanup VCPU timer */
kvm_riscv_vcpu_timer_deinit(vcpu);
- /* Free unused pages pre-allocated for Stage2 page table mappings */
+ /* Free unused pages pre-allocated for G-stage page table mappings */
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
@@ -365,6 +374,101 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
return 0;
}
+/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+static unsigned long kvm_isa_ext_arr[] = {
+ RISCV_ISA_EXT_a,
+ RISCV_ISA_EXT_c,
+ RISCV_ISA_EXT_d,
+ RISCV_ISA_EXT_f,
+ RISCV_ISA_EXT_h,
+ RISCV_ISA_EXT_i,
+ RISCV_ISA_EXT_m,
+};
+
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val = 0;
+ unsigned long host_isa_ext;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (__riscv_isa_extension_available(&vcpu->arch.isa, host_isa_ext))
+ reg_val = 1; /* Mark the given extension as available */
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val;
+ unsigned long host_isa_ext;
+ unsigned long host_isa_ext_mask;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+ return -EOPNOTSUPP;
+
+ if (host_isa_ext >= RISCV_ISA_EXT_BASE &&
+ host_isa_ext < RISCV_ISA_EXT_MAX) {
+ /*
+ * Multi-letter ISA extension. Currently there is no provision
+ * to enable/disable the multi-letter ISA extensions for guests.
+ * Return success if the request is to enable any ISA extension
+ * that is available in the hardware.
+ * Return -EOPNOTSUPP otherwise.
+ */
+ if (!reg_val)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+ }
+
+ /* Single letter base ISA extension */
+ if (!vcpu->arch.ran_atleast_once) {
+ host_isa_ext_mask = BIT_MASK(host_isa_ext);
+ if (!reg_val && (host_isa_ext_mask & KVM_RISCV_ISA_DISABLE_ALLOWED))
+ vcpu->arch.isa &= ~host_isa_ext_mask;
+ else
+ vcpu->arch.isa |= host_isa_ext_mask;
+ vcpu->arch.isa &= riscv_isa_extension_base(NULL);
+ vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
@@ -382,6 +486,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
return -EINVAL;
}
@@ -403,6 +509,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
return -EINVAL;
}
@@ -635,7 +743,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
- kvm_riscv_stage2_update_hgatp(vcpu);
+ kvm_riscv_gstage_update_hgatp(vcpu);
kvm_riscv_vcpu_timer_restore(vcpu);
@@ -690,10 +798,23 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
kvm_riscv_reset_vcpu(vcpu);
if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
- kvm_riscv_stage2_update_hgatp(vcpu);
+ kvm_riscv_gstage_update_hgatp(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- __kvm_riscv_hfence_gvma_all();
+ if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
+ kvm_riscv_fence_i_process(vcpu);
+
+ /*
+ * The generic KVM_REQ_TLB_FLUSH is same as
+ * KVM_REQ_HFENCE_GVMA_VMID_ALL
+ */
+ if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
+ kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
+ kvm_riscv_hfence_vvma_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
+ kvm_riscv_hfence_process(vcpu);
}
}
@@ -715,6 +836,7 @@ static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
guest_state_enter_irqoff();
__kvm_riscv_switch_to(&vcpu->arch);
+ vcpu->arch.last_exit_cpu = vcpu->cpu;
guest_state_exit_irqoff();
}
@@ -762,7 +884,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Check conditions before entering the guest */
cond_resched();
- kvm_riscv_stage2_vmid_update(vcpu);
+ kvm_riscv_gstage_vmid_update(vcpu);
kvm_riscv_check_vcpu_requests(vcpu);
@@ -800,7 +922,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_update_hvip(vcpu);
if (ret <= 0 ||
- kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
@@ -809,6 +931,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
continue;
}
+ /*
+ * Cleanup stale TLB enteries
+ *
+ * Note: This should be done after G-stage VMID has been
+ * updated using kvm_riscv_gstage_vmid_ver_changed()
+ */
+ kvm_riscv_local_tlb_sanitize(vcpu);
+
guest_timing_enter_irqoff();
kvm_riscv_vcpu_enter_exit(vcpu);
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index a72c15d4b42a..dbb09afd7546 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -412,7 +412,7 @@ static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
return 0;
}
-static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap)
{
struct kvm_memory_slot *memslot;
@@ -440,7 +440,7 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
};
}
- ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva,
+ ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva,
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
if (ret < 0)
return ret;
@@ -686,7 +686,7 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case EXC_LOAD_GUEST_PAGE_FAULT:
case EXC_STORE_GUEST_PAGE_FAULT:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
- ret = stage2_page_fault(vcpu, run, trap);
+ ret = gstage_page_fault(vcpu, run, trap);
break;
case EXC_SUPERVISOR_SYSCALL:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 0f217365c287..4c034d8a606a 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -81,43 +81,41 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
struct kvm_cpu_trap *utrap, bool *exit)
{
int ret = 0;
- unsigned long i;
- struct cpumask cm;
- struct kvm_vcpu *tmp;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
unsigned long hmask = cp->a0;
unsigned long hbase = cp->a1;
unsigned long funcid = cp->a6;
- cpumask_clear(&cm);
- kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
- if (hbase != -1UL) {
- if (tmp->vcpu_id < hbase)
- continue;
- if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
- continue;
- }
- if (tmp->cpu < 0)
- continue;
- cpumask_set_cpu(tmp->cpu, &cm);
- }
-
switch (funcid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_remote_fence_i(&cm);
+ kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
+ cp->a2, cp->a3, PAGE_SHIFT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
- cp->a3, cp->a4);
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ hbase, hmask, cp->a4);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ hbase, hmask,
+ cp->a2, cp->a3,
+ PAGE_SHIFT, cp->a4);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
- /* TODO: implement for nested hypervisor case */
+ /*
+ * Until nested virtualization is implemented, the
+ * SBI HFENCE calls should be treated as NOPs
+ */
+ break;
default:
ret = -EOPNOTSUPP;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index da4d6c99c2cf..8a91a14e7139 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -23,7 +23,6 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
int i, ret = 0;
u64 next_cycle;
struct kvm_vcpu *rvcpu;
- struct cpumask cm;
struct kvm *kvm = vcpu->kvm;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -80,19 +79,29 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (utrap->scause)
break;
- cpumask_clear(&cm);
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- if (rvcpu->cpu < 0)
- continue;
- cpumask_set_cpu(rvcpu->cpu, &cm);
- }
if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- ret = sbi_remote_fence_i(&cm);
- else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
- else
- ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
+ kvm_riscv_fence_i(vcpu->kvm, 0, hmask);
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm,
+ 0, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT);
+ } else {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ 0, hmask,
+ cp->a3);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT,
+ cp->a3);
+ }
break;
default:
ret = -EINVAL;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index c768f75279ef..945a2bf5e3f6 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -31,13 +31,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int r;
- r = kvm_riscv_stage2_alloc_pgd(kvm);
+ r = kvm_riscv_gstage_alloc_pgd(kvm);
if (r)
return r;
- r = kvm_riscv_stage2_vmid_init(kvm);
+ r = kvm_riscv_gstage_vmid_init(kvm);
if (r) {
- kvm_riscv_stage2_free_pgd(kvm);
+ kvm_riscv_gstage_free_pgd(kvm);
return r;
}
@@ -75,7 +75,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_VM_GPA_BITS:
- r = kvm_riscv_stage2_gpa_bits();
+ r = kvm_riscv_gstage_gpa_bits();
break;
default:
r = 0;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 2fa4f7b1813d..9f764df125db 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -11,16 +11,16 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/smp.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
-#include <asm/sbi.h>
static unsigned long vmid_version = 1;
static unsigned long vmid_next;
static unsigned long vmid_bits;
static DEFINE_SPINLOCK(vmid_lock);
-void kvm_riscv_stage2_vmid_detect(void)
+void kvm_riscv_gstage_vmid_detect(void)
{
unsigned long old;
@@ -33,19 +33,19 @@ void kvm_riscv_stage2_vmid_detect(void)
csr_write(CSR_HGATP, old);
/* We polluted local TLB so flush all guest TLB */
- __kvm_riscv_hfence_gvma_all();
+ kvm_riscv_local_hfence_gvma_all();
/* We don't use VMID bits if they are not sufficient */
if ((1UL << vmid_bits) < num_possible_cpus())
vmid_bits = 0;
}
-unsigned long kvm_riscv_stage2_vmid_bits(void)
+unsigned long kvm_riscv_gstage_vmid_bits(void)
{
return vmid_bits;
}
-int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm)
{
/* Mark the initial VMID and VMID version invalid */
kvm->arch.vmid.vmid_version = 0;
@@ -54,7 +54,7 @@ int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
return 0;
}
-bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid)
{
if (!vmid_bits)
return false;
@@ -63,13 +63,18 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
READ_ONCE(vmid_version));
}
-void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+static void __local_hfence_gvma_all(void *info)
+{
+ kvm_riscv_local_hfence_gvma_all();
+}
+
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
{
unsigned long i;
struct kvm_vcpu *v;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid))
return;
spin_lock(&vmid_lock);
@@ -78,7 +83,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* We need to re-check the vmid_version here to ensure that if
* another vcpu already allocated a valid vmid for this vm.
*/
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid)) {
spin_unlock(&vmid_lock);
return;
}
@@ -96,12 +101,13 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* instances is invalid and we have force VMID re-assignement
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will
- * call kvm_riscv_stage2_vmid_update() whenever they enter
+ * call kvm_riscv_gstage_vmid_update() whenever they enter
* in-kernel run loop. For Guest instances that are already
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
- sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
+ on_each_cpu_mask(cpu_online_mask, __local_hfence_gvma_all,
+ NULL, 1);
}
vmid->vmid = vmid_next;
@@ -112,7 +118,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
spin_unlock(&vmid_lock);
- /* Request stage2 page table update for all VCPUs */
+ /* Request G-stage page table update for all VCPUs */
kvm_for_each_vcpu(i, v, vcpu->kvm)
kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 05ed641a1134..180d6a3e2a05 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -120,13 +120,7 @@ void __init mem_init(void)
BUG_ON(!mem_map);
#endif /* CONFIG_FLATMEM */
-#ifdef CONFIG_SWIOTLB
- if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > PFN_DOWN(dma32_phys_limit))
- swiotlb_init(1);
- else
- swiotlb_force = SWIOTLB_NO_FORCE;
-#endif
+ swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
memblock_free_all();
print_vm_layout();
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index f42d9cd3b64d..2a3715bf29fe 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -535,6 +535,43 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
+static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rv_fence(u8 pred, u8 succ)
+{
+ u16 imm11_0 = pred << 4 | succ;
+
+ return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
+}
+
/* RVC instrutions. */
static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
@@ -753,6 +790,36 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
+static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
/* RV64-only RVC instructions. */
static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 0bcda99d1d68..00df3a8f92ac 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -455,6 +455,90 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
return 0;
}
+static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
+ struct rv_jit_context *ctx)
+{
+ u8 r0;
+ int jmp_offset;
+
+ if (off) {
+ if (is_12b_int(off)) {
+ emit_addi(RV_REG_T1, rd, off, ctx);
+ } else {
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ }
+ rd = RV_REG_T1;
+ }
+
+ switch (imm) {
+ /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
+ case BPF_ADD:
+ emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_AND:
+ emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_OR:
+ emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ case BPF_XOR:
+ emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
+ rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ break;
+ /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
+ case BPF_ADD | BPF_FETCH:
+ emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
+ rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_AND | BPF_FETCH:
+ emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
+ rv_amoand_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_OR | BPF_FETCH:
+ emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
+ rv_amoor_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ case BPF_XOR | BPF_FETCH:
+ emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
+ rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
+ case BPF_XCHG:
+ emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
+ rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
+ if (!is64)
+ emit_zext_32(rs, ctx);
+ break;
+ /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
+ case BPF_CMPXCHG:
+ r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
+ emit(is64 ? rv_addi(RV_REG_T2, r0, 0) :
+ rv_addiw(RV_REG_T2, r0, 0), ctx);
+ emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
+ rv_lr_w(r0, 0, rd, 0, 0), ctx);
+ jmp_offset = ninsns_rvoff(8);
+ emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
+ emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) :
+ rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx);
+ jmp_offset = ninsns_rvoff(-6);
+ emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
+ emit(rv_fence(0x3, 0x3), ctx);
+ break;
+ }
+}
+
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
@@ -1146,30 +1230,8 @@ out_be:
break;
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- if (insn->imm != BPF_ADD) {
- pr_err("bpf-jit: not supported: atomic operation %02x ***\n",
- insn->imm);
- return -EINVAL;
- }
-
- /* atomic_add: lock *(u32 *)(dst + off) += src
- * atomic_add: lock *(u64 *)(dst + off) += src
- */
-
- if (off) {
- if (is_12b_int(off)) {
- emit_addi(RV_REG_T1, rd, off, ctx);
- } else {
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- }
-
- rd = RV_REG_T1;
- }
-
- emit(BPF_SIZE(code) == BPF_W ?
- rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
- rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ emit_atomic(rd, rs, off, imm,
+ BPF_SIZE(code) == BPF_DW, ctx);
break;
default:
pr_err("bpf-jit: unknown opcode %02x\n", code);