summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2021-06-22 17:09:34 +0300
committerMarc Zyngier <maz@kernel.org>2021-06-22 17:09:34 +0300
commit9f03db6673598f618f10ba01b3f8824bd5f31a41 (patch)
tree219eab43901af8c7fb1e1498f597ecb03988d51e /arch/arm64/kvm
parent2fea6cf7d32141b9e95e30500f1d50a9f92a7371 (diff)
parent04c02c201d7e8149ae336ead69fb64e4e6f94bc9 (diff)
downloadlinux-9f03db6673598f618f10ba01b3f8824bd5f31a41.tar.xz
Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
KVM/arm64 support for MTE, courtesy of Steven Price. It allows the guest to use memory tagging, and offers a new userspace API to save/restore the tags. * kvm-arm64/mmu/mte: KVM: arm64: Document MTE capability and ioctl KVM: arm64: Add ioctl to fetch/store tags in a guest KVM: arm64: Expose KVM_ARM_CAP_MTE KVM: arm64: Save/restore MTE registers KVM: arm64: Introduce MTE VM feature arm64: mte: Sync tags for pages where PTE is untagged Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/guest.c82
-rw-r--r--arch/arm64/kvm/hyp/entry.S7
-rw-r--r--arch/arm64/kvm/hyp/exception.c3
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h21
-rw-r--r--arch/arm64/kvm/mmu.c66
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c32
8 files changed, 226 insertions, 5 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b0294f2e95ba..e0b81870ff2a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
break;
+ case KVM_CAP_ARM_MTE:
+ if (!system_supports_mte() || kvm->created_vcpus)
+ return -EINVAL;
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ break;
default:
r = -EINVAL;
break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
+ case KVM_CAP_ARM_MTE:
+ r = system_supports_mte();
+ break;
case KVM_CAP_STEAL_TIME:
r = kvm_arm_pvtime_supported();
break;
@@ -1354,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ARM_MTE_COPY_TAGS: {
+ struct kvm_arm_copy_mte_tags copy_tags;
+
+ if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+ return -EFAULT;
+ return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603..4ddb20017b2f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags)
+{
+ gpa_t guest_ipa = copy_tags->guest_ipa;
+ size_t length = copy_tags->length;
+ void __user *tags = copy_tags->addr;
+ gpa_t gfn;
+ bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+ int ret = 0;
+
+ if (!kvm_has_mte(kvm))
+ return -EINVAL;
+
+ if (copy_tags->reserved[0] || copy_tags->reserved[1])
+ return -EINVAL;
+
+ if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+ return -EINVAL;
+
+ if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+ return -EINVAL;
+
+ gfn = gpa_to_gfn(guest_ipa);
+
+ mutex_lock(&kvm->slots_lock);
+
+ while (length > 0) {
+ kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+ void *maddr;
+ unsigned long num_tags;
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ page = pfn_to_online_page(pfn);
+ if (!page) {
+ /* Reject ZONE_DEVICE memory */
+ ret = -EFAULT;
+ goto out;
+ }
+ maddr = page_address(page);
+
+ if (!write) {
+ if (test_bit(PG_mte_tagged, &page->flags))
+ num_tags = mte_copy_tags_to_user(tags, maddr,
+ MTE_GRANULES_PER_PAGE);
+ else
+ /* No tags in memory, so write zeros */
+ num_tags = MTE_GRANULES_PER_PAGE -
+ clear_user(tags, MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_clean(pfn);
+ } else {
+ num_tags = mte_copy_tags_from_user(maddr, tags,
+ MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_dirty(pfn);
+ }
+
+ if (num_tags != MTE_GRANULES_PER_PAGE) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* Set the flag after checking the write completed fully */
+ if (write)
+ set_bit(PG_mte_tagged, &page->flags);
+
+ gfn++;
+ tags += num_tags;
+ length -= PAGE_SIZE;
+ }
+
+out:
+ mutex_unlock(&kvm->slots_lock);
+ /* If some data has been copied report the number of bytes copied */
+ if (length != copy_tags->length)
+ return copy_tags->length - length;
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
#include <asm/kvm_ptrauth.h>
.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
add x29, x0, #VCPU_CONTEXT
+ // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+ mte_switch_to_guest x29, x1, x2
+
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
+ // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+ mte_switch_to_hyp x1, x2, x3
+
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 11541b94b328..0418399e0a20 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+ if (kvm_has_mte(vcpu->kvm))
+ new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+ ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+ }
+
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+ write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+ }
+
if (!has_vhe() &&
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index bf389dfc885d..57292dc5ce35 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -853,6 +853,45 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
return PAGE_SHIFT;
}
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ * racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ * an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
+{
+ unsigned long i, nr_pages = size >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!kvm_has_mte(kvm))
+ return 0;
+
+ /*
+ * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+ * that may not support tags.
+ */
+ page = pfn_to_online_page(pfn);
+
+ if (!page)
+ return -EFAULT;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ mte_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+ }
+ }
+
+ return 0;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -861,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
+ bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -907,6 +947,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_shift = get_vma_page_shift(vma, hva);
}
+ shared = (vma->vm_flags & VM_PFNMAP);
+
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
@@ -1011,6 +1053,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
+
+ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ /* Check the VMM hasn't introduced a new VM_SHARED VMA */
+ if (!shared)
+ ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ else
+ ret = -EFAULT;
+ if (ret)
+ goto out_unlock;
+ }
+
if (writable)
prot |= KVM_PGTABLE_PROT_W;
@@ -1206,12 +1259,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
+ int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
+ ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+ if (ret)
+ return false;
+
/*
* We've moved a page around, probably through CoW, so let's treat
* it just like a translation fault and the map handler will clean
@@ -1414,6 +1472,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (!vma)
break;
+ /*
+ * VM_SHARED mappings are not allowed with MTE to avoid races
+ * when updating the PG_mte_tagged page flag, see
+ * sanitise_mte_tags for more details.
+ */
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+ return -EINVAL;
+
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cf..cba7872d69a8 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return false;
+ /* MTE is incompatible with AArch32 */
+ if (kvm_has_mte(vcpu->kvm) && is32bit)
+ return false;
+
/* Check that the vcpus are either all 32bit or all 64bit */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a7968ad078c..f6f126eb6ac1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~FEATURE(ID_AA64PFR1_MTE);
+ if (kvm_has_mte(vcpu->kvm)) {
+ u64 pfr, mte;
+
+ pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+ }
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_mte(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+#define MTE_REG(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = undef_access, \
+ .reset = reset_unknown, \
+ .reg = name, \
+ .visibility = mte_visibility, \
+}
+
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
- { SYS_DESC(SYS_RGSR_EL1), undef_access },
- { SYS_DESC(SYS_GCR_EL1), undef_access },
+ MTE_REG(RGSR_EL1),
+ MTE_REG(GCR_EL1),
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
- { SYS_DESC(SYS_TFSR_EL1), undef_access },
- { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+ MTE_REG(TFSR_EL1),
+ MTE_REG(TFSRE0_EL1),
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },