summaryrefslogtreecommitdiff
path: root/arch/arm/kvm/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r--arch/arm/kvm/mmu.c273
1 files changed, 218 insertions, 55 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..57a403a5c22b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (kvm_pte_table_empty(start_pte))
+ if (kvm_pte_table_empty(kvm, start_pte))
clear_pmd_entry(kvm, pmd, start_addr);
}
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
}
} while (pmd++, addr = next, addr != end);
- if (kvm_pmd_table_empty(start_pmd))
+ if (kvm_pmd_table_empty(kvm, start_pmd))
clear_pud_entry(kvm, pud, start_addr);
}
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
}
} while (pud++, addr = next, addr != end);
- if (kvm_pud_table_empty(start_pud))
+ if (kvm_pud_table_empty(kvm, start_pud))
clear_pgd_entry(kvm, pgd, start_addr);
}
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+ free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
- free_pages((unsigned long)hyp_pgd, pgd_order);
+ free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
}
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return 0;
}
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int ret;
+
+ addr = start;
+ do {
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none_or_clear_bad(pud)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate Hyp pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ get_page(virt_to_page(pud));
+ kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ }
+
+ next = pud_addr_end(addr, end);
+ ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ if (ret)
+ return ret;
+ pfn += (next - addr) >> PAGE_SHIFT;
+ } while (addr = next, addr != end);
+
+ return 0;
+}
+
static int __create_hyp_mappings(pgd_t *pgdp,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd;
unsigned long addr, next;
int err = 0;
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
end = PAGE_ALIGN(end);
do {
pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none_or_clear_bad(pud)) {
- pmd = pmd_alloc_one(NULL, addr);
- if (!pmd) {
- kvm_err("Cannot allocate Hyp pmd\n");
+ if (pgd_none(*pgd)) {
+ pud = pud_alloc_one(NULL, addr);
+ if (!pud) {
+ kvm_err("Cannot allocate Hyp pud\n");
err = -ENOMEM;
goto out;
}
- pud_populate(NULL, pud, pmd);
- get_page(virt_to_page(pud));
- kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
}
next = pgd_addr_end(addr, end);
- err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
if (err)
goto out;
pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
+ int ret;
pgd_t *pgd;
if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
return -EINVAL;
}
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+ if (KVM_PREALLOC_LEVEL > 0) {
+ /*
+ * Allocate fake pgd for the page table manipulation macros to
+ * work. This is not used by the hardware and we have no
+ * alignment requirement for this allocation.
+ */
+ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+ GFP_KERNEL | __GFP_ZERO);
+ } else {
+ /*
+ * Allocate actual first-level Stage-2 page table used by the
+ * hardware for Stage-2 page table walks.
+ */
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ }
+
if (!pgd)
return -ENOMEM;
- memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+ ret = kvm_prealloc_hwpgd(kvm, pgd);
+ if (ret)
+ goto out_err;
+
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
-
return 0;
+out_err:
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(pgd);
+ else
+ free_pages((unsigned long)pgd, S2_PGD_ORDER);
+ return ret;
}
/**
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+ kvm_free_hwpgd(kvm);
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(kvm->arch.pgd);
+ else
+ free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
kvm->arch.pgd = NULL;
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd;
pgd = kvm->arch.pgd + pgd_index(addr);
- pud = pud_offset(pgd, addr);
+ if (WARN_ON(pgd_none(*pgd))) {
+ if (!cache)
+ return NULL;
+ pud = mmu_memory_cache_alloc(cache);
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ }
+
+ return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = stage2_get_pud(kvm, cache, addr);
if (pud_none(*pud)) {
if (!cache)
return NULL;
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
pmd_t *pmd;
pte_t *pte, old_pte;
- /* Create stage-2 page table mapping - Level 1 */
+ /* Create stage-2 page table mapping - Levels 0 and 1 */
pmd = stage2_get_pmd(kvm, cache, addr);
if (!pmd) {
/*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* @size: The size of the mapping
*/
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size)
+ phys_addr_t pa, unsigned long size, bool writable)
{
phys_addr_t addr, end;
int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
- ret = mmu_topup_memory_cache(&cache, 2, 2);
+ if (writable)
+ kvm_set_s2pte_writable(&pte);
+
+ ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
@@ -746,22 +826,29 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
return false;
}
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
int ret;
bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
- unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
@@ -770,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+
if (is_vm_hugetlb_page(vma)) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -790,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
up_read(&current->mm->mmap_sem);
/* We need minimum second+third level pages */
- ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+ ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
return ret;
@@ -836,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
- mem_type == PAGE_S2_DEVICE);
+ pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}
@@ -863,7 +957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -874,17 +969,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +992,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -910,9 +1003,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ /* Userspace should not be able to register out-of-bounds IPAs */
+ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
- ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
@@ -1068,8 +1162,8 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
- hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
- boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+ hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+ boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
@@ -1122,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *old,
enum kvm_mr_change change)
{
- gpa_t gpa = old->base_gfn << PAGE_SHIFT;
- phys_addr_t size = old->npages << PAGE_SHIFT;
- if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
- spin_lock(&kvm->mmu_lock);
- unmap_stage2_range(kvm, gpa, size);
- spin_unlock(&kvm->mmu_lock);
- }
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1136,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- return 0;
+ hva_t hva = mem->userspace_addr;
+ hva_t reg_end = hva + mem->memory_size;
+ bool writable = !(mem->flags & KVM_MEM_READONLY);
+ int ret = 0;
+
+ if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+ return 0;
+
+ /*
+ * Prevent userspace from creating a memory region outside of the IPA
+ * space addressable by the KVM guest IPA space.
+ */
+ if (memslot->base_gfn + memslot->npages >=
+ (KVM_PHYS_SIZE >> PAGE_SHIFT))
+ return -EFAULT;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we can map
+ * any of them right now.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Mapping a read-only VMA is only allowed if the
+ * memory region is configured as read-only.
+ */
+ if (writable && !(vma->vm_flags & VM_WRITE)) {
+ ret = -EPERM;
+ break;
+ }
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (vma->vm_flags & VM_PFNMAP) {
+ gpa_t gpa = mem->guest_phys_addr +
+ (vm_start - mem->userspace_addr);
+ phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+ vm_start - vma->vm_start;
+
+ ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+ vm_end - vm_start,
+ writable);
+ if (ret)
+ break;
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+
+ if (ret) {
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ spin_unlock(&kvm->mmu_lock);
+ }
+ return ret;
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1161,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, gpa, size);
+ spin_unlock(&kvm->mmu_lock);
}