summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r--arch/arm64/kvm/mmu.c311
1 files changed, 178 insertions, 133 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7a7ddc4558a7..0121ef2c7c8d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}
-static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level);
}
/*
@@ -90,74 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn)
/**
* stage2_dissolve_pmd() - clear and flush huge PMD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pmd: pmd pointer for IPA
*
* Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd)
{
if (!pmd_thp_or_huge(*pmd))
return;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
put_page(virt_to_page(pmd));
}
/**
* stage2_dissolve_pud() - clear and flush huge PUD entry
- * @kvm: pointer to kvm structure.
+ * @mmu: pointer to mmu structure to operate on
* @addr: IPA
* @pud: pud pointer for IPA
*
* Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
*/
-static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
+static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp)
{
+ struct kvm *kvm = mmu->kvm;
+
if (!stage2_pud_huge(kvm, *pudp))
return;
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
put_page(virt_to_page(pudp));
}
-static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
stage2_pgd_clear(kvm, pgd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_p4d_free(kvm, p4d_table);
put_page(virt_to_page(pgd));
}
-static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr)
+static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0);
stage2_p4d_clear(kvm, p4d);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pud_free(kvm, pud_table);
put_page(virt_to_page(p4d));
}
-static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
+
VM_BUG_ON(stage2_pud_huge(kvm, *pud));
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
stage2_pmd_free(kvm, pmd_table);
put_page(virt_to_page(pud));
}
-static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
VM_BUG_ON(pmd_thp_or_huge(*pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT);
free_page((unsigned long)pte_table);
put_page(virt_to_page(pmd));
}
@@ -223,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
* we then fully enforce cacheability of RAM, no matter what the guest
* does.
*/
-static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t start_addr = addr;
@@ -235,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
pte_t old_pte = *pte;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
/* No need to invalidate the cache for device mappings */
if (!kvm_is_device_pfn(pte_pfn(old_pte)))
@@ -245,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (stage2_pte_table_empty(kvm, start_pte))
- clear_stage2_pmd_entry(kvm, pmd, start_addr);
+ if (stage2_pte_table_empty(mmu->kvm, start_pte))
+ clear_stage2_pmd_entry(mmu, pmd, start_addr);
}
-static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pmd_t *pmd, *start_pmd;
@@ -263,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
pmd_t old_pmd = *pmd;
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
kvm_flush_dcache_pmd(old_pmd);
put_page(virt_to_page(pmd));
} else {
- unmap_stage2_ptes(kvm, pmd, addr, next);
+ unmap_stage2_ptes(mmu, pmd, addr, next);
}
}
} while (pmd++, addr = next, addr != end);
if (stage2_pmd_table_empty(kvm, start_pmd))
- clear_stage2_pud_entry(kvm, pud, start_addr);
+ clear_stage2_pud_entry(mmu, pud, start_addr);
}
-static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
+static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
pud_t *pud, *start_pud;
@@ -292,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
pud_t old_pud = *pud;
stage2_pud_clear(kvm, pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
kvm_flush_dcache_pud(old_pud);
put_page(virt_to_page(pud));
} else {
- unmap_stage2_pmds(kvm, pud, addr, next);
+ unmap_stage2_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
if (stage2_pud_table_empty(kvm, start_pud))
- clear_stage2_p4d_entry(kvm, p4d, start_addr);
+ clear_stage2_p4d_entry(mmu, p4d, start_addr);
}
-static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
phys_addr_t next, start_addr = addr;
p4d_t *p4d, *start_p4d;
@@ -315,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- unmap_stage2_puds(kvm, p4d, addr, next);
+ unmap_stage2_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
if (stage2_p4d_table_empty(kvm, start_p4d))
- clear_stage2_pgd_entry(kvm, pgd, start_addr);
+ clear_stage2_pgd_entry(mmu, pgd, start_addr);
}
/**
@@ -333,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
@@ -342,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
assert_spin_locked(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Make sure the page table is still active, as another thread
* could have possibly freed the page table, while we released
* the lock.
*/
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- unmap_stage2_p4ds(kvm, pgd, addr, next);
+ unmap_stage2_p4ds(mmu, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
@@ -363,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
} while (pgd++, addr = next, addr != end);
}
-static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
pte_t *pte;
@@ -375,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -388,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
if (pmd_thp_or_huge(*pmd))
kvm_flush_dcache_pmd(*pmd);
else
- stage2_flush_ptes(kvm, pmd, addr, next);
+ stage2_flush_ptes(mmu, pmd, addr, next);
}
} while (pmd++, addr = next, addr != end);
}
-static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -406,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
if (stage2_pud_huge(kvm, *pud))
kvm_flush_dcache_pud(*pud);
else
- stage2_flush_pmds(kvm, pud, addr, next);
+ stage2_flush_pmds(mmu, pud, addr, next);
}
} while (pud++, addr = next, addr != end);
}
-static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -421,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_flush_puds(kvm, p4d, addr, next);
+ stage2_flush_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
static void stage2_flush_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
next = stage2_pgd_addr_end(kvm, addr, end);
if (!stage2_pgd_none(kvm, *pgd))
- stage2_flush_p4ds(kvm, pgd, addr, next);
+ stage2_flush_p4ds(mmu, pgd, addr, next);
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
@@ -964,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
}
/**
- * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
- * @kvm: The KVM struct pointer for the VM.
+ * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
+ * @kvm: The pointer to the KVM structure
+ * @mmu: The pointer to the s2 MMU structure
*
* Allocates only the stage-2 HW PGD level table(s) of size defined by
- * stage2_pgd_size(kvm).
+ * stage2_pgd_size(mmu->kvm).
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
-int kvm_alloc_stage2_pgd(struct kvm *kvm)
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
phys_addr_t pgd_phys;
pgd_t *pgd;
+ int cpu;
- if (kvm->arch.pgd != NULL) {
+ if (mmu->pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
@@ -992,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
return -EINVAL;
- kvm->arch.pgd = pgd;
- kvm->arch.pgd_phys = pgd_phys;
+ mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
+ if (!mmu->last_vcpu_ran) {
+ free_pages_exact(pgd, stage2_pgd_size(kvm));
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
+
+ mmu->kvm = kvm;
+ mmu->pgd = pgd;
+ mmu->pgd_phys = pgd_phys;
+ mmu->vmid.vmid_gen = 0;
+
return 0;
}
@@ -1032,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
- unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
}
hva = vm_end;
} while (hva < reg_end);
@@ -1064,39 +1093,34 @@ void stage2_unmap_vm(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
}
-/**
- * kvm_free_stage2_pgd - free all stage-2 tables
- * @kvm: The KVM struct pointer for the VM.
- *
- * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
- * underlying level-2 and level-3 tables before freeing the actual level-1 table
- * and setting the struct pointer to NULL.
- */
-void kvm_free_stage2_pgd(struct kvm *kvm)
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
+ struct kvm *kvm = mmu->kvm;
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
- if (kvm->arch.pgd) {
- unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
- pgd = READ_ONCE(kvm->arch.pgd);
- kvm->arch.pgd = NULL;
- kvm->arch.pgd_phys = 0;
+ if (mmu->pgd) {
+ unmap_stage2_range(mmu, 0, kvm_phys_size(kvm));
+ pgd = READ_ONCE(mmu->pgd);
+ mmu->pgd = NULL;
}
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */
- if (pgd)
+ if (pgd) {
free_pages_exact(pgd, stage2_pgd_size(kvm));
+ free_percpu(mmu->last_vcpu_ran);
+ }
}
-static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
p4d_t *p4d;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
@@ -1108,13 +1132,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_p4d_offset(kvm, pgd, addr);
}
-static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
pud_t *pud;
- p4d = stage2_get_p4d(kvm, cache, addr);
+ p4d = stage2_get_p4d(mmu, cache, addr);
if (stage2_p4d_none(kvm, *p4d)) {
if (!cache)
return NULL;
@@ -1126,13 +1151,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pud_offset(kvm, p4d, addr);
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud || stage2_pud_huge(kvm, *pud))
return NULL;
@@ -1147,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
return stage2_pmd_offset(kvm, pud, addr);
}
-static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
- *cache, phys_addr_t addr, const pmd_t *new_pmd)
+static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pmd_t *new_pmd)
{
pmd_t *pmd, old_pmd;
retry:
- pmd = stage2_get_pmd(kvm, cache, addr);
+ pmd = stage2_get_pmd(mmu, cache, addr);
VM_BUG_ON(!pmd);
old_pmd = *pmd;
@@ -1186,7 +1213,7 @@ retry:
* get handled accordingly.
*/
if (!pmd_thp_or_huge(old_pmd)) {
- unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE);
goto retry;
}
/*
@@ -1202,7 +1229,7 @@ retry:
*/
WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL);
} else {
get_page(virt_to_page(pmd));
}
@@ -1211,13 +1238,15 @@ retry:
return 0;
}
-static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pud_t *new_pudp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp, old_pud;
retry:
- pudp = stage2_get_pud(kvm, cache, addr);
+ pudp = stage2_get_pud(mmu, cache, addr);
VM_BUG_ON(!pudp);
old_pud = *pudp;
@@ -1236,13 +1265,13 @@ retry:
* the range for this block and retry.
*/
if (!stage2_pud_huge(kvm, old_pud)) {
- unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
+ unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE);
goto retry;
}
WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
stage2_pud_clear(kvm, pudp);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL);
} else {
get_page(virt_to_page(pudp));
}
@@ -1257,9 +1286,10 @@ retry:
* leaf-entry is returned in the appropriate level variable - pudpp,
* pmdpp, ptepp.
*/
-static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
+static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr,
pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
@@ -1268,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
*pmdpp = NULL;
*ptepp = NULL;
- pudp = stage2_get_pud(kvm, NULL, addr);
+ pudp = stage2_get_pud(mmu, NULL, addr);
if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
return false;
@@ -1294,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
return true;
}
-static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
+static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz)
{
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
bool found;
- found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
+ found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep);
if (!found)
return false;
@@ -1313,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
return sz == PAGE_SIZE && kvm_s2pte_exec(ptep);
}
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static int stage2_set_pte(struct kvm_s2_mmu *mmu,
+ struct kvm_mmu_memory_cache *cache,
phys_addr_t addr, const pte_t *new_pte,
unsigned long flags)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, old_pte;
@@ -1326,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
VM_BUG_ON(logging_active && !cache);
/* Create stage-2 page table mapping - Levels 0 and 1 */
- pud = stage2_get_pud(kvm, cache, addr);
+ pud = stage2_get_pud(mmu, cache, addr);
if (!pud) {
/*
* Ignore calls from kvm_set_spte_hva for unallocated
@@ -1340,7 +1372,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* on to allocate page.
*/
if (logging_active)
- stage2_dissolve_pud(kvm, addr, pud);
+ stage2_dissolve_pud(mmu, addr, pud);
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
@@ -1364,7 +1396,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* allocate page.
*/
if (logging_active)
- stage2_dissolve_pmd(kvm, addr, pmd);
+ stage2_dissolve_pmd(mmu, addr, pmd);
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
@@ -1388,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0;
kvm_set_pte(pte, __pte(0));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
+ kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL);
} else {
get_page(virt_to_page(pte));
}
@@ -1453,8 +1485,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, &cache, addr, &pte,
- KVM_S2PTE_FLAG_IS_IOMAP);
+ ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte,
+ KVM_S2PTE_FLAG_IS_IOMAP);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -1493,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
+static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pmd_t *pmd;
phys_addr_t next;
@@ -1516,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
/**
* stage2_wp_puds - write protect P4D range
- * @pgd: pointer to pgd entry
+ * @p4d: pointer to p4d entry
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
+static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pud_t *pud;
phys_addr_t next;
@@ -1534,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
if (!kvm_s2pud_readonly(pud))
kvm_set_s2pud_readonly(pud);
} else {
- stage2_wp_pmds(kvm, pud, addr, next);
+ stage2_wp_pmds(mmu, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
@@ -1546,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
* @addr: range start address
* @end: range end address
*/
-static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
+static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
p4d_t *p4d;
phys_addr_t next;
@@ -1556,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
do {
next = stage2_p4d_addr_end(kvm, addr, end);
if (!stage2_p4d_none(kvm, *p4d))
- stage2_wp_puds(kvm, p4d, addr, next);
+ stage2_wp_puds(mmu, p4d, addr, next);
} while (p4d++, addr = next, addr != end);
}
@@ -1566,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
* @addr: Start address of range
* @end: End address of range
*/
-static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
+ struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
+ pgd = mmu->pgd + stage2_pgd_index(kvm, addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1583,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
* the lock.
*/
cond_resched_lock(&kvm->mmu_lock);
- if (!READ_ONCE(kvm->arch.pgd))
+ if (!READ_ONCE(mmu->pgd))
break;
next = stage2_pgd_addr_end(kvm, addr, end);
if (stage2_pgd_present(kvm, *pgd))
- stage2_wp_p4ds(kvm, pgd, addr, next);
+ stage2_wp_p4ds(mmu, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -1617,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
@@ -1641,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
- stage2_wp_range(kvm, start, end);
+ stage2_wp_range(&kvm->arch.mmu, start, end);
}
/*
@@ -1804,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pgprot_t mem_type = PAGE_S2;
bool logging_active = memslot_is_logging(memslot);
unsigned long vma_pagesize, flags = 0;
+ struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu;
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
@@ -1925,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
needs_exec = exec_fault ||
(fault_status == FSC_PERM &&
- stage2_is_exec(kvm, fault_ipa, vma_pagesize));
+ stage2_is_exec(mmu, fault_ipa, vma_pagesize));
if (vma_pagesize == PUD_SIZE) {
pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
@@ -1937,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pud = kvm_s2pud_mkexec(new_pud);
- ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
+ ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud);
} else if (vma_pagesize == PMD_SIZE) {
pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
@@ -1949,7 +1986,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pmd = kvm_s2pmd_mkexec(new_pmd);
- ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
@@ -1961,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (needs_exec)
new_pte = kvm_s2pte_mkexec(new_pte);
- ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
+ ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags);
}
out_unlock:
@@ -1990,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
spin_lock(&vcpu->kvm->mmu_lock);
- if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte))
goto out;
if (pud) { /* HugeTLB */
@@ -2040,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
/* Synchronous External Abort? */
- if (kvm_vcpu_dabt_isextabt(vcpu)) {
+ if (kvm_vcpu_abt_issea(vcpu)) {
/*
* For RAS the host kernel may handle this abort.
* There is no need to pass the error into the guest.
*/
- if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
- return 1;
-
- if (unlikely(!is_iabt)) {
+ if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
kvm_inject_vabt(vcpu);
- return 1;
- }
+
+ return 1;
}
- trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
+ trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
@@ -2063,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
- (unsigned long)kvm_vcpu_get_hsr(vcpu));
+ (unsigned long)kvm_vcpu_get_esr(vcpu));
return -EFAULT;
}
@@ -2074,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
write_fault = kvm_is_write_fault(vcpu);
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
+ /*
+ * The guest has put either its instructions or its page-tables
+ * somewhere it shouldn't have. Userspace won't be able to do
+ * anything about this (there's no syndrome for a start), so
+ * re-inject the abort back into the guest.
+ */
if (is_iabt) {
- /* Prefetch Abort on I/O address */
ret = -ENOEXEC;
goto out;
}
+ if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ goto out_unlock;
+ }
+
/*
* Check for a cache maintenance operation. Since we
* ended-up here, we know it is outside of any memory
@@ -2090,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* So let's assume that the guest is just being
* cautious, and skip the instruction.
*/
- if (kvm_vcpu_dabt_is_cm(vcpu)) {
+ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
ret = 1;
goto out_unlock;
@@ -2163,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_unmap_hva_range(start, end);
@@ -2190,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
* therefore stage2_set_pte() never needs to clear out a huge PMD
* through this calling path.
*/
- stage2_set_pte(kvm, NULL, gpa, pte, 0);
+ stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0);
return 0;
}
@@ -2201,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
kvm_pfn_t pfn = pte_pfn(pte);
pte_t stage2_pte;
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_set_spte_hva(hva);
@@ -2224,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2242,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
pte_t *pte;
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
- if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
+ if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte))
return 0;
if (pud)
@@ -2255,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_age_hva(start, end);
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
@@ -2263,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
- if (!kvm->arch.pgd)
+ if (!kvm->arch.mmu.pgd)
return 0;
trace_kvm_test_age_hva(hva);
return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
@@ -2476,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
else
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
@@ -2495,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_free_stage2_pgd(kvm);
+ kvm_free_stage2_pgd(&kvm->arch.mmu);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -2505,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- unmap_stage2_range(kvm, gpa, size);
+ unmap_stage2_range(&kvm->arch.mmu, gpa, size);
spin_unlock(&kvm->mmu_lock);
}