-rw-r--r--  arch/arm64/include/asm/kvm_host.h     |  1
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h  | 20
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c          | 39
-rw-r--r--  arch/arm64/kvm/mmu.c                  | 45
-rw-r--r--  arch/arm64/kvm/pmu-emul.c             |  8
-rw-r--r--  arch/arm64/kvm/sys_regs.c             | 70
-rw-r--r--  include/linux/kvm_host.h              |  1
-rw-r--r--  include/linux/page-flags.h            | 37
-rw-r--r--  virt/kvm/kvm_main.c                   | 19
9 files changed, 165 insertions, 75 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 26bbd84a02de..62ac4174e608 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -185,7 +185,6 @@ enum vcpu_sysreg {
PMCNTENSET_EL0, /* Count Enable Set Register */
PMINTENSET_EL1, /* Interrupt Enable Set Register */
PMOVSSET_EL0, /* Overflow Flag Status Set Register */
- PMSWINC_EL0, /* Software Increment Register */
PMUSERENR_EL0, /* User Enable Register */
/* Pointer Authentication Registers in a strict increasing order. */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index f004c0115d89..e42b55bd50a2 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -433,6 +433,26 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker *walker);
/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ * with its level.
+ * @pgt: Page-table structure initialised by kvm_pgtable_*_init()
+ * or a similar initialiser.
+ * @addr: Input address for the start of the walk.
+ * @ptep: Pointer to storage for the retrieved PTE.
+ * @level: Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+ kvm_pte_t *ptep, u32 *level);
+
+/**
* kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
* Addresses with compatible permission
* attributes.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 05321f4165e3..78f36bd5df6c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
return _kvm_pgtable_walk(&walk_data);
}
+struct leaf_walk_data {
+ kvm_pte_t pte;
+ u32 level;
+};
+
+static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ struct leaf_walk_data *data = arg;
+
+ data->pte = *ptep;
+ data->level = level;
+
+ return 0;
+}
+
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+ kvm_pte_t *ptep, u32 *level)
+{
+ struct leaf_walk_data data;
+ struct kvm_pgtable_walker walker = {
+ .cb = leaf_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = &data,
+ };
+ int ret;
+
+ ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
+ PAGE_SIZE, &walker);
+ if (!ret) {
+ if (ptep)
+ *ptep = data.pte;
+ if (level)
+ *level = data.level;
+ }
+
+ return ret;
+}
+
struct hyp_map_data {
u64 phys;
kvm_pte_t attr;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2ca0a494a111..f446f55b6832 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -434,6 +434,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
return 0;
}
+static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
+ /* We shouldn't need any other callback to walk the PT */
+ .phys_to_virt = kvm_host_va,
+};
+
+static int get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+ struct kvm_pgtable pgt = {
+ .pgd = (kvm_pte_t *)kvm->mm->pgd,
+ .ia_bits = VA_BITS,
+ .start_level = (KVM_PGTABLE_MAX_LEVELS -
+ CONFIG_PGTABLE_LEVELS),
+ .mm_ops = &kvm_user_mm_ops,
+ };
+ kvm_pte_t pte = 0; /* Keep GCC quiet... */
+ u32 level = ~0;
+ int ret;
+
+ ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
+ VM_BUG_ON(ret);
+ VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
+ VM_BUG_ON(!(pte & PTE_VALID));
+
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
+}
+
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
.zalloc_pages_exact = kvm_host_zalloc_pages_exact,
@@ -781,7 +807,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
* Returns the size of the mapping.
*/
static unsigned long
-transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long hva, kvm_pfn_t *pfnp,
phys_addr_t *ipap)
{
@@ -792,8 +818,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
* sure that the HVA and IPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
- if (kvm_is_transparent_hugepage(pfn) &&
- fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+ get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
@@ -815,7 +841,7 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
*ipap &= PMD_MASK;
kvm_release_pfn_clean(pfn);
pfn &= ~(PTRS_PER_PMD - 1);
- kvm_get_pfn(pfn);
+ get_page(pfn_to_page(pfn));
*pfnp = pfn;
return PMD_SIZE;
@@ -1051,9 +1077,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
- vma_pagesize = transparent_hugepage_adjust(memslot, hva,
- &pfn, &fault_ipa);
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+ if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+ vma_pagesize = fault_granule;
+ else
+ vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
+ hva, &pfn,
+ &fault_ipa);
+ }
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 60f89bdbeebb..f5065f23b413 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
- reg &= kvm_pmu_valid_counter_mask(vcpu);
}
return reg;
@@ -564,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
- unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
int i;
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
- __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
} else {
- kvm_pmu_disable_counter_mask(vcpu, mask);
+ kvm_pmu_disable_counter_mask(vcpu,
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
}
if (val & ARMV8_PMU_PMCR_C)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
+ unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f6f126eb6ac1..a1f5101f49a3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -603,6 +603,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ /* No PMU available, any PMU reg may UNDEF... */
+ if (!kvm_arm_support_pmu_v3())
+ return;
+
+ n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
+ n &= ARMV8_PMU_PMCR_N_MASK;
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= mask;
+}
+
+static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0);
+}
+
+static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
+}
+
+static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ reset_unknown(vcpu, r);
+ __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
+}
+
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 pmcr, val;
@@ -845,7 +880,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
kvm_pmu_disable_counter_mask(vcpu, val);
}
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
}
return true;
@@ -869,7 +904,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* accessing PMINTENCLR_EL1 */
__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
}
return true;
@@ -891,7 +926,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* accessing PMOVSCLR_EL0 */
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
- p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
}
return true;
@@ -944,16 +979,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
#define PMU_SYS_REG(r) \
- SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility
+ SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
+ .reset = reset_pmevcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
+ .reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -1249,6 +1286,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return __set_id_reg(vcpu, rd, uaddr, true);
}
+static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ int err;
+ u64 val;
+
+ /* Perform the access even if we are going to ignore the value */
+ err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
+ if (err)
+ return err;
+
+ return 0;
+}
+
static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1592,16 +1643,21 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
{ PMU_SYS_REG(SYS_PMOVSCLR_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
+ /*
+ * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
+ * previously (and pointlessly) advertised in the past...
+ */
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
- .access = access_pmswinc, .reg = PMSWINC_EL0 },
+ .get_user = get_raz_id_reg, .set_user = set_wi_reg,
+ .access = access_pmswinc, .reset = NULL },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
- .access = access_pmselr, .reg = PMSELR_EL0 },
+ .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
{ PMU_SYS_REG(SYS_PMCEID0_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
- .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 },
+ .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ae7735b490b4..9818d271c2a1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -824,7 +824,6 @@ void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
-void kvm_get_pfn(kvm_pfn_t pfn);
void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5922031ffab6..1ace27c4a8e0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -633,43 +633,6 @@ static inline int PageTransCompound(struct page *page)
}
/*
- * PageTransCompoundMap is the same as PageTransCompound, but it also
- * guarantees the primary MMU has the entire compound page mapped
- * through pmd_trans_huge, which in turn guarantees the secondary MMUs
- * can also map the entire compound page. This allows the secondary
- * MMUs to call get_user_pages() only once for each compound page and
- * to immediately map the entire compound page with a single secondary
- * MMU fault. If there will be a pmd split later, the secondary MMUs
- * will get an update through the MMU notifier invalidation through
- * split_huge_pmd().
- *
- * Unlike PageTransCompound, this is safe to be called only while
- * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount check false
- * positives.
- *
- * We have to treat page cache THP differently since every subpage of it
- * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE
- * mapped in the current process so comparing subpage's _mapcount to
- * compound_mapcount to filter out PTE mapped case.
- */
-static inline int PageTransCompoundMap(struct page *page)
-{
- struct page *head;
-
- if (!PageTransCompound(page))
- return 0;
-
- if (PageAnon(page))
- return atomic_read(&page->_mapcount) < 0;
-
- head = compound_head(page);
- /* File THP is PMD mapped and not PTE mapped */
- return atomic_read(&page->_mapcount) ==
- atomic_read(compound_mapcount_ptr(head));
-}
-
-/*
* PageTransTail returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
* that hugetlbfs pages aren't involved.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d20fba0fc290..1d3a03c0fed3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -189,16 +189,6 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
return true;
}
-bool kvm_is_transparent_hugepage(kvm_pfn_t pfn)
-{
- struct page *page = pfn_to_page(pfn);
-
- if (!PageTransCompoundMap(page))
- return false;
-
- return is_transparent_hugepage(compound_head(page));
-}
-
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
@@ -2225,7 +2215,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
* Get a reference here because callers of *hva_to_pfn* and
* *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
* returned pfn. This is only needed if the VMA has VM_MIXEDMAP
- * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+ * set, but the kvm_try_get_pfn/kvm_release_pfn_clean pair will
* simply do nothing for reserved pfns.
*
* Whoever called remap_pfn_range is also going to call e.g.
@@ -2622,13 +2612,6 @@ void kvm_set_pfn_accessed(kvm_pfn_t pfn)
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
-void kvm_get_pfn(kvm_pfn_t pfn)
-{
- if (!kvm_is_reserved_pfn(pfn))
- get_page(pfn_to_page(pfn));
-}
-EXPORT_SYMBOL_GPL(kvm_get_pfn);
-
static int next_segment(unsigned long len, int offset)
{
if (len > PAGE_SIZE - offset)