summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt18
-rw-r--r--Documentation/virtual/kvm/devices/s390_flic.txt2
-rw-r--r--arch/arm/include/asm/kvm_mmu.h43
-rw-r--r--arch/arm/include/asm/stage2_pgtable.h61
-rw-r--r--arch/arm/kvm/arm.c2
-rw-r--r--arch/arm/kvm/mmu.c408
-rw-r--r--arch/arm64/include/asm/kvm_arm.h85
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h111
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h80
-rw-r--r--arch/arm64/include/asm/pgtable.h15
-rw-r--r--arch/arm64/include/asm/stage2_pgtable-nopmd.h42
-rw-r--r--arch/arm64/include/asm/stage2_pgtable-nopud.h39
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h142
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c8
-rw-r--r--arch/mips/include/asm/kvm_host.h2
-rw-r--r--arch/mips/kvm/emulate.c89
-rw-r--r--arch/mips/kvm/mips.c8
-rw-r--r--arch/mips/kvm/tlb.c26
-rw-r--r--arch/mips/kvm/trap_emul.c2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/sigp.h1
-rw-r--r--arch/s390/kvm/kvm-s390.c55
-rw-r--r--arch/s390/kvm/priv.c21
-rw-r--r--arch/s390/kvm/sigp.c6
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h6
-rw-r--r--arch/x86/kvm/irq_comm.c3
-rw-r--r--arch/x86/kvm/mmu.c28
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c56
-rw-r--r--drivers/clocksource/arm_arch_timer.c11
-rw-r--r--drivers/irqchip/irq-gic-common.c13
-rw-r--r--drivers/irqchip/irq-gic-common.h3
-rw-r--r--drivers/irqchip/irq-gic-v3.c176
-rw-r--r--drivers/irqchip/irq-gic.c87
-rw-r--r--drivers/s390/char/sclp_early.c6
-rw-r--r--include/clocksource/arm_arch_timer.h12
-rw-r--r--include/kvm/arm_vgic.h7
-rw-r--r--include/linux/irqbypass.h4
-rw-r--r--include/linux/irqchip/arm-gic-common.h34
-rw-r--r--include/linux/kvm_host.h23
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--virt/kvm/arm/arch_timer.c40
-rw-r--r--virt/kvm/arm/vgic-v2.c61
-rw-r--r--virt/kvm/arm/vgic-v3.c47
-rw-r--r--virt/kvm/arm/vgic.c50
-rw-r--r--virt/kvm/eventfd.c18
-rw-r--r--virt/kvm/kvm_main.c4
-rw-r--r--virt/lib/irqbypass.c12
52 files changed, 1302 insertions, 680 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4d0542c5206b..a4482cce4bae 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -199,8 +199,8 @@ Type: vm ioctl
Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error
-This API adds a vcpu to a virtual machine. The vcpu id is a small integer
-in the range [0, max_vcpus).
+This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
+The vcpu id is an integer in the range [0, max_vcpu_id).
The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
the KVM_CHECK_EXTENSION ioctl() at run-time.
@@ -212,6 +212,12 @@ cpus max.
If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
same as the value returned from KVM_CAP_NR_VCPUS.
+The maximum possible value for max_vcpu_id can be retrieved using the
+KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
+is the same as the value returned from KVM_CAP_MAX_VCPUS.
+
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores. (This is because the
hardware requires all the hardware threads in a CPU core to be in the
@@ -3788,6 +3794,14 @@ a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
Fails if VCPU has already been created, or if the irqchip is already in the
kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+7.6 KVM_CAP_S390_RI
+
+Architectures: s390
+Parameters: none
+
+Allows use of runtime-instrumentation introduced with zEC12 processor.
+Will return -EINVAL if the machine does not support runtime-instrumentation.
+Will return -EBUSY if a VCPU has already been created.
8. Other capabilities.
----------------------
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 8478de1c0192..6b0e115301c8 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -74,7 +74,7 @@ struct kvm_s390_io_adapter {
KVM_DEV_FLIC_ADAPTER_MODIFY
Modifies attributes of an existing I/O adapter interrupt source. Takes
- a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+ a kvm_s390_io_adapter_req specifying the adapter and the operation:
struct kvm_s390_io_adapter_req {
__u32 id;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index da44be9db4fa..ef0b276d97fc 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -47,6 +47,7 @@
#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
+#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
@@ -105,14 +106,16 @@ static inline void kvm_clean_pte(pte_t *pte)
clean_pte_table(pte);
}
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
- pte_val(*pte) |= L_PTE_S2_RDWR;
+ pte_val(pte) |= L_PTE_S2_RDWR;
+ return pte;
}
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
- pmd_val(*pmd) |= L_PMD_S2_RDWR;
+ pmd_val(pmd) |= L_PMD_S2_RDWR;
+ return pmd;
}
static inline void kvm_set_s2pte_readonly(pte_t *pte)
@@ -135,22 +138,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
}
-
-/* Open coded p*d_addr_end that can deal with 64bit addresses */
-#define kvm_pgd_addr_end(addr, end) \
-({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-
-#define kvm_pud_addr_end(addr,end) (end)
-
-#define kvm_pmd_addr_end(addr, end) \
-({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
- (__boundary - 1 < (end) - 1)? __boundary: (end); \
-})
-
-#define kvm_pgd_index(addr) pgd_index(addr)
-
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -159,19 +146,11 @@ static inline bool kvm_page_empty(void *ptr)
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(kvm, pudp) (0)
-
-#define KVM_PREALLOC_LEVEL 0
+#define kvm_pud_table_empty(kvm, pudp) false
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
- return kvm->arch.pgd;
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
- return PTRS_PER_S2_PGD * sizeof(pgd_t);
-}
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define hyp_pud_table_empty(pudp) false
struct kvm;
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
new file mode 100644
index 000000000000..460d616bb2d6
--- /dev/null
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_S2_PGTABLE_H_
+#define __ARM_S2_PGTABLE_H_
+
+#define stage2_pgd_none(pgd) pgd_none(pgd)
+#define stage2_pgd_clear(pgd) pgd_clear(pgd)
+#define stage2_pgd_present(pgd) pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
+#define stage2_pud_free(pud) pud_free(NULL, pud)
+
+#define stage2_pud_none(pud) pud_none(pud)
+#define stage2_pud_clear(pud) pud_clear(pud)
+#define stage2_pud_present(pud) pud_present(pud)
+#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
+#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud) pud_huge(pud)
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pud_addr_end(addr, end) (end)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pgd_index(addr) pgd_index(addr)
+
+#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(pudp) false
+
+#endif /* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dded1b763c16..be4b6394a062 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -448,7 +448,7 @@ static void update_vttbr(struct kvm *kvm)
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
/* update vttbr to be used with the new vmid */
- pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+ pgd_phys = virt_to_phys(kvm->arch.pgd);
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 58dbd5c439df..783e5ff0b32e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,11 +43,9 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
-#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
-#define kvm_pud_huge(_x) pud_huge(_x)
-
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
@@ -69,14 +67,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
- /*
- * This function also gets called when dealing with HYP page
- * tables. As HYP doesn't have an associated struct kvm (and
- * the HYP page tables are fairly static), we don't do
- * anything there.
- */
- if (kvm)
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
/*
@@ -115,7 +106,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
*/
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
- if (!kvm_pmd_huge(*pmd))
+ if (!pmd_thp_or_huge(*pmd))
return;
pmd_clear(pmd);
@@ -155,29 +146,29 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
-static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
- pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
- pgd_clear(pgd);
+ pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+ stage2_pgd_clear(pgd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- pud_free(NULL, pud_table);
+ stage2_pud_free(pud_table);
put_page(virt_to_page(pgd));
}
-static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
- pmd_t *pmd_table = pmd_offset(pud, 0);
- VM_BUG_ON(pud_huge(*pud));
- pud_clear(pud);
+ pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+ VM_BUG_ON(stage2_pud_huge(*pud));
+ stage2_pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- pmd_free(NULL, pmd_table);
+ stage2_pmd_free(pmd_table);
put_page(virt_to_page(pud));
}
-static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
- VM_BUG_ON(kvm_pmd_huge(*pmd));
+ VM_BUG_ON(pmd_thp_or_huge(*pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pte_free_kernel(NULL, pte_table);
@@ -204,7 +195,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
* the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
* the IO subsystem will never hit in the cache.
*/
-static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t start_addr = addr;
@@ -226,21 +217,21 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
}
} while (pte++, addr += PAGE_SIZE, addr != end);
- if (kvm_pte_table_empty(kvm, start_pte))
- clear_pmd_entry(kvm, pmd, start_addr);
+ if (stage2_pte_table_empty(start_pte))
+ clear_stage2_pmd_entry(kvm, pmd, start_addr);
}
-static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t next, start_addr = addr;
pmd_t *pmd, *start_pmd;
- start_pmd = pmd = pmd_offset(pud, addr);
+ start_pmd = pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
+ if (pmd_thp_or_huge(*pmd)) {
pmd_t old_pmd = *pmd;
pmd_clear(pmd);
@@ -250,57 +241,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
put_page(virt_to_page(pmd));
} else {
- unmap_ptes(kvm, pmd, addr, next);
+ unmap_stage2_ptes(kvm, pmd, addr, next);
}
}
} while (pmd++, addr = next, addr != end);
- if (kvm_pmd_table_empty(kvm, start_pmd))
- clear_pud_entry(kvm, pud, start_addr);
+ if (stage2_pmd_table_empty(start_pmd))
+ clear_stage2_pud_entry(kvm, pud, start_addr);
}
-static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
phys_addr_t addr, phys_addr_t end)
{
phys_addr_t next, start_addr = addr;
pud_t *pud, *start_pud;
- start_pud = pud = pud_offset(pgd, addr);
+ start_pud = pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
- if (pud_huge(*pud)) {
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
+ if (stage2_pud_huge(*pud)) {
pud_t old_pud = *pud;
- pud_clear(pud);
+ stage2_pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
-
kvm_flush_dcache_pud(old_pud);
-
put_page(virt_to_page(pud));
} else {
- unmap_pmds(kvm, pud, addr, next);
+ unmap_stage2_pmds(kvm, pud, addr, next);
}
}
} while (pud++, addr = next, addr != end);
- if (kvm_pud_table_empty(kvm, start_pud))
- clear_pgd_entry(kvm, pgd, start_addr);
+ if (stage2_pud_table_empty(start_pud))
+ clear_stage2_pgd_entry(kvm, pgd, start_addr);
}
-
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
- phys_addr_t start, u64 size)
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm: The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size: The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
- next = kvm_pgd_addr_end(addr, end);
- if (!pgd_none(*pgd))
- unmap_puds(kvm, pgd, addr, next);
+ next = stage2_pgd_addr_end(addr, end);
+ if (!stage2_pgd_none(*pgd))
+ unmap_stage2_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -322,11 +320,11 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
pmd_t *pmd;
phys_addr_t next;
- pmd = pmd_offset(pud, addr);
+ pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd))
+ if (pmd_thp_or_huge(*pmd))
kvm_flush_dcache_pmd(*pmd);
else
stage2_flush_ptes(kvm, pmd, addr, next);
@@ -340,11 +338,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
pud_t *pud;
phys_addr_t next;
- pud = pud_offset(pgd, addr);
+ pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
- if (pud_huge(*pud))
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
+ if (stage2_pud_huge(*pud))
kvm_flush_dcache_pud(*pud);
else
stage2_flush_pmds(kvm, pud, addr, next);
@@ -360,9 +358,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
- next = kvm_pgd_addr_end(addr, end);
+ next = stage2_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -391,6 +389,100 @@ static void stage2_flush_vm(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
}
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+ pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+ pgd_clear(pgd);
+ pud_free(NULL, pud_table);
+ put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+ pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+ VM_BUG_ON(pud_huge(*pud));
+ pud_clear(pud);
+ pmd_free(NULL, pmd_table);
+ put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+ pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ VM_BUG_ON(pmd_thp_or_huge(*pmd));
+ pmd_clear(pmd);
+ pte_free_kernel(NULL, pte_table);
+ put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+ pte_t *pte, *start_pte;
+
+ start_pte = pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ kvm_set_pte(pte, __pte(0));
+ put_page(virt_to_page(pte));
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (hyp_pte_table_empty(start_pte))
+ clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t next;
+ pmd_t *pmd, *start_pmd;
+
+ start_pmd = pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ /* Hyp doesn't use huge pmds */
+ if (!pmd_none(*pmd))
+ unmap_hyp_ptes(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+
+ if (hyp_pmd_table_empty(start_pmd))
+ clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t next;
+ pud_t *pud, *start_pud;
+
+ start_pud = pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ /* Hyp doesn't use huge puds */
+ if (!pud_none(*pud))
+ unmap_hyp_pmds(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+
+ if (hyp_pud_table_empty(start_pud))
+ clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ pgd_t *pgd;
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+ /*
+ * We don't unmap anything from HYP, except at the hyp tear down.
+ * Hence, we don't have to invalidate the TLBs here.
+ */
+ pgd = pgdp + pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (!pgd_none(*pgd))
+ unmap_hyp_puds(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
/**
* free_boot_hyp_pgd - free HYP boot page tables
*
@@ -401,14 +493,14 @@ void free_boot_hyp_pgd(void)
mutex_lock(&kvm_hyp_pgd_mutex);
if (boot_hyp_pgd) {
- unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
- unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+ unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+ unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
- unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+ unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
mutex_unlock(&kvm_hyp_pgd_mutex);
}
@@ -433,9 +525,9 @@ void free_hyp_pgds(void)
if (hyp_pgd) {
for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
- unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+ unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
- unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+ unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
@@ -645,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
-/* Free the HW pgd, one page at a time */
-static void kvm_free_hwpgd(void *hwpgd)
-{
- free_pages_exact(hwpgd, kvm_get_hwpgd_size());
-}
-
-/* Allocate the HW PGD, making sure that each page gets its own refcount */
-static void *kvm_alloc_hwpgd(void)
-{
- unsigned int size = kvm_get_hwpgd_size();
-
- return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
-}
-
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -673,81 +751,22 @@ static void *kvm_alloc_hwpgd(void)
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
pgd_t *pgd;
- void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
- hwpgd = kvm_alloc_hwpgd();
- if (!hwpgd)
+ /* Allocate the HW PGD, making sure that each page gets its own refcount */
+ pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+ if (!pgd)
return -ENOMEM;
- /* When the kernel uses more levels of page tables than the
- * guest, we allocate a fake PGD and pre-populate it to point
- * to the next-level page table, which will be the real
- * initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
- * the PMD and the kernel will use folded pud.
- * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
- * pages.
- */
- if (KVM_PREALLOC_LEVEL > 0) {
- int i;
-
- /*
- * Allocate fake pgd for the page table manipulation macros to
- * work. This is not used by the hardware and we have no
- * alignment requirement for this allocation.
- */
- pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
- GFP_KERNEL | __GFP_ZERO);
-
- if (!pgd) {
- kvm_free_hwpgd(hwpgd);
- return -ENOMEM;
- }
-
- /* Plug the HW PGD into the fake one. */
- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
- if (KVM_PREALLOC_LEVEL == 1)
- pgd_populate(NULL, pgd + i,
- (pud_t *)hwpgd + i * PTRS_PER_PUD);
- else if (KVM_PREALLOC_LEVEL == 2)
- pud_populate(NULL, pud_offset(pgd, 0) + i,
- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
- }
- } else {
- /*
- * Allocate actual first-level Stage-2 page table used by the
- * hardware for Stage-2 page table walks.
- */
- pgd = (pgd_t *)hwpgd;
- }
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
}
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm: The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size: The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
- unmap_range(kvm, kvm->arch.pgd, start, size);
-}
-
static void stage2_unmap_memslot(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
@@ -830,10 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm_get_hwpgd(kvm));
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(kvm->arch.pgd);
-
+ /* Free the HW pgd, one page at a time */
+ free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL;
}
@@ -843,16 +860,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
- if (WARN_ON(pgd_none(*pgd))) {
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+ if (WARN_ON(stage2_pgd_none(*pgd))) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
- pgd_populate(NULL, pgd, pud);
+ stage2_pgd_populate(pgd, pud);
get_page(virt_to_page(pgd));
}
- return pud_offset(pgd, addr);
+ return stage2_pud_offset(pgd, addr);
}
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -862,15 +879,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pmd_t *pmd;
pud = stage2_get_pud(kvm, cache, addr);
- if (pud_none(*pud)) {
+ if (stage2_pud_none(*pud)) {
if (!cache)
return NULL;
pmd = mmu_memory_cache_alloc(cache);
- pud_populate(NULL, pud, pmd);
+ stage2_pud_populate(pud, pmd);
get_page(virt_to_page(pud));
}
- return pmd_offset(pud, addr);
+ return stage2_pmd_offset(pud, addr);
}
static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -893,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
old_pmd = *pmd;
- kvm_set_pmd(pmd, *new_pmd);
- if (pmd_present(old_pmd))
+ if (pmd_present(old_pmd)) {
+ pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
- else
+ } else {
get_page(virt_to_page(pmd));
+ }
+
+ kvm_set_pmd(pmd, *new_pmd);
return 0;
}
@@ -946,15 +966,38 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
/* Create 2nd stage page table mapping - Level 3 */
old_pte = *pte;
- kvm_set_pte(pte, *new_pte);
- if (pte_present(old_pte))
+ if (pte_present(old_pte)) {
+ kvm_set_pte(pte, __pte(0));
kvm_tlb_flush_vmid_ipa(kvm, addr);
- else
+ } else {
get_page(virt_to_page(pte));
+ }
+ kvm_set_pte(pte, *new_pte);
return 0;
}
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+ if (pte_young(*pte)) {
+ *pte = pte_mkold(*pte);
+ return 1;
+ }
+ return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+ return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+ return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
/**
* kvm_phys_addr_ioremap - map a device range to guest IPA
*
@@ -978,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
if (writable)
- kvm_set_s2pte_writable(&pte);
+ pte = kvm_s2pte_mkwrite(pte);
ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
@@ -1078,12 +1121,12 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
pmd_t *pmd;
phys_addr_t next;
- pmd = pmd_offset(pud, addr);
+ pmd = stage2_pmd_offset(pud, addr);
do {
- next = kvm_pmd_addr_end(addr, end);
+ next = stage2_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
+ if (pmd_thp_or_huge(*pmd)) {
if (!kvm_s2pmd_readonly(pmd))
kvm_set_s2pmd_readonly(pmd);
} else {
@@ -1106,12 +1149,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
pud_t *pud;
phys_addr_t next;
- pud = pud_offset(pgd, addr);
+ pud = stage2_pud_offset(pgd, addr);
do {
- next = kvm_pud_addr_end(addr, end);
- if (!pud_none(*pud)) {
+ next = stage2_pud_addr_end(addr, end);
+ if (!stage2_pud_none(*pud)) {
/* TODO:PUD not supported, revisit later if supported */
- BUG_ON(kvm_pud_huge(*pud));
+ BUG_ON(stage2_pud_huge(*pud));
stage2_wp_pmds(pud, addr, next);
}
} while (pud++, addr = next, addr != end);
@@ -1128,7 +1171,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1140,8 +1183,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
if (need_resched() || spin_needbreak(&kvm->mmu_lock))
cond_resched_lock(&kvm->mmu_lock);
- next = kvm_pgd_addr_end(addr, end);
- if (pgd_present(*pgd))
+ next = stage2_pgd_addr_end(addr, end);
+ if (stage2_pgd_present(*pgd))
stage2_wp_puds(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -1320,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pmd_t new_pmd = pfn_pmd(pfn, mem_type);
new_pmd = pmd_mkhuge(new_pmd);
if (writable) {
- kvm_set_s2pmd_writable(&new_pmd);
+ new_pmd = kvm_s2pmd_mkwrite(new_pmd);
kvm_set_pfn_dirty(pfn);
}
coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
@@ -1329,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
pte_t new_pte = pfn_pte(pfn, mem_type);
if (writable) {
- kvm_set_s2pte_writable(&new_pte);
+ new_pte = kvm_s2pte_mkwrite(new_pte);
kvm_set_pfn_dirty(pfn);
mark_page_dirty(kvm, gfn);
}
@@ -1348,6 +1391,8 @@ out_unlock:
* Resolve the access fault by making the page young again.
* Note that because the faulting entry is guaranteed not to be
* cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
*/
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
@@ -1364,7 +1409,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
goto out;
- if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */
*pmd = pmd_mkyoung(*pmd);
pfn = pmd_pfn(*pmd);
pfn_valid = true;
@@ -1588,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
- if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
- if (pmd_young(*pmd)) {
- *pmd = pmd_mkold(*pmd);
- return 1;
- }
-
- return 0;
- }
+ if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
+ return stage2_pmdp_test_and_clear_young(pmd);
pte = pte_offset_kernel(pmd, gpa);
if (pte_none(*pte))
return 0;
- if (pte_young(*pte)) {
- *pte = pte_mkold(*pte); /* Just a page... */
- return 1;
- }
-
- return 0;
+ return stage2_ptep_test_and_clear_young(pte);
}
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
@@ -1618,7 +1652,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
- if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */
+ if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
return pmd_young(*pmd);
pte = pte_offset_kernel(pmd, gpa);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3f29887995bc..ffde15fed3e1 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -96,32 +96,37 @@
SCTLR_EL2_SA | SCTLR_EL2_I)
/* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
-#define TCR_EL2_TBI (1 << 20)
-#define TCR_EL2_PS (7 << 16)
-#define TCR_EL2_PS_40B (2 << 16)
-#define TCR_EL2_TG0 (1 << 14)
-#define TCR_EL2_SH0 (3 << 12)
-#define TCR_EL2_ORGN0 (3 << 10)
-#define TCR_EL2_IRGN0 (3 << 8)
-#define TCR_EL2_T0SZ 0x3f
-#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
- TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
+#define TCR_EL2_TBI (1 << 20)
+#define TCR_EL2_PS_SHIFT 16
+#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_TG0_MASK TCR_TG0_MASK
+#define TCR_EL2_SH0_MASK TCR_SH0_MASK
+#define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define TCR_EL2_T0SZ_MASK 0x3f
+#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_RES1 (1 << 31)
-#define VTCR_EL2_PS_MASK (7 << 16)
-#define VTCR_EL2_TG0_MASK (1 << 14)
-#define VTCR_EL2_TG0_4K (0 << 14)
-#define VTCR_EL2_TG0_64K (1 << 14)
-#define VTCR_EL2_SH0_MASK (3 << 12)
-#define VTCR_EL2_SH0_INNER (3 << 12)
-#define VTCR_EL2_ORGN0_MASK (3 << 10)
-#define VTCR_EL2_ORGN0_WBWA (1 << 10)
-#define VTCR_EL2_IRGN0_MASK (3 << 8)
-#define VTCR_EL2_IRGN0_WBWA (1 << 8)
-#define VTCR_EL2_SL0_MASK (3 << 6)
-#define VTCR_EL2_SL0_LVL1 (1 << 6)
+#define VTCR_EL2_HD (1 << 22)
+#define VTCR_EL2_HA (1 << 21)
+#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
+#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
+#define VTCR_EL2_TG0_4K TCR_TG0_4K
+#define VTCR_EL2_TG0_16K TCR_TG0_16K
+#define VTCR_EL2_TG0_64K TCR_TG0_64K
+#define VTCR_EL2_SH0_MASK TCR_SH0_MASK
+#define VTCR_EL2_SH0_INNER TCR_SH0_INNER
+#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA
+#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
+#define VTCR_EL2_SL0_SHIFT 6
+#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS_SHIFT 19
@@ -137,35 +142,45 @@
* (see hyp-init.S).
*
* Note that when using 4K pages, we concatenate two first level page tables
- * together.
+ * together. With 16K pages, we concatenate 16 first level page tables.
*
* The magic numbers used for VTTBR_X in this patch can be found in Tables
* D4-23 and D4-25 in ARM DDI 0487A.b.
*/
+
+#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
+#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+ VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
+
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
- VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
- VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
-#else
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 38
+#elif defined(CONFIG_ARM64_16K_PAGES)
+/*
+ * Stage2 translation configuration:
+ * 16kB pages (TG0 = 2)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 42
+#else /* 4K */
/*
* Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
- VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
- VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC 37
#endif
+#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
+#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (UL(48))
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 22732a5e3119..844fe5d5ff44 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -45,18 +45,6 @@
*/
#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD and potentially the PUD which are
- * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
- * tables use one level of tables less than the kernel.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define KVM_MMU_CACHE_MIN_PAGES 1
-#else
-#define KVM_MMU_CACHE_MIN_PAGES 2
-#endif
-
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
@@ -91,6 +79,8 @@ alternative_endif
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+#include <asm/stage2_pgtable.h>
+
int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_boot_hyp_pgd(void);
@@ -121,19 +111,32 @@ static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
static inline void kvm_clean_pte_entry(pte_t *pte) {}
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
- pte_val(*pte) |= PTE_S2_RDWR;
+ pte_val(pte) |= PTE_S2_RDWR;
+ return pte;
}
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
- pmd_val(*pmd) |= PMD_S2_RDWR;
+ pmd_val(pmd) |= PMD_S2_RDWR;
+ return pmd;
}
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
- pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
+ pteval_t pteval;
+ unsigned long tmp;
+
+ asm volatile("// kvm_set_s2pte_readonly\n"
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " and %0, %0, %3 // clear PTE_S2_RDWR\n"
+ " orr %0, %0, %4 // set PTE_S2_RDONLY\n"
+ " stxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
+ : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
}
static inline bool kvm_s2pte_readonly(pte_t *pte)
@@ -143,69 +146,12 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
- pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
+ kvm_set_s2pte_readonly((pte_t *)pmd);
}
static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
{
- return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
-}
-
-
-#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
-#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
-#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
-
-/*
- * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
- * the entire IPA input range with a single pgd entry, and we would only need
- * one pgd entry. Note that in this case, the pgd is actually not used by
- * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
- * structure for the kernel pgtable macros to work.
- */
-#if PGDIR_SHIFT > KVM_PHYS_SHIFT
-#define PTRS_PER_S2_PGD_SHIFT 0
-#else
-#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT)
-#endif
-#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
-
-#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
-
-/*
- * If we are concatenating first level stage-2 page tables, we would have less
- * than or equal to 16 pointers in the fake PGD, because that's what the
- * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS)
- * represents the first level for the host, and we add 1 to go to the next
- * level (which uses contatenation) for the stage-2 tables.
- */
-#if PTRS_PER_S2_PGD <= 16
-#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1)
-#else
-#define KVM_PREALLOC_LEVEL (0)
-#endif
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
- pgd_t *pgd = kvm->arch.pgd;
- pud_t *pud;
-
- if (KVM_PREALLOC_LEVEL == 0)
- return pgd;
-
- pud = pud_offset(pgd, 0);
- if (KVM_PREALLOC_LEVEL == 1)
- return pud;
-
- BUG_ON(KVM_PREALLOC_LEVEL != 2);
- return pmd_offset(pud, 0);
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
- if (KVM_PREALLOC_LEVEL > 0)
- return PTRS_PER_S2_PGD * PAGE_SIZE;
- return PTRS_PER_S2_PGD * sizeof(pgd_t);
+ return kvm_s2pte_readonly((pte_t *)pmd);
}
static inline bool kvm_page_empty(void *ptr)
@@ -214,23 +160,20 @@ static inline bool kvm_page_empty(void *ptr)
return page_count(ptr_page) == 1;
}
-#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
-#define kvm_pmd_table_empty(kvm, pmdp) (0)
+#define hyp_pmd_table_empty(pmdp) (0)
#else
-#define kvm_pmd_table_empty(kvm, pmdp) \
- (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#endif
#ifdef __PAGETABLE_PUD_FOLDED
-#define kvm_pud_table_empty(kvm, pudp) (0)
+#define hyp_pud_table_empty(pudp) (0)
#else
-#define kvm_pud_table_empty(kvm, pudp) \
- (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
+#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
#endif
-
struct kvm;
#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5c25b831273d..936f1732727c 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -208,23 +208,69 @@
#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
#define TCR_TxSZ_WIDTH 6
-#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
-#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
-#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
-#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26))
-#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26))
-#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26))
-#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
-#define TCR_TG0_4K (UL(0) << 14)
-#define TCR_TG0_64K (UL(1) << 14)
-#define TCR_TG0_16K (UL(2) << 14)
-#define TCR_TG1_16K (UL(1) << 30)
-#define TCR_TG1_4K (UL(2) << 30)
-#define TCR_TG1_64K (UL(3) << 30)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_IRGN1_SHIFT 24
+#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT)
+#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT)
+
+#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC)
+#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA)
+#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT)
+#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA)
+#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK)
+
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_ORGN1_SHIFT 26
+#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT)
+#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT)
+
+#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC)
+#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)
+#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT)
+#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA)
+#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_SH1_SHIFT 28
+#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT)
+#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT)
+#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_TG1_SHIFT 30
+#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT)
+#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT)
+#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
+#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
+
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 989fef16d461..f1d5afdb12db 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -290,6 +290,8 @@ static inline pgprot_t mk_sect_prot(pgprot_t prot)
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
+#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
+
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
@@ -530,14 +532,12 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
* Atomic pte/pmd modifications.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
+static inline int __ptep_test_and_clear_young(pte_t *ptep)
{
pteval_t pteval;
unsigned int tmp, res;
- asm volatile("// ptep_test_and_clear_young\n"
+ asm volatile("// __ptep_test_and_clear_young\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
@@ -550,6 +550,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
return res;
}
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ return __ptep_test_and_clear_young(ptep);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
new file mode 100644
index 000000000000..2656a0fd05a6
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
+#define __ARM64_S2_PGTABLE_NOPMD_H_
+
+#include <asm/stage2_pgtable-nopud.h>
+
+#define __S2_PGTABLE_PMD_FOLDED
+
+#define S2_PMD_SHIFT S2_PUD_SHIFT
+#define S2_PTRS_PER_PMD 1
+#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
+#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
+
+#define stage2_pud_none(pud) (0)
+#define stage2_pud_present(pud) (1)
+#define stage2_pud_clear(pud) do { } while (0)
+#define stage2_pud_populate(pud, pmd) do { } while (0)
+#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
+
+#define stage2_pmd_free(pmd) do { } while (0)
+
+#define stage2_pmd_addr_end(addr, end) (end)
+
+#define stage2_pud_huge(pud) (0)
+#define stage2_pmd_table_empty(pmdp) (0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
new file mode 100644
index 000000000000..5ee87b54ebf3
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
+#define __ARM64_S2_PGTABLE_NOPUD_H_
+
+#define __S2_PGTABLE_PUD_FOLDED
+
+#define S2_PUD_SHIFT S2_PGDIR_SHIFT
+#define S2_PTRS_PER_PUD 1
+#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
+
+#define stage2_pgd_none(pgd) (0)
+#define stage2_pgd_present(pgd) (1)
+#define stage2_pgd_clear(pgd) do { } while (0)
+#define stage2_pgd_populate(pgd, pud) do { } while (0)
+
+#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
+
+#define stage2_pud_free(x) do { } while (0)
+
+#define stage2_pud_addr_end(addr, end) (end)
+#define stage2_pud_table_empty(pmdp) (0)
+
+#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
new file mode 100644
index 000000000000..8b68099348e5
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_S2_PGTABLE_H_
+#define __ARM64_S2_PGTABLE_H_
+
+#include <asm/pgtable.h>
+
+/*
+ * The hardware supports concatenation of up to 16 tables at stage2 entry level
+ * and we use the feature whenever possible.
+ *
+ * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
+ * On arm64, the smallest PAGE_SIZE supported is 4k, which means
+ * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
+ * This implies, the total number of page table levels at stage2 expected
+ * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * in normal translations(e.g, stage1), since we cannot have another level in
+ * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ */
+#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+
+/*
+ * With all the supported VA_BITs and 40bit guest IPA, the following condition
+ * is always true:
+ *
+ * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
+ *
+ * We base our stage-2 page table walker helpers on this assumption and
+ * fall back to using the host version of the helper wherever possible.
+ * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
+ * to using the host version, since it is guaranteed it is not folded at host.
+ *
+ * If the condition breaks in the future, we can rearrange the host level
+ * definitions and reuse them for stage2. Till then...
+ */
+#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
+#error "Unsupported combination of guest IPA and host VA_BITS."
+#endif
+
+/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
+#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
+#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
+#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
+
+/*
+ * The number of PTRS across all concatenated stage2 tables given by the
+ * number of bits resolved at the initial level.
+ */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
+
+
+#if STAGE2_PGTABLE_LEVELS > 3
+
+#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
+
+#define stage2_pgd_none(pgd) pgd_none(pgd)
+#define stage2_pgd_clear(pgd) pgd_clear(pgd)
+#define stage2_pgd_present(pgd) pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
+#define stage2_pud_free(pud) pud_free(NULL, pud)
+
+#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
+
+static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* STAGE2_PGTABLE_LEVELS > 3 */
+
+
+#if STAGE2_PGTABLE_LEVELS > 2
+
+#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
+
+#define stage2_pud_none(pud) pud_none(pud)
+#define stage2_pud_clear(pud) pud_clear(pud)
+#define stage2_pud_present(pud) pud_present(pud)
+#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
+#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud) pud_huge(pud)
+#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* STAGE2_PGTABLE_LEVELS > 2 */
+
+#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
+
+#if STAGE2_PGTABLE_LEVELS == 2
+#include <asm/stage2_pgtable-nopmd.h>
+#elif STAGE2_PGTABLE_LEVELS == 3
+#include <asm/stage2_pgtable-nopud.h>
+#endif
+
+
+#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index de7450df7629..aa2e34e99582 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM_ARM_VGIC_V3
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
- depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index bcbe761a5a3d..b81f4091c909 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -66,6 +66,14 @@ u32 __hyp_text __init_stage2_translation(void)
val |= 64 - (parange > 40 ? 40 : parange);
/*
+ * Check the availability of Hardware Access Flag / Dirty Bit
+ * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
+ */
+ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
+ if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+ val |= VTCR_EL2_HA;
+
+ /*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
* bit in VTCR_EL2.
*/
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index f6b12790716c..942b8f6bf35b 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -747,7 +747,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare);
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack);
void kvm_mips_init_count(struct kvm_vcpu *vcpu);
int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index b37954cc880d..b8b7860ec1a8 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -302,12 +302,31 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
*/
static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
{
- ktime_t expires;
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ ktime_t expires, threshold;
+ uint32_t count, compare;
int running;
- /* Is the hrtimer pending? */
+ /* Calculate the biased and scaled guest CP0_Count */
+ count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+ compare = kvm_read_c0_guest_compare(cop0);
+
+ /*
+ * Find whether CP0_Count has reached the closest timer interrupt. If
+ * not, we shouldn't inject it.
+ */
+ if ((int32_t)(count - compare) < 0)
+ return count;
+
+ /*
+ * The CP0_Count we're going to return has already reached the closest
+ * timer interrupt. Quickly check if it really is a new interrupt by
+ * looking at whether the interval until the hrtimer expiry time is
+ * less than 1/4 of the timer period.
+ */
expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
- if (ktime_compare(now, expires) >= 0) {
+ threshold = ktime_add_ns(now, vcpu->arch.count_period / 4);
+ if (ktime_before(expires, threshold)) {
/*
* Cancel it while we handle it so there's no chance of
* interference with the timeout handler.
@@ -329,8 +348,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
}
}
- /* Return the biased and scaled guest CP0_Count */
- return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+ return count;
}
/**
@@ -420,32 +438,6 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
}
/**
- * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
- * @vcpu: Virtual CPU.
- *
- * Recalculates and updates the expiry time of the hrtimer. This can be used
- * after timer parameters have been altered which do not depend on the time that
- * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
- * kvm_mips_resume_hrtimer() are used directly).
- *
- * It is guaranteed that no timer interrupts will be lost in the process.
- *
- * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
- */
-static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
-{
- ktime_t now;
- uint32_t count;
-
- /*
- * freeze_hrtimer takes care of a timer interrupts <= count, and
- * resume_hrtimer the hrtimer takes care of a timer interrupts > count.
- */
- now = kvm_mips_freeze_hrtimer(vcpu, &count);
- kvm_mips_resume_hrtimer(vcpu, now, count);
-}
-
-/**
* kvm_mips_write_count() - Modify the count and update timer.
* @vcpu: Virtual CPU.
* @count: Guest CP0_Count value to set.
@@ -540,23 +532,42 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
* kvm_mips_write_compare() - Modify compare and update timer.
* @vcpu: Virtual CPU.
* @compare: New CP0_Compare value.
+ * @ack: Whether to acknowledge timer interrupt.
*
* Update CP0_Compare to a new value and update the timeout.
+ * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure
+ * any pending timer interrupt is preserved.
*/
-void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare)
+void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
+ int dc;
+ u32 old_compare = kvm_read_c0_guest_compare(cop0);
+ ktime_t now;
+ uint32_t count;
/* if unchanged, must just be an ack */
- if (kvm_read_c0_guest_compare(cop0) == compare)
+ if (old_compare == compare) {
+ if (!ack)
+ return;
+ kvm_mips_callbacks->dequeue_timer_int(vcpu);
+ kvm_write_c0_guest_compare(cop0, compare);
return;
+ }
+
+ /* freeze_hrtimer() takes care of timer interrupts <= count */
+ dc = kvm_mips_count_disabled(vcpu);
+ if (!dc)
+ now = kvm_mips_freeze_hrtimer(vcpu, &count);
+
+ if (ack)
+ kvm_mips_callbacks->dequeue_timer_int(vcpu);
- /* Update compare */
kvm_write_c0_guest_compare(cop0, compare);
- /* Update timeout if count enabled */
- if (!kvm_mips_count_disabled(vcpu))
- kvm_mips_update_hrtimer(vcpu);
+ /* resume_hrtimer() takes care of timer interrupts > count */
+ if (!dc)
+ kvm_mips_resume_hrtimer(vcpu, now, count);
}
/**
@@ -1095,9 +1106,9 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
/* If we are writing to COMPARE */
/* Clear pending timer interrupt, if any */
- kvm_mips_callbacks->dequeue_timer_int(vcpu);
kvm_mips_write_compare(vcpu,
- vcpu->arch.gprs[rt]);
+ vcpu->arch.gprs[rt],
+ true);
} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
unsigned int old_val, val, change;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 70ef1a43c114..23b209463238 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1079,7 +1079,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_MIPS_FPU:
- r = !!cpu_has_fpu;
+ /* We don't handle systems with inconsistent cpu_has_fpu */
+ r = !!raw_cpu_has_fpu;
break;
case KVM_CAP_MIPS_MSA:
/*
@@ -1555,8 +1556,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
/* Disable MSA & FPU */
disable_msa();
- if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+ if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
clear_c0_status(ST0_CU1 | ST0_FR);
+ disable_fpu_hazard();
+ }
vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
} else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
set_c0_status(ST0_CU1);
@@ -1567,6 +1570,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
/* Disable FPU */
clear_c0_status(ST0_CU1 | ST0_FR);
+ disable_fpu_hazard();
}
preempt_enable();
}
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index e0e1d0a611fc..60e4ad0016e7 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -268,6 +268,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
int even;
struct kvm *kvm = vcpu->kvm;
const int flush_dcache_mask = 0;
+ int ret;
if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
@@ -299,14 +300,18 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
pfn1 = kvm->arch.guest_pmap[gfn];
}
- entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
(1 << 2) | (0x1 << 1);
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
(1 << 2) | (0x1 << 1);
- return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
- flush_dcache_mask);
+ preempt_disable();
+ entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
+ ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+ flush_dcache_mask);
+ preempt_enable();
+
+ return ret;
}
EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
@@ -361,6 +366,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
struct kvm *kvm = vcpu->kvm;
kvm_pfn_t pfn0, pfn1;
+ int ret;
if ((tlb->tlb_hi & VPN2_MASK) == 0) {
pfn0 = 0;
@@ -387,9 +393,6 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
*hpa1 = pfn1 << PAGE_SHIFT;
/* Get attributes from the Guest TLB */
- entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
- kvm_mips_get_kernel_asid(vcpu) :
- kvm_mips_get_user_asid(vcpu));
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
(tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
@@ -398,8 +401,15 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
tlb->tlb_lo0, tlb->tlb_lo1);
- return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
- tlb->tlb_mask);
+ preempt_disable();
+ entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
+ kvm_mips_get_kernel_asid(vcpu) :
+ kvm_mips_get_user_asid(vcpu));
+ ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
+ tlb->tlb_mask);
+ preempt_enable();
+
+ return ret;
}
EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index c4038d2a724c..caa5ea1038a0 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -546,7 +546,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
kvm_mips_write_count(vcpu, v);
break;
case KVM_REG_MIPS_CP0_COMPARE:
- kvm_mips_write_compare(vcpu, v);
+ kvm_mips_write_compare(vcpu, v, false);
break;
case KVM_REG_MIPS_CP0_CAUSE:
/*
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7b343170453..a07645c17818 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -40,6 +40,9 @@
#define KVM_MAX_VCORES NR_CPUS
#define KVM_USER_MEM_SLOTS 512
+#include <asm/cputhreads.h>
+#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
+
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6da41fab70fb..9282ccf1d136 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -544,10 +544,6 @@ struct kvm_vcpu_arch {
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
- union {
- struct cpuid cpu_id;
- u64 stidp_data;
- };
struct gmap *gmap;
struct kvm_guestdbg_info_arch guestdbg;
unsigned long pfault_token;
@@ -605,7 +601,7 @@ struct kvm_s390_cpu_model {
__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
- struct cpuid cpu_id;
+ u64 cpuid;
unsigned short ibc;
};
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index bab456be9a4f..994a66c09e8f 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -69,6 +69,7 @@ struct sclp_info {
unsigned int max_cores;
unsigned long hsa_size;
unsigned long facilities;
+ unsigned int hmfai;
};
extern struct sclp_info sclp;
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index ec60cf7fa0a2..1c8f33fca356 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -27,6 +27,7 @@
/* SIGP cpu status bits */
+#define SIGP_STATUS_INVALID_ORDER 0x00000002UL
#define SIGP_STATUS_CHECK_STOP 0x00000010UL
#define SIGP_STATUS_STOPPED 0x00000040UL
#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 668c087513e5..c597201a5ca9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -118,9 +118,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
};
/* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[] = {
- 0xffe6fffbfcfdfc40UL,
- 0x005e800000000000UL,
+unsigned long kvm_s390_fac_list_mask[16] = {
+ 0xffe6000000000000UL,
+ 0x005e000000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -638,6 +638,7 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_processor *proc;
+ u16 lowest_ibc, unblocked_ibc;
int ret = 0;
mutex_lock(&kvm->lock);
@@ -652,9 +653,17 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
if (!copy_from_user(proc, (void __user *)attr->addr,
sizeof(*proc))) {
- memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
- sizeof(struct cpuid));
- kvm->arch.model.ibc = proc->ibc;
+ kvm->arch.model.cpuid = proc->cpuid;
+ lowest_ibc = sclp.ibc >> 16 & 0xfff;
+ unblocked_ibc = sclp.ibc & 0xfff;
+ if (lowest_ibc) {
+ if (proc->ibc > unblocked_ibc)
+ kvm->arch.model.ibc = unblocked_ibc;
+ else if (proc->ibc < lowest_ibc)
+ kvm->arch.model.ibc = lowest_ibc;
+ else
+ kvm->arch.model.ibc = proc->ibc;
+ }
memcpy(kvm->arch.model.fac_list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
@@ -687,7 +696,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
ret = -ENOMEM;
goto out;
}
- memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
+ proc->cpuid = kvm->arch.model.cpuid;
proc->ibc = kvm->arch.model.ibc;
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1081,10 +1090,13 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm)
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
-static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+static u64 kvm_s390_get_initial_cpuid(void)
{
- get_cpu_id(cpu_id);
- cpu_id->version = 0xff;
+ struct cpuid cpuid;
+
+ get_cpu_id(&cpuid);
+ cpuid.version = 0xff;
+ return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
@@ -1175,7 +1187,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
- kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
+ kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
kvm_s390_crypto_init(kvm);
@@ -1624,7 +1636,6 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
- vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
if (test_kvm_facility(vcpu->kvm, 7))
vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
@@ -1645,11 +1656,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- vcpu->arch.sie_block->ecb = 6;
+ vcpu->arch.sie_block->ecb = 0x02;
+ if (test_kvm_facility(vcpu->kvm, 9))
+ vcpu->arch.sie_block->ecb |= 0x04;
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
- vcpu->arch.sie_block->ecb2 = 8;
+ if (test_kvm_facility(vcpu->kvm, 8))
+ vcpu->arch.sie_block->ecb2 |= 0x08;
vcpu->arch.sie_block->eca = 0xC1002000U;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
@@ -2971,13 +2985,26 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
+static inline unsigned long nonhyp_mask(int i)
+{
+ unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
+
+ return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
+}
+
static int __init kvm_s390_init(void)
{
+ int i;
+
if (!sclp.has_sief2) {
pr_info("SIE not available\n");
return -ENODEV;
}
+ for (i = 0; i < 16; i++)
+ kvm_s390_fac_list_mask[i] |=
+ S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
+
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0a1591d3d25d..95916fa7c670 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -439,7 +439,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
static int handle_stidp(struct kvm_vcpu *vcpu)
{
- u64 stidp_data = vcpu->arch.stidp_data;
+ u64 stidp_data = vcpu->kvm->arch.model.cpuid;
u64 operand2;
int rc;
ar_t ar;
@@ -670,8 +670,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Only provide non-quiescing support if the host supports it */
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+ /* Only provide non-quiescing support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+ !test_kvm_facility(vcpu->kvm, 14))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* No support for conditional-SSKE */
@@ -744,7 +745,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
{
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
- unsigned long *cbrlo, cbrle;
+ unsigned long *cbrlo;
struct gmap *gmap;
int i;
@@ -765,17 +766,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
- for (i = 0; i < entries; ++i) {
- cbrle = cbrlo[i];
- if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
- /* invalid entry */
- break;
- /* try to free backing */
- __gmap_zap(gmap, cbrle);
- }
+ for (i = 0; i < entries; ++i)
+ __gmap_zap(gmap, cbrlo[i]);
up_read(&gmap->mm->mmap_sem);
- if (i < entries)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
return 0;
}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 77c22d685c7a..28ea0cab1f1b 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -240,6 +240,12 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
struct kvm_s390_local_interrupt *li;
int rc;
+ if (!test_kvm_facility(vcpu->kvm, 9)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_ORDER;
+ return SIGP_CC_STATUS_STORED;
+ }
+
li = &dst_vcpu->arch.local_int;
if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
/* running */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7e394485a5f..c66e26280707 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -562,7 +562,6 @@ struct kvm_vcpu_arch {
struct {
u64 msr_val;
u64 last_steal;
- u64 accum_steal;
struct gfn_to_hva_cache stime;
struct kvm_steal_time steal;
} st;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index cd54147cb365..739c0c594022 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -216,9 +216,9 @@ struct kvm_cpuid_entry2 {
__u32 padding[3];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX (1 << 0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC (1 << 1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT (1 << 2)
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 54ead79e444b..dfb4c6476877 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -382,9 +382,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
u32 i, nr_ioapic_pins;
int idx;
- /* kvm->irq_routing must be read after clearing
- * KVM_SCAN_IOAPIC. */
- smp_mb();
idx = srcu_read_lock(&kvm->irq_srcu);
table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1ff4dbb73fb7..850335a71d9f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1909,18 +1909,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
* since it has been deleted from active_mmu_pages but still can be found
* at hast list.
*
- * for_each_gfn_indirect_valid_sp has skipped that kind of page and
- * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
- * all the obsolete pages.
+ * for_each_gfn_valid_sp() has skipped that kind of pages.
*/
-#define for_each_gfn_sp(_kvm, _sp, _gfn) \
+#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
- if ((_sp)->gfn != (_gfn)) {} else
+ if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
+ || (_sp)->role.invalid) {} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
- for_each_gfn_sp(_kvm, _sp, _gfn) \
- if ((_sp)->role.direct || (_sp)->role.invalid) {} else
+ for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
+ if ((_sp)->role.direct) {} else
/* @sp->gfn should be write-protected at the call site */
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1961,6 +1960,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
@@ -2105,11 +2109,6 @@ static void clear_sp_write_flooding_count(u64 *spte)
__clear_sp_write_flooding_count(sp);
}
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -2136,10 +2135,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- for_each_gfn_sp(vcpu->kvm, sp, gfn) {
- if (is_obsolete_sp(vcpu->kvm, sp))
- continue;
-
+ for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
if (!need_sync && sp->unsync)
need_sync = true;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a93871c..ab4a387b98b1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5046,8 +5046,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
- vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx->vcpu.arch.cr0 = cr0;
+ vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx_set_cr4(vcpu, 0);
vmx_set_efer(vcpu, 0);
vmx_fpu_activate(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c7b210..6c774cdf553c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2002,22 +2002,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.pv_time_enabled = false;
}
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
- u64 delta;
-
- if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
- return;
-
- delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
- vcpu->arch.st.last_steal = current->sched_info.run_delay;
- vcpu->arch.st.accum_steal = delta;
-}
-
static void record_steal_time(struct kvm_vcpu *vcpu)
{
- accumulate_steal_time(vcpu);
-
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -2025,9 +2011,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
- vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
- vcpu->arch.st.steal.version += 2;
- vcpu->arch.st.accum_steal = 0;
+ if (vcpu->arch.st.steal.version & 1)
+ vcpu->arch.st.steal.version += 1; /* first time write, random junk */
+
+ vcpu->arch.st.steal.version += 1;
+
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+ smp_wmb();
+
+ vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+ vcpu->arch.st.last_steal;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+ smp_wmb();
+
+ vcpu->arch.st.steal.version += 1;
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
@@ -8355,19 +8358,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+bool kvm_arch_has_irq_bypass(void)
+{
+ return kvm_x86_ops->update_pi_irte != NULL;
+}
+
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
- if (kvm_x86_ops->update_pi_irte) {
- irqfd->producer = prod;
- return kvm_x86_ops->update_pi_irte(irqfd->kvm,
- prod->irq, irqfd->gsi, 1);
- }
+ irqfd->producer = prod;
- return -EINVAL;
+ return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+ prod->irq, irqfd->gsi, 1);
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8377,11 +8382,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
- if (!kvm_x86_ops->update_pi_irte) {
- WARN_ON(irqfd->producer != NULL);
- return;
- }
-
WARN_ON(irqfd->producer != prod);
irqfd->producer = NULL;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b3898155..4814446a0024 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -468,11 +468,11 @@ static struct cyclecounter cyclecounter = {
.mask = CLOCKSOURCE_MASK(56),
};
-static struct timecounter timecounter;
+static struct arch_timer_kvm_info arch_timer_kvm_info;
-struct timecounter *arch_timer_get_timecounter(void)
+struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
{
- return &timecounter;
+ return &arch_timer_kvm_info;
}
static void __init arch_counter_register(unsigned type)
@@ -500,7 +500,8 @@ static void __init arch_counter_register(unsigned type)
clocksource_register_hz(&clocksource_counter, arch_timer_rate);
cyclecounter.mult = clocksource_counter.mult;
cyclecounter.shift = clocksource_counter.shift;
- timecounter_init(&timecounter, &cyclecounter, start_count);
+ timecounter_init(&arch_timer_kvm_info.timecounter,
+ &cyclecounter, start_count);
/* 56 bits minimum, so we assume worst case rollover */
sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
@@ -744,6 +745,8 @@ static void __init arch_timer_init(void)
arch_timer_register();
arch_timer_common_init();
+
+ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI];
}
static void __init arch_timer_of_init(struct device_node *np)
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f174ce0ca361..2e9443be2b14 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,19 @@
#include "irq-gic-common.h"
+static const struct gic_kvm_info *gic_kvm_info;
+
+const struct gic_kvm_info *gic_get_kvm_info(void)
+{
+ return gic_kvm_info;
+}
+
+void gic_set_kvm_info(const struct gic_kvm_info *info)
+{
+ BUG_ON(gic_kvm_info != NULL);
+ gic_kvm_info = info;
+}
+
void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data)
{
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697db8e22..205e5fddf6da 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,7 @@
#include <linux/of.h>
#include <linux/irqdomain.h>
+#include <linux/irqchip/arm-gic-common.h>
struct gic_quirk {
const char *desc;
@@ -35,4 +36,6 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data);
+void gic_set_kvm_info(const struct gic_kvm_info *info);
+
#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 5b7d3c2129d8..05a856073714 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -15,6 +15,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define pr_fmt(fmt) "GICv3: " fmt
+
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
@@ -28,6 +30,7 @@
#include <linux/slab.h>
#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic-common.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/cputype.h>
@@ -56,6 +59,8 @@ struct gic_chip_data {
static struct gic_chip_data gic_data __read_mostly;
static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+static struct gic_kvm_info gic_v3_kvm_info;
+
#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist))
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K)
@@ -901,6 +906,30 @@ static int __init gic_validate_dist_version(void __iomem *dist_base)
return 0;
}
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+ int ret;
+ struct resource r;
+ u32 gicv_idx;
+
+ gic_v3_kvm_info.type = GIC_V3;
+
+ gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+ if (!gic_v3_kvm_info.maint_irq)
+ return;
+
+ if (of_property_read_u32(node, "#redistributor-regions",
+ &gicv_idx))
+ gicv_idx = 1;
+
+ gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+ ret = of_address_to_resource(node, gicv_idx, &r);
+ if (!ret)
+ gic_v3_kvm_info.vcpu = r;
+
+ gic_set_kvm_info(&gic_v3_kvm_info);
+}
+
static int __init gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *dist_base;
@@ -952,8 +981,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions,
redist_stride, &node->fwnode);
- if (!err)
+ if (!err) {
+ gic_of_setup_kvm_info(node);
return 0;
+ }
out_unmap_rdist:
for (i = 0; i < nr_redist_regions; i++)
@@ -968,19 +999,25 @@ out_unmap_dist:
IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
#ifdef CONFIG_ACPI
-static void __iomem *dist_base;
-static struct redist_region *redist_regs __initdata;
-static u32 nr_redist_regions __initdata;
-static bool single_redist;
+static struct
+{
+ void __iomem *dist_base;
+ struct redist_region *redist_regs;
+ u32 nr_redist_regions;
+ bool single_redist;
+ u32 maint_irq;
+ int maint_irq_mode;
+ phys_addr_t vcpu_base;
+} acpi_data __initdata;
static void __init
gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
{
static int count = 0;
- redist_regs[count].phys_base = phys_base;
- redist_regs[count].redist_base = redist_base;
- redist_regs[count].single_redist = single_redist;
+ acpi_data.redist_regs[count].phys_base = phys_base;
+ acpi_data.redist_regs[count].redist_base = redist_base;
+ acpi_data.redist_regs[count].single_redist = acpi_data.single_redist;
count++;
}
@@ -1008,7 +1045,7 @@ gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
{
struct acpi_madt_generic_interrupt *gicc =
(struct acpi_madt_generic_interrupt *)header;
- u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+ u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
@@ -1025,7 +1062,7 @@ static int __init gic_acpi_collect_gicr_base(void)
acpi_tbl_entry_handler redist_parser;
enum acpi_madt_type type;
- if (single_redist) {
+ if (acpi_data.single_redist) {
type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
redist_parser = gic_acpi_parse_madt_gicc;
} else {
@@ -1076,14 +1113,14 @@ static int __init gic_acpi_count_gicr_regions(void)
count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
gic_acpi_match_gicr, 0);
if (count > 0) {
- single_redist = false;
+ acpi_data.single_redist = false;
return count;
}
count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
gic_acpi_match_gicc, 0);
if (count > 0)
- single_redist = true;
+ acpi_data.single_redist = true;
return count;
}
@@ -1103,36 +1140,117 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
if (count <= 0)
return false;
- nr_redist_regions = count;
+ acpi_data.nr_redist_regions = count;
return true;
}
+static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_madt_generic_interrupt *gicc =
+ (struct acpi_madt_generic_interrupt *)header;
+ int maint_irq_mode;
+ static int first_madt = true;
+
+ /* Skip unusable CPUs */
+ if (!(gicc->flags & ACPI_MADT_ENABLED))
+ return 0;
+
+ maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+
+ if (first_madt) {
+ first_madt = false;
+
+ acpi_data.maint_irq = gicc->vgic_interrupt;
+ acpi_data.maint_irq_mode = maint_irq_mode;
+ acpi_data.vcpu_base = gicc->gicv_base_address;
+
+ return 0;
+ }
+
+ /*
+ * The maintenance interrupt and GICV should be the same for every CPU
+ */
+ if ((acpi_data.maint_irq != gicc->vgic_interrupt) ||
+ (acpi_data.maint_irq_mode != maint_irq_mode) ||
+ (acpi_data.vcpu_base != gicc->gicv_base_address))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool __init gic_acpi_collect_virt_info(void)
+{
+ int count;
+
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+ gic_acpi_parse_virt_madt_gicc, 0);
+
+ return (count > 0);
+}
+
#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
+
+static void __init gic_acpi_setup_kvm_info(void)
+{
+ int irq;
+
+ if (!gic_acpi_collect_virt_info()) {
+ pr_warn("Unable to get hardware information used for virtualization\n");
+ return;
+ }
+
+ gic_v3_kvm_info.type = GIC_V3;
+
+ irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+ acpi_data.maint_irq_mode,
+ ACPI_ACTIVE_HIGH);
+ if (irq <= 0)
+ return;
+
+ gic_v3_kvm_info.maint_irq = irq;
+
+ if (acpi_data.vcpu_base) {
+ struct resource *vcpu = &gic_v3_kvm_info.vcpu;
+
+ vcpu->flags = IORESOURCE_MEM;
+ vcpu->start = acpi_data.vcpu_base;
+ vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+ }
+
+ gic_set_kvm_info(&gic_v3_kvm_info);
+}
static int __init
gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_generic_distributor *dist;
struct fwnode_handle *domain_handle;
+ size_t size;
int i, err;
/* Get distributor base address */
dist = (struct acpi_madt_generic_distributor *)header;
- dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE);
- if (!dist_base) {
+ acpi_data.dist_base = ioremap(dist->base_address,
+ ACPI_GICV3_DIST_MEM_SIZE);
+ if (!acpi_data.dist_base) {
pr_err("Unable to map GICD registers\n");
return -ENOMEM;
}
- err = gic_validate_dist_version(dist_base);
+ err = gic_validate_dist_version(acpi_data.dist_base);
if (err) {
- pr_err("No distributor detected at @%p, giving up", dist_base);
+ pr_err("No distributor detected at @%p, giving up",
+ acpi_data.dist_base);
goto out_dist_unmap;
}
- redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions,
- GFP_KERNEL);
- if (!redist_regs) {
+ size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions;
+ acpi_data.redist_regs = kzalloc(size, GFP_KERNEL);
+ if (!acpi_data.redist_regs) {
err = -ENOMEM;
goto out_dist_unmap;
}
@@ -1141,29 +1259,31 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
if (err)
goto out_redist_unmap;
- domain_handle = irq_domain_alloc_fwnode(dist_base);
+ domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base);
if (!domain_handle) {
err = -ENOMEM;
goto out_redist_unmap;
}
- err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0,
- domain_handle);
+ err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
+ acpi_data.nr_redist_regions, 0, domain_handle);
if (err)
goto out_fwhandle_free;
acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+ gic_acpi_setup_kvm_info();
+
return 0;
out_fwhandle_free:
irq_domain_free_fwnode(domain_handle);
out_redist_unmap:
- for (i = 0; i < nr_redist_regions; i++)
- if (redist_regs[i].redist_base)
- iounmap(redist_regs[i].redist_base);
- kfree(redist_regs);
+ for (i = 0; i < acpi_data.nr_redist_regions; i++)
+ if (acpi_data.redist_regs[i].redist_base)
+ iounmap(acpi_data.redist_regs[i].redist_base);
+ kfree(acpi_data.redist_regs);
out_dist_unmap:
- iounmap(dist_base);
+ iounmap(acpi_data.dist_base);
return err;
}
IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 282344b95ec2..3f1d9fd3a462 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -102,6 +102,8 @@ static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
+static struct gic_kvm_info gic_v2_kvm_info;
+
#ifdef CONFIG_GIC_NON_BANKED
static void __iomem *gic_get_percpu_base(union gic_base *base)
{
@@ -1189,6 +1191,29 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
return true;
}
+static void __init gic_of_setup_kvm_info(struct device_node *node)
+{
+ int ret;
+ struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+ struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+ gic_v2_kvm_info.type = GIC_V2;
+
+ gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+ if (!gic_v2_kvm_info.maint_irq)
+ return;
+
+ ret = of_address_to_resource(node, 2, vctrl_res);
+ if (ret)
+ return;
+
+ ret = of_address_to_resource(node, 3, vcpu_res);
+ if (ret)
+ return;
+
+ gic_set_kvm_info(&gic_v2_kvm_info);
+}
+
int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
@@ -1218,8 +1243,10 @@ gic_of_init(struct device_node *node, struct device_node *parent)
__gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset,
&node->fwnode);
- if (!gic_cnt)
+ if (!gic_cnt) {
gic_init_physaddr(node);
+ gic_of_setup_kvm_info(node);
+ }
if (parent) {
irq = irq_of_parse_and_map(node, 0);
@@ -1245,7 +1272,14 @@ IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
#endif
#ifdef CONFIG_ACPI
-static phys_addr_t cpu_phy_base __initdata;
+static struct
+{
+ phys_addr_t cpu_phys_base;
+ u32 maint_irq;
+ int maint_irq_mode;
+ phys_addr_t vctrl_base;
+ phys_addr_t vcpu_base;
+} acpi_data __initdata;
static int __init
gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
@@ -1265,10 +1299,16 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
* All CPU interface addresses have to be the same.
*/
gic_cpu_base = processor->base_address;
- if (cpu_base_assigned && gic_cpu_base != cpu_phy_base)
+ if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base)
return -EINVAL;
- cpu_phy_base = gic_cpu_base;
+ acpi_data.cpu_phys_base = gic_cpu_base;
+ acpi_data.maint_irq = processor->vgic_interrupt;
+ acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
+ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
+ acpi_data.vctrl_base = processor->gich_base_address;
+ acpi_data.vcpu_base = processor->gicv_base_address;
+
cpu_base_assigned = 1;
return 0;
}
@@ -1299,6 +1339,41 @@ static bool __init gic_validate_dist(struct acpi_subtable_header *header,
#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K)
#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K)
+#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
+#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
+
+static void __init gic_acpi_setup_kvm_info(void)
+{
+ int irq;
+ struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+ struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+
+ gic_v2_kvm_info.type = GIC_V2;
+
+ if (!acpi_data.vctrl_base)
+ return;
+
+ vctrl_res->flags = IORESOURCE_MEM;
+ vctrl_res->start = acpi_data.vctrl_base;
+ vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1;
+
+ if (!acpi_data.vcpu_base)
+ return;
+
+ vcpu_res->flags = IORESOURCE_MEM;
+ vcpu_res->start = acpi_data.vcpu_base;
+ vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
+
+ irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
+ acpi_data.maint_irq_mode,
+ ACPI_ACTIVE_HIGH);
+ if (irq <= 0)
+ return;
+
+ gic_v2_kvm_info.maint_irq = irq;
+
+ gic_set_kvm_info(&gic_v2_kvm_info);
+}
static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
const unsigned long end)
@@ -1316,7 +1391,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
return -EINVAL;
}
- cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
+ cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
if (!cpu_base) {
pr_err("Unable to map GICC registers\n");
return -ENOMEM;
@@ -1356,6 +1431,8 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
gicv2m_init(NULL, gic_data[0].domain);
+ gic_acpi_setup_kvm_info();
+
return 0;
}
IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 6804354c42bd..0ac520dd1b21 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -49,7 +49,9 @@ struct read_info_sccb {
u8 _pad_117[119 - 117]; /* 117-118 */
u8 fac119; /* 119 */
u16 hcpua; /* 120-121 */
- u8 _pad_122[4096 - 122]; /* 122-4095 */
+ u8 _pad_122[124 - 122]; /* 122-123 */
+ u32 hmfai; /* 124-127 */
+ u8 _pad_128[4096 - 128]; /* 128-4095 */
} __packed __aligned(PAGE_SIZE);
static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
@@ -155,6 +157,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp.mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0;
sclp.mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0;
sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0;
+
+ sclp.hmfai = sccb->hmfai;
}
/*
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 25d0914481a2..caedb74c9210 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -49,11 +49,16 @@ enum arch_timer_reg {
#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
+struct arch_timer_kvm_info {
+ struct timecounter timecounter;
+ int virtual_irq;
+};
+
#ifdef CONFIG_ARM_ARCH_TIMER
extern u32 arch_timer_get_rate(void);
extern u64 (*arch_timer_read_counter)(void);
-extern struct timecounter *arch_timer_get_timecounter(void);
+extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
#else
@@ -67,11 +72,6 @@ static inline u64 arch_timer_read_counter(void)
return 0;
}
-static inline struct timecounter *arch_timer_get_timecounter(void)
-{
- return NULL;
-}
-
#endif
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 281caf847fad..be6037aa703d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,6 +25,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
#define VGIC_NR_IRQS_LEGACY 256
#define VGIC_NR_SGIS 16
@@ -353,15 +354,15 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
#define vgic_ready(k) ((k)->arch.vgic.ready)
-int vgic_v2_probe(struct device_node *vgic_node,
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params);
#ifdef CONFIG_KVM_ARM_VGIC_V3
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params);
#else
-static inline int vgic_v3_probe(struct device_node *vgic_node,
+static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params)
{
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 1551b5b2f4c2..f0f5d2671509 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -34,7 +34,7 @@ struct irq_bypass_consumer;
/**
* struct irq_bypass_producer - IRQ bypass producer definition
* @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer
+ * @token: opaque token to match between producer and consumer (non-NULL)
* @irq: Linux IRQ number for the producer device
* @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
* @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
@@ -60,7 +60,7 @@ struct irq_bypass_producer {
/**
* struct irq_bypass_consumer - IRQ bypass consumer definition
* @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer
+ * @token: opaque token to match between producer and consumer (non-NULL)
* @add_producer: Connect the IRQ consumer to an IRQ producer
* @del_producer: Disconnect the IRQ consumer from an IRQ producer
* @stop: Perform any quiesce operations necessary prior to add/del (optional)
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
new file mode 100644
index 000000000000..c647b0547bcd
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -0,0 +1,34 @@
+/*
+ * include/linux/irqchip/arm-gic-common.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+ GIC_V2,
+ GIC_V3,
+};
+
+struct gic_kvm_info {
+ /* GIC type */
+ enum gic_type type;
+ /* Virtual CPU interface */
+ struct resource vcpu;
+ /* Interrupt number */
+ unsigned int maint_irq;
+ /* Virtual control interface */
+ struct resource vctrl;
+};
+
+const struct gic_kvm_info *gic_get_kvm_info(void);
+
+#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5276fe0916fc..92a0229044fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,10 @@
#include <asm/kvm_host.h>
+#ifndef KVM_MAX_VCPU_ID
+#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#endif
+
/*
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
* in kvm, other bits are visible for userspace which are defined in
@@ -447,12 +451,13 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL;
int i;
- if (id < 0 || id >= KVM_MAX_VCPUS)
+ if (id < 0)
return NULL;
- vcpu = kvm_get_vcpu(kvm, id);
+ if (id < KVM_MAX_VCPUS)
+ vcpu = kvm_get_vcpu(kvm, id);
if (vcpu && vcpu->vcpu_id == id)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1091,6 +1096,11 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
+ /*
+ * Ensure the rest of the request is published to kvm_check_request's
+ * caller. Paired with the smp_mb__after_atomic in kvm_check_request.
+ */
+ smp_wmb();
set_bit(req, &vcpu->requests);
}
@@ -1098,6 +1108,12 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
{
if (test_bit(req, &vcpu->requests)) {
clear_bit(req, &vcpu->requests);
+
+ /*
+ * Ensure the rest of the request is visible to kvm_check_request's
+ * caller. Paired with the smp_wmb in kvm_make_request.
+ */
+ smp_mb__after_atomic();
return true;
} else {
return false;
@@ -1169,6 +1185,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+bool kvm_arch_has_irq_bypass(void);
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
struct irq_bypass_producer *);
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f8032ec1..05ebf475104c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -865,6 +865,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_SPAPR_TCE_64 125
#define KVM_CAP_ARM_PMU_V3 126
#define KVM_CAP_VCPU_ATTRIBUTES 127
+#define KVM_CAP_MAX_VCPU_ID 128
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 9aaa35dd9144..409db3304471 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -17,7 +17,6 @@
*/
#include <linux/cpu.h>
-#include <linux/of_irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
@@ -438,45 +437,29 @@ static struct notifier_block kvm_timer_cpu_nb = {
.notifier_call = kvm_timer_cpu_notify,
};
-static const struct of_device_id arch_timer_of_match[] = {
- { .compatible = "arm,armv7-timer", },
- { .compatible = "arm,armv8-timer", },
- {},
-};
-
int kvm_timer_hyp_init(void)
{
- struct device_node *np;
- unsigned int ppi;
+ struct arch_timer_kvm_info *info;
int err;
- timecounter = arch_timer_get_timecounter();
- if (!timecounter)
- return -ENODEV;
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
- np = of_find_matching_node(NULL, arch_timer_of_match);
- if (!np) {
- kvm_err("kvm_arch_timer: can't find DT node\n");
+ if (info->virtual_irq <= 0) {
+ kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+ info->virtual_irq);
return -ENODEV;
}
+ host_vtimer_irq = info->virtual_irq;
- ppi = irq_of_parse_and_map(np, 2);
- if (!ppi) {
- kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
- err = -EINVAL;
- goto out;
- }
-
- err = request_percpu_irq(ppi, kvm_arch_timer_handler,
+ err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest timer", kvm_get_running_vcpus());
if (err) {
kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
- ppi, err);
+ host_vtimer_irq, err);
goto out;
}
- host_vtimer_irq = ppi;
-
err = __register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
kvm_err("Cannot register timer CPU notifier\n");
@@ -489,14 +472,13 @@ int kvm_timer_hyp_init(void)
goto out_free;
}
- kvm_info("%s IRQ%d\n", np->name, ppi);
+ kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
goto out;
out_free:
- free_percpu_irq(ppi, kvm_get_running_vcpus());
+ free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
out:
- of_node_put(np);
return err;
}
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 67ec334ce1d0..7e826c9b2b0a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -20,9 +20,6 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic.h>
@@ -186,38 +183,39 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv2 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv2 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params)
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
{
int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v2_params;
+ const struct resource *vctrl_res = &gic_kvm_info->vctrl;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
+ if (!gic_kvm_info->maint_irq) {
+ kvm_err("error getting vgic maintenance irq\n");
ret = -ENXIO;
goto out;
}
+ vgic->maint_irq = gic_kvm_info->maint_irq;
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain GICH resource\n");
+ if (!gic_kvm_info->vctrl.start) {
+ kvm_err("GICH not present in the firmware table\n");
+ ret = -ENXIO;
goto out;
}
- vgic->vctrl_base = of_iomap(vgic_node, 2);
+ vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start,
+ resource_size(&gic_kvm_info->vctrl));
if (!vgic->vctrl_base) {
kvm_err("Cannot ioremap GICH\n");
ret = -ENOMEM;
@@ -228,29 +226,23 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
ret = create_hyp_io_mappings(vgic->vctrl_base,
- vgic->vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
+ vgic->vctrl_base + resource_size(vctrl_res),
+ vctrl_res->start);
if (ret) {
kvm_err("Cannot map VCTRL into hyp\n");
goto out_unmap;
}
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain GICV resource\n");
- ret = -ENXIO;
- goto out_unmap;
- }
-
- if (!PAGE_ALIGNED(vcpu_res.start)) {
+ if (!PAGE_ALIGNED(vcpu_res->start)) {
kvm_err("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
ret = -ENXIO;
goto out_unmap;
}
- if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
ret = -ENXIO;
goto out_unmap;
@@ -259,10 +251,10 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic->maint_irq);
+ kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+ gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq);
vgic->type = VGIC_V2;
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -276,6 +268,5 @@ int vgic_v2_probe(struct device_node *vgic_node,
out_unmap:
iounmap(vgic->vctrl_base);
out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 999bdc6d9d9f..c02a1b1cf855 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -20,11 +20,9 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-common.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
@@ -222,30 +220,24 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv3 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv3 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params)
{
int ret = 0;
- u32 gicv_idx;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v3_params;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
+ vgic->maint_irq = gic_kvm_info->maint_irq;
ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
@@ -256,24 +248,19 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
vgic->can_emulate_gicv2 = false;
- if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
- gicv_idx = 1;
-
- gicv_idx += 3; /* Also skip GICD, GICC, GICH */
- if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ if (!vcpu_res->start) {
kvm_info("GICv3: no GICV resource entry\n");
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(vcpu_res.start)) {
+ } else if (!PAGE_ALIGNED(vcpu_res->start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
- vgic->vcpu_base = 0;
} else {
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -286,15 +273,13 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->type = VGIC_V3;
vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vcpu_res.start, vgic->maint_irq);
+ kvm_info("GICV base=0x%llx, IRQ=%d\n",
+ vgic->vcpu_base, vgic->maint_irq);
on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
*ops = &vgic_v3_ops;
*params = vgic;
-out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 00429b392c61..60668a7f319a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -21,9 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
+#include <linux/irq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
@@ -33,6 +31,7 @@
#include <trace/events/kvm.h>
#include <asm/kvm.h>
#include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -2389,33 +2388,38 @@ static struct notifier_block vgic_cpu_nb = {
.notifier_call = vgic_cpu_notify,
};
-static const struct of_device_id vgic_ids[] = {
- { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
- {},
-};
-
-int kvm_vgic_hyp_init(void)
+static int kvm_vgic_probe(void)
{
- const struct of_device_id *matched_id;
- const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
- const struct vgic_params **);
- struct device_node *vgic_node;
+ const struct gic_kvm_info *gic_kvm_info;
int ret;
- vgic_node = of_find_matching_node_and_match(NULL,
- vgic_ids, &matched_id);
- if (!vgic_node) {
- kvm_err("error: no compatible GIC node found\n");
+ gic_kvm_info = gic_get_kvm_info();
+ if (!gic_kvm_info)
return -ENODEV;
+
+ switch (gic_kvm_info->type) {
+ case GIC_V2:
+ ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ case GIC_V3:
+ ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ default:
+ ret = -ENODEV;
}
- vgic_probe = matched_id->data;
- ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
- if (ret)
+ return ret;
+}
+
+int kvm_vgic_hyp_init(void)
+{
+ int ret;
+
+ ret = kvm_vgic_probe();
+ if (ret) {
+ kvm_err("error: KVM vGIC probing failed\n");
return ret;
+ }
ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a7dfc1..e469b6012471 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -408,15 +408,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
*/
fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
- irqfd->consumer.token = (void *)irqfd->eventfd;
- irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
- irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
- irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
- irqfd->consumer.start = kvm_arch_irq_bypass_start;
- ret = irq_bypass_register_consumer(&irqfd->consumer);
- if (ret)
- pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+ if (kvm_arch_has_irq_bypass()) {
+ irqfd->consumer.token = (void *)irqfd->eventfd;
+ irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+ irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+ irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+ irqfd->consumer.start = kvm_arch_irq_bypass_start;
+ ret = irq_bypass_register_consumer(&irqfd->consumer);
+ if (ret)
+ pr_info("irq bypass consumer (token %p) registration fails: %d\n",
irqfd->consumer.token, ret);
+ }
#endif
return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482fb9260..ed3d9bb18a56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2272,7 +2272,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
int r;
struct kvm_vcpu *vcpu;
- if (id >= KVM_MAX_VCPUS)
+ if (id >= KVM_MAX_VCPU_ID)
return -EINVAL;
vcpu = kvm_arch_vcpu_create(kvm, id);
@@ -2746,6 +2746,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_MULTI_ADDRESS_SPACE:
return KVM_ADDRESS_SPACE_NUM;
#endif
+ case KVM_CAP_MAX_VCPU_ID:
+ return KVM_MAX_VCPU_ID;
default:
break;
}
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 09a03b5a21ff..52abac4bb6a2 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -89,6 +89,9 @@ int irq_bypass_register_producer(struct irq_bypass_producer *producer)
struct irq_bypass_producer *tmp;
struct irq_bypass_consumer *consumer;
+ if (!producer->token)
+ return -EINVAL;
+
might_sleep();
if (!try_module_get(THIS_MODULE))
@@ -136,6 +139,9 @@ void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
struct irq_bypass_producer *tmp;
struct irq_bypass_consumer *consumer;
+ if (!producer->token)
+ return;
+
might_sleep();
if (!try_module_get(THIS_MODULE))
@@ -177,7 +183,8 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
struct irq_bypass_consumer *tmp;
struct irq_bypass_producer *producer;
- if (!consumer->add_producer || !consumer->del_producer)
+ if (!consumer->token ||
+ !consumer->add_producer || !consumer->del_producer)
return -EINVAL;
might_sleep();
@@ -227,6 +234,9 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
struct irq_bypass_consumer *tmp;
struct irq_bypass_producer *producer;
+ if (!consumer->token)
+ return;
+
might_sleep();
if (!try_module_get(THIS_MODULE))