summaryrefslogtreecommitdiff
path: root/arch/arm/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--arch/arm/kvm/Kconfig33
-rw-r--r--arch/arm/kvm/Makefile10
-rw-r--r--arch/arm/kvm/arm.c269
-rw-r--r--arch/arm/kvm/coproc.c160
-rw-r--r--arch/arm/kvm/coproc.h6
-rw-r--r--arch/arm/kvm/coproc_a15.c2
-rw-r--r--arch/arm/kvm/coproc_a7.c2
-rw-r--r--arch/arm/kvm/guest.c64
-rw-r--r--arch/arm/kvm/handle_exit.c10
-rw-r--r--arch/arm/kvm/init.S11
-rw-r--r--arch/arm/kvm/interrupts.S20
-rw-r--r--arch/arm/kvm/interrupts_head.S63
-rw-r--r--arch/arm/kvm/mmio.c67
-rw-r--r--arch/arm/kvm/mmu.c1206
-rw-r--r--arch/arm/kvm/psci.c35
-rw-r--r--arch/arm/kvm/trace.h108
16 files changed, 1534 insertions, 532 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4be5bb150bdd..f1f79d104309 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -18,15 +18,21 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
+ depends on MMU && OF
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_ARM_HOST
- depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select SRCU
+ select MMU_NOTIFIER
+ select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQFD
+ depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
- Support hosting virtualized guest machines. You will also
- need to select one or more of the processor modules below.
+ Support hosting virtualized guest machines.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
@@ -34,10 +40,7 @@ config KVM
If unsure, say N.
config KVM_ARM_HOST
- bool "KVM host support for ARM cpus."
- depends on KVM
- depends on MMU
- select MMU_NOTIFIER
+ bool
---help---
Provides host support for ARM processors.
@@ -52,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
large, so only choose a reasonable number that you expect to
actually use.
-config KVM_ARM_VGIC
- bool "KVM support for Virtual GIC"
- depends on KVM_ARM_HOST && OF
- select HAVE_KVM_IRQCHIP
- default y
- ---help---
- Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
- bool "KVM support for Architected Timers"
- depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
- select HAVE_KVM_IRQCHIP
- default y
- ---help---
- Adds support for the Architected Timers in virtual machines
-
endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 789bca9e64a7..139e46c08b6e 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
plus_virt_def := -DREQUIRES_VIRT=1
endif
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
CFLAGS_arm.o := -I. $(plus_virt_def)
CFLAGS_mmu.o := -I.
@@ -15,10 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37c0f9e..6f536451ab78 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u8 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);
-static bool vgic_present;
-
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@@ -82,12 +80,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
/**
* kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_arm_running_vcpu;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
@@ -97,27 +95,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -143,6 +130,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* Mark the initial VMID generation invalid */
kvm->arch.vmid_gen = 0;
+ /* The maximum number of VCPUs is limited by the host's GIC model */
+ kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
+
return ret;
out_free_stage2_pgd:
kvm_free_stage2_pgd(kvm);
@@ -155,16 +145,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
/**
* kvm_arch_destroy_vm - destroy the VM data structure
@@ -182,15 +162,17 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+ kvm_vgic_destroy(kvm);
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
- r = vgic_present;
- break;
+ case KVM_CAP_IRQFD:
+ case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
@@ -198,6 +180,8 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -225,39 +209,22 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
-{
- return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
int err;
struct kvm_vcpu *vcpu;
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ if (id >= kvm->arch.max_vcpus) {
+ err = -EINVAL;
+ goto out;
+ }
+
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
err = -ENOMEM;
@@ -281,15 +248,15 @@ out:
return ERR_PTR(err);
}
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
- return 0;
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -300,20 +267,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return 0;
+ return kvm_timer_should_fire(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
-
- /* Set up VGIC */
- ret = kvm_vgic_vcpu_init(vcpu);
- if (ret)
- return ret;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -321,24 +282,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
- /*
- * Check whether this vcpu requires the cache to be flushed on
- * this physical CPU. This is a consequence of doing dcache
- * operations by set/way on this vcpu. We do it here to be in
- * a non-preemptible section.
- */
- if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
- flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
-
kvm_arm_set_running_vcpu(vcpu);
}
@@ -364,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL;
+ if (vcpu->arch.pause)
+ mp_state->mp_state = KVM_MP_STATE_STOPPED;
+ else
+ mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+ return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL;
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.pause = false;
+ break;
+ case KVM_MP_STATE_STOPPED:
+ vcpu->arch.pause = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
}
/**
@@ -464,16 +428,17 @@ static void update_vttbr(struct kvm *kvm)
kvm_next_vmid++;
/* update vttbr to be used with the new vmid */
- pgd_phys = virt_to_phys(kvm->arch.pgd);
+ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+ BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
- kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
- kvm->arch.vttbr |= vmid;
+ kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
}
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
int ret;
if (likely(vcpu->arch.has_run_once))
@@ -482,18 +447,31 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
vcpu->arch.has_run_once = true;
/*
- * Initialize the VGIC before running a vcpu the first time on
- * this VM.
+ * Map the VGIC hardware resources before running a vcpu the first
+ * time on this VM.
*/
- if (unlikely(!vgic_initialized(vcpu->kvm))) {
- ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_ready(kvm))) {
+ ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
}
+ /*
+ * Enable the arch timers only if we have an in-kernel VGIC
+ * and it has been properly initialized, since we cannot handle
+ * interrupts from the virtual timer with a userspace gic.
+ */
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+ kvm_timer_enable(kvm);
+
return 0;
}
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+ return vgic_initialized(kvm);
+}
+
static void vcpu_pause(struct kvm_vcpu *vcpu)
{
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -581,9 +559,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
- vcpu->arch.last_pcpu = smp_processor_id();
kvm_guest_exit();
- trace_kvm_exit(*vcpu_pc(vcpu));
+ trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -704,6 +681,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return -EINVAL;
}
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+ const struct kvm_vcpu_init *init)
+{
+ unsigned int i;
+ int phys_target = kvm_target_cpu();
+
+ if (init->target != phys_target)
+ return -EINVAL;
+
+ /*
+ * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+ * use the same target.
+ */
+ if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+ return -EINVAL;
+
+ /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+ for (i = 0; i < sizeof(init->features) * 8; i++) {
+ bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+ if (set && i >= KVM_VCPU_MAX_FEATURES)
+ return -ENOENT;
+
+ /*
+ * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+ * use the same feature set.
+ */
+ if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+ test_bit(i, vcpu->arch.features) != set)
+ return -EINVAL;
+
+ if (set)
+ set_bit(i, vcpu->arch.features);
+ }
+
+ vcpu->arch.target = phys_target;
+
+ /* Now we know what it is, we can reset it. */
+ return kvm_reset_vcpu(vcpu);
+}
+
+
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
struct kvm_vcpu_init *init)
{
@@ -714,10 +733,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return ret;
/*
+ * Ensure a rebooted VM will fault in RAM pages and detect if the
+ * guest MMU is turned off and flush the caches as needed.
+ */
+ if (vcpu->arch.has_run_once)
+ stage2_unmap_vm(vcpu->kvm);
+
+ vcpu_reset_hcr(vcpu);
+
+ /*
* Handle the "start in power-off" case by marking the VCPU as paused.
*/
- if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
vcpu->arch.pause = true;
+ else
+ vcpu->arch.pause = false;
return 0;
}
@@ -774,9 +804,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
}
+/**
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
+ *
+ * Steps 1-4 below provide general overview of dirty page logging. See
+ * kvm_get_dirty_log_protect() function description for additional details.
+ *
+ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
+ * always flush the TLB (step 4) even if previous step failed and the dirty
+ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
+ * does not preclude user space subsequent dirty log read. Flushing TLB ensures
+ * writes will be marked dirty for next log read.
+ *
+ * 1. Take a snapshot of the bit and clear it if needed.
+ * 2. Write protect the corresponding page.
+ * 3. Copy the snapshot to the userspace.
+ * 4. Flush TLB's if needed.
+ */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- return -EINVAL;
+ bool is_dirty = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+
+ if (is_dirty)
+ kvm_flush_remote_tlbs(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
}
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
@@ -791,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
switch (dev_id) {
case KVM_ARM_DEVICE_VGIC_V2:
- if (!vgic_present)
- return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
@@ -807,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
- if (vgic_present)
- return kvm_vgic_create(kvm);
- else
- return -ENXIO;
+ return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
}
case KVM_ARM_SET_DEVICE_ADDR: {
struct kvm_arm_device_addr dev_addr;
@@ -863,7 +918,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
- cpu_init_hyp_mode(NULL);
+ if (__hyp_get_vectors() == hyp_default_vectors)
+ cpu_init_hyp_mode(NULL);
break;
}
@@ -994,10 +1050,6 @@ static int init_hyp_mode(void)
if (err)
goto out_free_context;
-#ifdef CONFIG_KVM_ARM_VGIC
- vgic_present = true;
-#endif
-
/*
* Init HYP architected timer support
*/
@@ -1031,6 +1083,19 @@ static void check_kvm_target_cpu(void *ret)
*(int *)ret = kvm_target_cpu();
}
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
+ return vcpu;
+ }
+ return NULL;
+}
+
/**
* Initialize Hyp-mode and memory mappings on all CPUs.
*/
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index c58a35116f63..f3d88dc388bc 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -44,6 +44,31 @@ static u32 cache_levels;
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 12
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * of cp15 registers can be viewed either as couple of two u32 registers
+ * or one u64 register. Current u64 register encoding is that least
+ * significant u32 word is followed by most significant u32 word.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r,
+ u64 val)
+{
+ vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+ vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r)
+{
+ u64 val;
+
+ val = vcpu->arch.cp15[r->reg + 1];
+ val = val << 32;
+ val = val | vcpu->arch.cp15[r->reg];
+ return val;
+}
+
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
kvm_inject_undefined(vcpu);
@@ -164,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
return true;
}
-/* See note at ARM ARM B1.14.4 */
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ */
static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
const struct coproc_reg *r)
{
- unsigned long val;
- int cpu;
-
if (!p->is_write)
return read_from_write_only(vcpu, p);
- cpu = get_cpu();
-
- cpumask_setall(&vcpu->arch.require_dcache_flush);
- cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
-
- /* If we were already preempted, take the long way around */
- if (cpu != vcpu->arch.last_pcpu) {
- flush_cache_all();
- goto done;
- }
-
- val = *vcpu_reg(vcpu, p->Rt1);
-
- switch (p->CRm) {
- case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
- case 14: /* DCCISW */
- asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
- break;
-
- case 10: /* DCCSW */
- asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
- break;
- }
-
-done:
- put_cpu();
-
+ kvm_set_way_flush(vcpu);
return true;
}
/*
* Generic accessor for VM registers. Only called as long as HCR_TVM
- * is set.
+ * is set. If the guest enables the MMU, we stop trapping the VM
+ * sys_regs and leave it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
*/
-static bool access_vm_reg(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
{
+ bool was_enabled = vcpu_has_cache_enabled(vcpu);
+
BUG_ON(!p->is_write);
vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
if (p->is_64bit)
vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
- return true;
-}
-
-/*
- * SCTLR accessor. Only called as long as HCR_TVM is set. If the
- * guest enables the MMU, we stop trapping the VM sys_regs and leave
- * it in complete control of the caches.
- *
- * Used by the cpu-specific code.
- */
-bool access_sctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- access_vm_reg(vcpu, p, r);
-
- if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
- vcpu->arch.hcr &= ~HCR_TVM;
- stage2_flush_vm(vcpu->kvm);
- }
-
+ kvm_toggle_cache(vcpu, was_enabled);
return true;
}
@@ -682,17 +665,23 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
};
+/*
+ * Reads a register value from a userspace address to a kernel
+ * variable. Make sure that register size matches sizeof(*__val).
+ */
static int reg_from_user(void *val, const void __user *uaddr, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
}
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * Make sure that register size matches sizeof(*__val).
+ */
static int reg_to_user(void __user *uaddr, const void *val, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
@@ -702,6 +691,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
{
struct coproc_params params;
const struct coproc_reg *r;
+ int ret;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -710,7 +700,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- return reg_to_user(uaddr, &r->val, id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val = r->val;
+
+ ret = reg_to_user(uaddr, &val, id);
+ } else if (KVM_REG_SIZE(id) == 8) {
+ ret = reg_to_user(uaddr, &r->val, id);
+ }
+ return ret;
}
static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -718,7 +716,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
struct coproc_params params;
const struct coproc_reg *r;
int err;
- u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+ u64 val;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -726,7 +724,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- err = reg_from_user(&val, uaddr, id);
+ err = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val32;
+
+ err = reg_from_user(&val32, uaddr, id);
+ if (!err)
+ val = val32;
+ } else if (KVM_REG_SIZE(id) == 8) {
+ err = reg_from_user(&val, uaddr, id);
+ }
if (err)
return err;
@@ -742,7 +749,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
@@ -1004,6 +1011,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_get(reg->id, uaddr);
@@ -1015,14 +1023,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return get_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit. */
- return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ val = vcpu_cp15_reg64_get(vcpu, r);
+ ret = reg_to_user(uaddr, &val, reg->id);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ }
+
+ return ret;
}
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_set(reg->id, uaddr);
@@ -1034,8 +1052,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return set_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit */
- return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ ret = reg_from_user(&val, uaddr, reg->id);
+ if (!ret)
+ vcpu_cp15_reg64_set(vcpu, r, val);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ }
+
+ return ret;
}
static unsigned int num_demux_regs(void)
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 1a44bbe39643..88d24a3a9778 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
#define is64 .is_64 = true
#define is32 .is_64 = false
-bool access_sctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r);
+bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r);
#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index e6f4ae48bda9..a7136757d373 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
static const struct coproc_reg a15_regs[] = {
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
+ access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 },
};
static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 17fc7cd479d3..b19e46d1b2c0 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
static const struct coproc_reg a7_regs[] = {
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+ access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 },
};
static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c522..d503fbb787d3 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr = HCR_GUEST_MASK;
return 0;
}
@@ -110,32 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
- return false;
-}
-
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
- return 0;
-}
-
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
- return 0;
-}
-
-#else
-
#define NUM_TIMER_REGS 3
static bool is_timer_reg(u64 index)
@@ -163,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return 0;
}
-#endif
-
static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(long)reg->addr;
@@ -173,7 +144,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
if (ret != 0)
- return ret;
+ return -EFAULT;
return kvm_arm_timer_set_reg(vcpu, reg->id, val);
}
@@ -274,13 +245,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __attribute_const__ kvm_target_cpu(void)
{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
+ switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
@@ -290,31 +255,6 @@ int __attribute_const__ kvm_target_cpu(void)
}
}
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
- const struct kvm_vcpu_init *init)
-{
- unsigned int i;
-
- /* We can only cope with guest==host and only on A15/A7 (for now). */
- if (init->target != kvm_target_cpu())
- return -EINVAL;
-
- vcpu->arch.target = init->target;
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
- /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
- for (i = 0; i < sizeof(init->features) * 8; i++) {
- if (test_bit(i, (void *)init->features)) {
- if (i >= KVM_VCPU_MAX_FEATURES)
- return -ENOENT;
- set_bit(i, vcpu->arch.features);
- }
- }
-
- /* Now we know what it is, we can reset it. */
- return kvm_reset_vcpu(vcpu);
-}
-
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
{
int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 4c979d466cc1..95f12b2ccdcb 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -87,11 +87,15 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- trace_kvm_wfi(*vcpu_pc(vcpu));
- if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+ if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
+ trace_kvm_wfx(*vcpu_pc(vcpu), true);
kvm_vcpu_on_spin(vcpu);
- else
+ } else {
+ trace_kvm_wfx(*vcpu_pc(vcpu), false);
kvm_vcpu_block(vcpu);
+ }
+
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369cc..3988e72d16ff 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
@@ -71,7 +72,7 @@ __do_hyp_init:
bne phase2 @ Yes, second stage init
@ Set the HTTBR to point to the hypervisor PGD pointer passed
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -98,6 +99,10 @@ __do_hyp_init:
mrc p15, 0, r0, c10, c2, 1
mcr p15, 4, r0, c10, c2, 1
+ @ Invalidate the stale TLBs from Bootloader
+ mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
+ dsb ish
+
@ Set the HSCTLR to:
@ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
@ - Endianness: Kernel config
@@ -134,10 +139,10 @@ phase2:
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
- mov pc, r0
+ ret r0
target: @ We're now in the trampoline code, switch page tables
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
isb
@ Invalidate the old TLBs
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0d68d4073068..79caf79b304a 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
dsb ishst
add r0, r0, #KVM_VTTBR
ldrd r2, r3, [r0]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
isb
mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
dsb ish
@@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
bx lr
ENDPROC(__kvm_tlb_flush_vmid_ipa)
+/**
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
+ *
+ * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
+ * parameter
+ */
+
+ENTRY(__kvm_tlb_flush_vmid)
+ b __kvm_tlb_flush_vmid_ipa
+ENDPROC(__kvm_tlb_flush_vmid)
+
/********************************************************************
* Flush TLBs and instruction caches of all CPUs inside the inner-shareable
* domain, for all VMIDs
@@ -135,7 +146,7 @@ ENTRY(__kvm_vcpu_run)
ldr r1, [vcpu, #VCPU_KVM]
add r1, r1, #KVM_VTTBR
ldrd r2, r3, [r1]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
@ We're all done, just restore the GPRs and go to the guest
restore_guest_regs
@@ -199,8 +210,13 @@ after_vfp_restore:
restore_host_regs
clrex @ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
mov r0, r1 @ Return the return code
mov r1, #0 @ Clear upper bits in return value
+#else
+ @ r1 already has return code
+ mov r0, #0 @ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
bx lr @ return to IOCTL
/********************************************************************
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 76af93025574..35e4a3a0c476 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
#include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
#define VCPU_USR_SP (VCPU_USR_REG(13))
@@ -401,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0
* Assumes vcpu pointer in vcpu reg
*/
.macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -420,15 +420,30 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r8, [r2, #GICH_ELRSR0]
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
-
- str r3, [r11, #VGIC_CPU_HCR]
- str r4, [r11, #VGIC_CPU_VMCR]
- str r5, [r11, #VGIC_CPU_MISR]
- str r6, [r11, #VGIC_CPU_EISR]
- str r7, [r11, #(VGIC_CPU_EISR + 4)]
- str r8, [r11, #VGIC_CPU_ELRSR]
- str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
- str r10, [r11, #VGIC_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r5, r5 )
+ARM_BE8(rev r6, r6 )
+ARM_BE8(rev r7, r7 )
+ARM_BE8(rev r8, r8 )
+ARM_BE8(rev r9, r9 )
+ARM_BE8(rev r10, r10 )
+
+ str r3, [r11, #VGIC_V2_CPU_HCR]
+ str r4, [r11, #VGIC_V2_CPU_VMCR]
+ str r5, [r11, #VGIC_V2_CPU_MISR]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+ str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
+ str r7, [r11, #VGIC_V2_CPU_EISR]
+ str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+ str r9, [r11, #VGIC_V2_CPU_ELRSR]
+#else
+ str r6, [r11, #VGIC_V2_CPU_EISR]
+ str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+ str r8, [r11, #VGIC_V2_CPU_ELRSR]
+ str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+#endif
+ str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
mov r5, #0
@@ -436,14 +451,14 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Save list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r2], #4
+ARM_BE8(rev r6, r6 )
str r6, [r3], #4
subs r4, r4, #1
bne 1b
2:
-#endif
.endm
/*
@@ -452,7 +467,6 @@ vcpu .req r0 @ vcpu pointer always in r0
* Assumes vcpu pointer in vcpu reg
*/
.macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -463,9 +477,12 @@ vcpu .req r0 @ vcpu pointer always in r0
add r11, vcpu, #VCPU_VGIC_CPU
/* We only restore a minimal set of registers */
- ldr r3, [r11, #VGIC_CPU_HCR]
- ldr r4, [r11, #VGIC_CPU_VMCR]
- ldr r8, [r11, #VGIC_CPU_APR]
+ ldr r3, [r11, #VGIC_V2_CPU_HCR]
+ ldr r4, [r11, #VGIC_V2_CPU_VMCR]
+ ldr r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r8, r8 )
str r3, [r2, #GICH_HCR]
str r4, [r2, #GICH_VMCR]
@@ -473,14 +490,14 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Restore list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r3], #4
+ARM_BE8(rev r6, r6 )
str r6, [r2], #4
subs r4, r4, #1
bne 1b
2:
-#endif
.endm
#define CNTHCTL_PL1PCTEN (1 << 0)
@@ -494,7 +511,6 @@ vcpu .req r0 @ vcpu pointer always in r0
* Clobbers r2-r5
*/
.macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -506,7 +522,7 @@ vcpu .req r0 @ vcpu pointer always in r0
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
isb
- mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
strd r2, r3, [r5]
@@ -516,7 +532,6 @@ vcpu .req r0 @ vcpu pointer always in r0
mcrr p15, 4, r2, r2, c14 @ CNTVOFF
1:
-#endif
@ Allow physical timer/counter access for the host
mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -538,7 +553,6 @@ vcpu .req r0 @ vcpu pointer always in r0
bic r2, r2, #CNTHCTL_PL1PCEN
mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
-#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -546,19 +560,18 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r2, [r4, #KVM_TIMER_CNTVOFF]
ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
- mcrr p15, 4, r2, r3, c14 @ CNTVOFF
+ mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
ldrd r2, r3, [r5]
- mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
isb
ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
and r2, r2, #3
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
1:
-#endif
.endm
.equ vmentry, 0
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93182e9..974b1c606d04 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
{
unsigned long rt;
- int len;
- bool is_write, sign_extend;
+ int access_size;
+ bool sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
/* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 1;
}
- len = kvm_vcpu_dabt_get_as(vcpu);
- if (unlikely(len < 0))
- return len;
+ access_size = kvm_vcpu_dabt_get_as(vcpu);
+ if (unlikely(access_size < 0))
+ return access_size;
- is_write = kvm_vcpu_dabt_iswrite(vcpu);
+ *is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
- mmio->is_write = is_write;
- mmio->phys_addr = fault_ipa;
- mmio->len = len;
+ *len = access_size;
vcpu->arch.mmio_decode.sign_extend = sign_extend;
vcpu->arch.mmio_decode.rt = rt;
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
- struct kvm_exit_mmio mmio;
unsigned long data;
unsigned long rt;
int ret;
+ bool is_write;
+ int len;
+ u8 data_buf[8];
/*
- * Prepare MMIO operation. First stash it in a private
- * structure that we can use for in-kernel emulation. If the
- * kernel can't handle it, copy it into run->mmio and let user
- * space do its magic.
+ * Prepare MMIO operation. First decode the syndrome data we get
+ * from the CPU. Then try if some in-kernel emulation feels
+ * responsible, otherwise let user space do its magic.
*/
-
if (kvm_vcpu_dabt_isvalid(vcpu)) {
- ret = decode_hsr(vcpu, fault_ipa, &mmio);
+ ret = decode_hsr(vcpu, &is_write, &len);
if (ret)
return ret;
} else {
@@ -187,19 +184,35 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
rt = vcpu->arch.mmio_decode.rt;
- data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
- trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
- KVM_TRACE_MMIO_READ_UNSATISFIED,
- mmio.len, fault_ipa,
- (mmio.is_write) ? data : 0);
+ if (is_write) {
+ data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
- if (mmio.is_write)
- mmio_write_buf(mmio.data, mmio.len, data);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ mmio_write_buf(data_buf, len, data);
+
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
+ } else {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
+ fault_ipa, 0);
- if (vgic_handle_mmio(vcpu, run, &mmio))
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
+ }
+
+ /* Now prepare kvm_run for the potential return to userland. */
+ run->mmio.is_write = is_write;
+ run->mmio.phys_addr = fault_ipa;
+ run->mmio.len = len;
+ memcpy(run->mmio.data, data_buf, len);
+
+ if (!ret) {
+ /* We handled the access successfully in the kernel. */
+ kvm_handle_mmio_return(vcpu, run);
return 1;
+ }
- kvm_prepare_mmio(run, &mmio);
+ run->exit_reason = KVM_EXIT_MMIO;
return 0;
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16f804938b8f..1d5accbd3dcf 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -35,16 +35,36 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
+static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
-static void *init_bounce_page;
static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
+#define kvm_pud_huge(_x) pud_huge(_x)
+
+#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
+#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
+
+static bool memslot_is_logging(struct kvm_memory_slot *memslot)
+{
+ return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
+}
+
+/**
+ * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * @kvm: pointer to kvm structure.
+ *
+ * Interface to HYP function to flush all VM TLB entries
+ */
+void kvm_flush_remote_tlbs(struct kvm *kvm)
+{
+ kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
@@ -58,6 +78,45 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+ __kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+ __kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+ __kvm_flush_dcache_pud(pud);
+}
+
+/**
+ * stage2_dissolve_pmd() - clear and flush huge PMD entry
+ * @kvm: pointer to kvm structure.
+ * @addr: IPA
+ * @pmd: pmd pointer for IPA
+ *
+ * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
+ * pages in the range dirty.
+ */
+static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
+{
+ if (!kvm_pmd_huge(*pmd))
+ return;
+
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ put_page(virt_to_page(pmd));
+}
+
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
int min, int max)
{
@@ -90,104 +149,153 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
- struct page *ptr_page = virt_to_page(ptr);
- return page_count(ptr_page) == 1;
+ pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+ pgd_clear(pgd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pud_free(NULL, pud_table);
+ put_page(virt_to_page(pgd));
}
static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
- if (pud_huge(*pud)) {
- pud_clear(pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- } else {
- pmd_t *pmd_table = pmd_offset(pud, 0);
- pud_clear(pud);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- pmd_free(NULL, pmd_table);
- }
+ pmd_t *pmd_table = pmd_offset(pud, 0);
+ VM_BUG_ON(pud_huge(*pud));
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pmd_free(NULL, pmd_table);
put_page(virt_to_page(pud));
}
static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
- if (kvm_pmd_huge(*pmd)) {
- pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- } else {
- pte_t *pte_table = pte_offset_kernel(pmd, 0);
- pmd_clear(pmd);
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- pte_free_kernel(NULL, pte_table);
- }
+ pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ VM_BUG_ON(kvm_pmd_huge(*pmd));
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pte_free_kernel(NULL, pte_table);
put_page(virt_to_page(pmd));
}
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM. However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
{
- if (pte_present(*pte)) {
- kvm_set_pte(pte, __pte(0));
- put_page(virt_to_page(pte));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- }
+ phys_addr_t start_addr = addr;
+ pte_t *pte, *start_pte;
+
+ start_pte = pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ pte_t old_pte = *pte;
+
+ kvm_set_pte(pte, __pte(0));
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+ /* No need to invalidate the cache for device mappings */
+ if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ kvm_flush_dcache_pte(old_pte);
+
+ put_page(virt_to_page(pte));
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (kvm_pte_table_empty(kvm, start_pte))
+ clear_pmd_entry(kvm, pmd, start_addr);
}
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
- unsigned long long start, u64 size)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long long addr = start, end = start + size;
- u64 next;
+ phys_addr_t next, start_addr = addr;
+ pmd_t *pmd, *start_pmd;
- while (addr < end) {
- pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- pte = NULL;
- if (pud_none(*pud)) {
- addr = kvm_pud_addr_end(addr, end);
- continue;
- }
+ start_pmd = pmd = pmd_offset(pud, addr);
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ pmd_t old_pmd = *pmd;
- if (pud_huge(*pud)) {
- /*
- * If we are dealing with a huge pud, just clear it and
- * move on.
- */
- clear_pud_entry(kvm, pud, addr);
- addr = kvm_pud_addr_end(addr, end);
- continue;
- }
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- addr = kvm_pmd_addr_end(addr, end);
- continue;
- }
+ kvm_flush_dcache_pmd(old_pmd);
- if (!kvm_pmd_huge(*pmd)) {
- pte = pte_offset_kernel(pmd, addr);
- clear_pte_entry(kvm, pte, addr);
- next = addr + PAGE_SIZE;
+ put_page(virt_to_page(pmd));
+ } else {
+ unmap_ptes(kvm, pmd, addr, next);
+ }
}
+ } while (pmd++, addr = next, addr != end);
- /*
- * If the pmd entry is to be cleared, walk back up the ladder
- */
- if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
- clear_pmd_entry(kvm, pmd, addr);
- next = kvm_pmd_addr_end(addr, end);
- if (page_empty(pmd) && !page_empty(pud)) {
- clear_pud_entry(kvm, pud, addr);
- next = kvm_pud_addr_end(addr, end);
+ if (kvm_pmd_table_empty(kvm, start_pmd))
+ clear_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
+{
+ phys_addr_t next, start_addr = addr;
+ pud_t *pud, *start_pud;
+
+ start_pud = pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ if (pud_huge(*pud)) {
+ pud_t old_pud = *pud;
+
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+ kvm_flush_dcache_pud(old_pud);
+
+ put_page(virt_to_page(pud));
+ } else {
+ unmap_pmds(kvm, pud, addr, next);
}
}
+ } while (pud++, addr = next, addr != end);
- addr = next;
- }
+ if (kvm_pud_table_empty(kvm, start_pud))
+ clear_pgd_entry(kvm, pgd, start_addr);
+}
+
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+ phys_addr_t start, u64 size)
+{
+ pgd_t *pgd;
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+ pgd = pgdp + kvm_pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ if (!pgd_none(*pgd))
+ unmap_puds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
}
static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
@@ -197,10 +305,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
pte = pte_offset_kernel(pmd, addr);
do {
- if (!pte_none(*pte)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
- }
+ if (!pte_none(*pte) &&
+ (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ kvm_flush_dcache_pte(*pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
}
@@ -214,12 +321,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
do {
next = kvm_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
- } else {
+ if (kvm_pmd_huge(*pmd))
+ kvm_flush_dcache_pmd(*pmd);
+ else
stage2_flush_ptes(kvm, pmd, addr, next);
- }
}
} while (pmd++, addr = next, addr != end);
}
@@ -234,12 +339,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
do {
next = kvm_pud_addr_end(addr, end);
if (!pud_none(*pud)) {
- if (pud_huge(*pud)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
- } else {
+ if (pud_huge(*pud))
+ kvm_flush_dcache_pud(*pud);
+ else
stage2_flush_pmds(kvm, pud, addr, next);
- }
}
} while (pud++, addr = next, addr != end);
}
@@ -252,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -266,7 +369,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
* Go through the stage 2 page tables and invalidate any cache lines
* backing memory already mapped to the VM.
*/
-void stage2_flush_vm(struct kvm *kvm)
+static void stage2_flush_vm(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
@@ -295,16 +398,13 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+ free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- free_page((unsigned long)init_bounce_page);
- init_bounce_page = NULL;
-
mutex_unlock(&kvm_hyp_pgd_mutex);
}
@@ -332,9 +432,14 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
- free_pages((unsigned long)hyp_pgd, pgd_order);
+ free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
}
+ if (merged_hyp_pgd) {
+ clear_page(merged_hyp_pgd);
+ free_page((unsigned long)merged_hyp_pgd);
+ merged_hyp_pgd = NULL;
+ }
mutex_unlock(&kvm_hyp_pgd_mutex);
}
@@ -390,13 +495,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
return 0;
}
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int ret;
+
+ addr = start;
+ do {
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none_or_clear_bad(pud)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate Hyp pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ get_page(virt_to_page(pud));
+ kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ }
+
+ next = pud_addr_end(addr, end);
+ ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ if (ret)
+ return ret;
+ pfn += (next - addr) >> PAGE_SHIFT;
+ } while (addr = next, addr != end);
+
+ return 0;
+}
+
static int __create_hyp_mappings(pgd_t *pgdp,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd;
unsigned long addr, next;
int err = 0;
@@ -405,22 +543,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
end = PAGE_ALIGN(end);
do {
pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none_or_clear_bad(pud)) {
- pmd = pmd_alloc_one(NULL, addr);
- if (!pmd) {
- kvm_err("Cannot allocate Hyp pmd\n");
+ if (pgd_none(*pgd)) {
+ pud = pud_alloc_one(NULL, addr);
+ if (!pud) {
+ kvm_err("Cannot allocate Hyp pud\n");
err = -ENOMEM;
goto out;
}
- pud_populate(NULL, pud, pmd);
- get_page(virt_to_page(pud));
- kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
}
next = pgd_addr_end(addr, end);
- err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+ err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
if (err)
goto out;
pfn += (next - addr) >> PAGE_SHIFT;
@@ -497,6 +634,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+ unsigned int size = kvm_get_hwpgd_size();
+
+ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -511,20 +662,62 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
pgd_t *pgd;
+ void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
- if (!pgd)
+ hwpgd = kvm_alloc_hwpgd();
+ if (!hwpgd)
return -ENOMEM;
- memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+ /* When the kernel uses more levels of page tables than the
+ * guest, we allocate a fake PGD and pre-populate it to point
+ * to the next-level page table, which will be the real
+ * initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+ * the PMD and the kernel will use folded pud.
+ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+ * pages.
+ */
+ if (KVM_PREALLOC_LEVEL > 0) {
+ int i;
+
+ /*
+ * Allocate fake pgd for the page table manipulation macros to
+ * work. This is not used by the hardware and we have no
+ * alignment requirement for this allocation.
+ */
+ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (!pgd) {
+ kvm_free_hwpgd(hwpgd);
+ return -ENOMEM;
+ }
+
+ /* Plug the HW PGD into the fake one. */
+ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+ if (KVM_PREALLOC_LEVEL == 1)
+ pgd_populate(NULL, pgd + i,
+ (pud_t *)hwpgd + i * PTRS_PER_PUD);
+ else if (KVM_PREALLOC_LEVEL == 2)
+ pud_populate(NULL, pud_offset(pgd, 0) + i,
+ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+ }
+ } else {
+ /*
+ * Allocate actual first-level Stage-2 page table used by the
+ * hardware for Stage-2 page table walks.
+ */
+ pgd = (pgd_t *)hwpgd;
+ }
+
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
-
return 0;
}
@@ -544,6 +737,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
unmap_range(kvm, kvm->arch.pgd, start, size);
}
+static void stage2_unmap_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ hva_t hva = memslot->userspace_addr;
+ phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = PAGE_SIZE * memslot->npages;
+ hva_t reg_end = hva + size;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we should
+ * unmap any of them.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (!(vma->vm_flags & VM_PFNMAP)) {
+ gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+ unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ stage2_unmap_memslot(kvm, memslot);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
/**
* kvm_free_stage2_pgd - free all stage-2 tables
* @kvm: The KVM struct pointer for the VM.
@@ -561,19 +819,38 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
+ if (KVM_PREALLOC_LEVEL > 0)
+ kfree(kvm->arch.pgd);
+
kvm->arch.pgd = NULL;
}
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
+
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ if (WARN_ON(pgd_none(*pgd))) {
+ if (!cache)
+ return NULL;
+ pud = mmu_memory_cache_alloc(cache);
+ pgd_populate(NULL, pgd, pud);
+ get_page(virt_to_page(pgd));
+ }
+
+ return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
+{
+ pud_t *pud;
pmd_t *pmd;
- pgd = kvm->arch.pgd + pgd_index(addr);
- pud = pud_offset(pgd, addr);
+ pud = stage2_get_pud(kvm, cache, addr);
if (pud_none(*pud)) {
if (!cache)
return NULL;
@@ -614,12 +891,17 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
}
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+ phys_addr_t addr, const pte_t *new_pte,
+ unsigned long flags)
{
pmd_t *pmd;
pte_t *pte, old_pte;
+ bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
+ bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
- /* Create stage-2 page table mapping - Level 1 */
+ VM_BUG_ON(logging_active && !cache);
+
+ /* Create stage-2 page table mapping - Levels 0 and 1 */
pmd = stage2_get_pmd(kvm, cache, addr);
if (!pmd) {
/*
@@ -629,6 +911,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0;
}
+ /*
+ * While dirty page logging - dissolve huge PMD, then continue on to
+ * allocate page.
+ */
+ if (logging_active)
+ stage2_dissolve_pmd(kvm, addr, pmd);
+
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
@@ -664,7 +953,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
* @size: The size of the mapping
*/
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size)
+ phys_addr_t pa, unsigned long size, bool writable)
{
phys_addr_t addr, end;
int ret = 0;
@@ -677,11 +966,16 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
- ret = mmu_topup_memory_cache(&cache, 2, 2);
+ if (writable)
+ kvm_set_s2pte_writable(&pte);
+
+ ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+ ret = stage2_set_pte(kvm, &cache, addr, &pte,
+ KVM_S2PTE_FLAG_IS_IOMAP);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -735,21 +1029,202 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
return false;
}
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+ return !pfn_valid(pfn);
+}
+
+/**
+ * stage2_wp_ptes - write protect PMD range
+ * @pmd: pointer to pmd entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+ pte_t *pte;
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ if (!kvm_s2pte_readonly(pte))
+ kvm_set_s2pte_readonly(pte);
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+/**
+ * stage2_wp_pmds - write protect PUD range
+ * @pud: pointer to pud entry
+ * @addr: range start address
+ * @end: range end address
+ */
+static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+ pmd_t *pmd;
+ phys_addr_t next;
+
+ pmd = pmd_offset(pud, addr);
+
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ if (!kvm_s2pmd_readonly(pmd))
+ kvm_set_s2pmd_readonly(pmd);
+ } else {
+ stage2_wp_ptes(pmd, addr, next);
+ }
+ }
+ } while (pmd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_puds - write protect PGD range
+ * @pgd: pointer to pgd entry
+ * @addr: range start address
+ * @end: range end address
+ *
+ * Process PUD entries, for a huge PUD we cause a panic.
+ */
+static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+ pud_t *pud;
+ phys_addr_t next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ /* TODO:PUD not supported, revisit later if supported */
+ BUG_ON(kvm_pud_huge(*pud));
+ stage2_wp_pmds(pud, addr, next);
+ }
+ } while (pud++, addr = next, addr != end);
+}
+
+/**
+ * stage2_wp_range() - write protect stage2 memory region range
+ * @kvm: The KVM pointer
+ * @addr: Start address of range
+ * @end: End address of range
+ */
+static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+ pgd_t *pgd;
+ phys_addr_t next;
+
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+ do {
+ /*
+ * Release kvm_mmu_lock periodically if the memory region is
+ * large. Otherwise, we may see kernel panics with
+ * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
+ * CONFIG_LOCKDEP. Additionally, holding the lock too long
+ * will also starve other vCPUs.
+ */
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock))
+ cond_resched_lock(&kvm->mmu_lock);
+
+ next = kvm_pgd_addr_end(addr, end);
+ if (pgd_present(*pgd))
+ stage2_wp_puds(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
+ * @kvm: The KVM pointer
+ * @slot: The memory slot to write protect
+ *
+ * Called to start logging dirty pages after memory region
+ * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
+ * all present PMD and PTEs are write protected in the memory region.
+ * Afterwards read of dirty page log can be called.
+ *
+ * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
+ * serializing operations for VM memory regions.
+ */
+void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
+{
+ struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
+ phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ stage2_wp_range(kvm, start, end);
+ spin_unlock(&kvm->mmu_lock);
+ kvm_flush_remote_tlbs(kvm);
+}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
+ * @kvm: The KVM pointer
+ * @slot: The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
+ * slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire kvm_mmu_lock.
+ */
+static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
+{
+ phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+ phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
+ phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+ stage2_wp_range(kvm, start, end);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
+ * dirty pages.
+ *
+ * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
+ * enable dirty logging for them.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
+{
+ kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
+}
+
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+ unsigned long size, bool uncached)
+{
+ __coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
int ret;
bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
gfn_t gfn = fault_ipa >> PAGE_SHIFT;
- unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
pfn_t pfn;
+ pgprot_t mem_type = PAGE_S2;
+ bool fault_ipa_uncached;
+ bool logging_active = memslot_is_logging(memslot);
+ unsigned long flags = 0;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
@@ -758,7 +1233,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
- if (is_vm_hugetlb_page(vma)) {
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+
+ if (is_vm_hugetlb_page(vma) && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
@@ -778,7 +1259,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
up_read(&current->mm->mmap_sem);
/* We need minimum second+third level pages */
- ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+ ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+ KVM_NR_MEM_OBJS);
if (ret)
return ret;
@@ -798,38 +1280,103 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (is_error_pfn(pfn))
return -EFAULT;
+ if (kvm_is_device_pfn(pfn)) {
+ mem_type = PAGE_S2_DEVICE;
+ flags |= KVM_S2PTE_FLAG_IS_IOMAP;
+ } else if (logging_active) {
+ /*
+ * Faults on pages in a memslot with logging enabled
+ * should not be mapped with huge pages (it introduces churn
+ * and performance degradation), so force a pte mapping.
+ */
+ force_pte = true;
+ flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
+
+ /*
+ * Only actually map the page as writable if this was a write
+ * fault.
+ */
+ if (!write_fault)
+ writable = false;
+ }
+
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
+
if (!hugetlb && !force_pte)
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+ fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
if (hugetlb) {
- pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+ pmd_t new_pmd = pfn_pmd(pfn, mem_type);
new_pmd = pmd_mkhuge(new_pmd);
if (writable) {
kvm_set_s2pmd_writable(&new_pmd);
kvm_set_pfn_dirty(pfn);
}
- coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+ coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
} else {
- pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+ pte_t new_pte = pfn_pte(pfn, mem_type);
+
if (writable) {
kvm_set_s2pte_writable(&new_pte);
kvm_set_pfn_dirty(pfn);
+ mark_page_dirty(kvm, gfn);
}
- coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
- ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+ coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
+ ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
}
-
out_unlock:
spin_unlock(&kvm->mmu_lock);
+ kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret;
}
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ pfn_t pfn;
+ bool pfn_valid = false;
+
+ trace_kvm_access_fault(fault_ipa);
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ goto out;
+
+ if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ *pmd = pmd_mkyoung(*pmd);
+ pfn = pmd_pfn(*pmd);
+ pfn_valid = true;
+ goto out;
+ }
+
+ pte = pte_offset_kernel(pmd, fault_ipa);
+ if (pte_none(*pte)) /* Nothing there either */
+ goto out;
+
+ *pte = pte_mkyoung(*pte); /* Just a page... */
+ pfn = pte_pfn(*pte);
+ pfn_valid = true;
+out:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ if (pfn_valid)
+ kvm_set_pfn_accessed(pfn);
+}
+
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
@@ -847,7 +1394,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -858,17 +1406,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
- if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+ if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+ fault_status != FSC_ACCESS) {
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -876,13 +1430,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -894,9 +1441,16 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ /* Userspace should not be able to register out-of-bounds IPAs */
+ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
+ if (fault_status == FSC_ACCESS) {
+ handle_access_fault(vcpu, fault_ipa);
+ ret = 1;
+ goto out_unlock;
+ }
- ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
@@ -904,15 +1458,16 @@ out_unlock:
return ret;
}
-static void handle_hva_to_gpa(struct kvm *kvm,
- unsigned long start,
- unsigned long end,
- void (*handler)(struct kvm *kvm,
- gpa_t gpa, void *data),
- void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ int (*handler)(struct kvm *kvm,
+ gpa_t gpa, void *data),
+ void *data)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
+ int ret = 0;
slots = kvm_memslots(kvm);
@@ -936,14 +1491,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
for (; gfn < gfn_end; ++gfn) {
gpa_t gpa = gfn << PAGE_SHIFT;
- handler(kvm, gpa, data);
+ ret |= handler(kvm, gpa, data);
}
}
+
+ return ret;
}
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+ return 0;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -969,11 +1527,19 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
}
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pte_t *pte = (pte_t *)data;
- stage2_set_pte(kvm, NULL, gpa, pte, false);
+ /*
+ * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
+ * flag clear because MMU notifiers will have unmapped a huge PMD before
+ * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
+ * therefore stage2_set_pte() never needs to clear out a huge PMD
+ * through this calling path.
+ */
+ stage2_set_pte(kvm, NULL, gpa, pte, 0);
+ return 0;
}
@@ -990,6 +1556,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pmd = stage2_get_pmd(kvm, NULL, gpa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ return 0;
+
+ if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ if (pmd_young(*pmd)) {
+ *pmd = pmd_mkold(*pmd);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ pte = pte_offset_kernel(pmd, gpa);
+ if (pte_none(*pte))
+ return 0;
+
+ if (pte_young(*pte)) {
+ *pte = pte_mkold(*pte); /* Just a page... */
+ return 1;
+ }
+
+ return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pmd = stage2_get_pmd(kvm, NULL, gpa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ return 0;
+
+ if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */
+ return pmd_young(*pmd);
+
+ pte = pte_offset_kernel(pmd, gpa);
+ if (!pte_none(*pte)) /* Just a page... */
+ return pte_young(*pte);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ trace_kvm_age_hva(start, end);
+ return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_test_age_hva(hva);
+ return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
@@ -997,12 +1624,18 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
phys_addr_t kvm_mmu_get_httbr(void)
{
- return virt_to_phys(hyp_pgd);
+ if (__kvm_cpu_uses_extended_idmap())
+ return virt_to_phys(merged_hyp_pgd);
+ else
+ return virt_to_phys(hyp_pgd);
}
phys_addr_t kvm_mmu_get_boot_httbr(void)
{
- return virt_to_phys(boot_hyp_pgd);
+ if (__kvm_cpu_uses_extended_idmap())
+ return virt_to_phys(merged_hyp_pgd);
+ else
+ return virt_to_phys(boot_hyp_pgd);
}
phys_addr_t kvm_get_idmap_vector(void)
@@ -1018,42 +1651,14 @@ int kvm_mmu_init(void)
hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
- if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
- /*
- * Our init code is crossing a page boundary. Allocate
- * a bounce page, copy the code over and use that.
- */
- size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
- phys_addr_t phys_base;
-
- init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
- if (!init_bounce_page) {
- kvm_err("Couldn't allocate HYP init bounce page\n");
- err = -ENOMEM;
- goto out;
- }
-
- memcpy(init_bounce_page, __hyp_idmap_text_start, len);
- /*
- * Warning: the code we just copied to the bounce page
- * must be flushed to the point of coherency.
- * Otherwise, the data may be sitting in L2, and HYP
- * mode won't be able to observe it as it runs with
- * caches off at that point.
- */
- kvm_flush_dcache_to_poc(init_bounce_page, len);
-
- phys_base = kvm_virt_to_phys(init_bounce_page);
- hyp_idmap_vector += phys_base - hyp_idmap_start;
- hyp_idmap_start = phys_base;
- hyp_idmap_end = phys_base + len;
-
- kvm_info("Using HYP init bounce page @%lx\n",
- (unsigned long)phys_base);
- }
+ /*
+ * We rely on the linker script to ensure at build time that the HYP
+ * init code does not cross a page boundary.
+ */
+ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
- hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
- boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+ hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+ boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
@@ -1073,6 +1678,17 @@ int kvm_mmu_init(void)
goto out;
}
+ if (__kvm_cpu_uses_extended_idmap()) {
+ merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!merged_hyp_pgd) {
+ kvm_err("Failed to allocate extra HYP pgd\n");
+ goto out;
+ }
+ __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
+ hyp_idmap_start);
+ return 0;
+ }
+
/* Map the very same page at the trampoline VA */
err = __create_hyp_mappings(boot_hyp_pgd,
TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
@@ -1100,3 +1716,211 @@ out:
free_hyp_pgds();
return err;
}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
+{
+ /*
+ * At this point memslot has been committed and there is an
+ * allocated dirty_bitmap[], dirty pages will be be tracked while the
+ * memory slot is write protected.
+ */
+ if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ kvm_mmu_wp_memory_region(kvm, mem->slot);
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+ hva_t hva = mem->userspace_addr;
+ hva_t reg_end = hva + mem->memory_size;
+ bool writable = !(mem->flags & KVM_MEM_READONLY);
+ int ret = 0;
+
+ if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
+ change != KVM_MR_FLAGS_ONLY)
+ return 0;
+
+ /*
+ * Prevent userspace from creating a memory region outside of the IPA
+ * space addressable by the KVM guest IPA space.
+ */
+ if (memslot->base_gfn + memslot->npages >=
+ (KVM_PHYS_SIZE >> PAGE_SHIFT))
+ return -EFAULT;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we can map
+ * any of them right now.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Mapping a read-only VMA is only allowed if the
+ * memory region is configured as read-only.
+ */
+ if (writable && !(vma->vm_flags & VM_WRITE)) {
+ ret = -EPERM;
+ break;
+ }
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (vma->vm_flags & VM_PFNMAP) {
+ gpa_t gpa = mem->guest_phys_addr +
+ (vm_start - mem->userspace_addr);
+ phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+ vm_start - vma->vm_start;
+
+ /* IO region dirty page logging not allowed */
+ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ return -EINVAL;
+
+ ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+ vm_end - vm_start,
+ writable);
+ if (ret)
+ break;
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+
+ if (change == KVM_MR_FLAGS_ONLY)
+ return ret;
+
+ spin_lock(&kvm->mmu_lock);
+ if (ret)
+ unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+ else
+ stage2_flush_memslot(kvm, memslot);
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ /*
+ * Readonly memslots are not incoherent with the caches by definition,
+ * but in practice, they are used mostly to emulate ROMs or NOR flashes
+ * that the guest may consider devices and hence map as uncached.
+ * To prevent incoherency issues in these cases, tag all readonly
+ * regions as incoherent.
+ */
+ if (slot->flags & KVM_MEM_READONLY)
+ slot->flags |= KVM_MEMSLOT_INCOHERENT;
+ return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, gpa, size);
+ spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
+ *
+ * Main problems:
+ * - S/W ops are local to a CPU (not broadcast)
+ * - We have line migration behind our back (speculation)
+ * - System caches don't support S/W at all (damn!)
+ *
+ * In the face of the above, the best we can do is to try and convert
+ * S/W ops to VA ops. Because the guest is not allowed to infer the
+ * S/W to PA mapping, it can only use S/W to nuke the whole cache,
+ * which is a rather good thing for us.
+ *
+ * Also, it is only used when turning caches on/off ("The expected
+ * usage of the cache maintenance instructions that operate by set/way
+ * is associated with the cache maintenance instructions associated
+ * with the powerdown and powerup of caches, if this is required by
+ * the implementation.").
+ *
+ * We use the following policy:
+ *
+ * - If we trap a S/W operation, we enable VM trapping to detect
+ * caches being turned on/off, and do a full clean.
+ *
+ * - We flush the caches on both caches being turned on and off.
+ *
+ * - Once the caches are enabled, we stop trapping VM ops.
+ */
+void kvm_set_way_flush(struct kvm_vcpu *vcpu)
+{
+ unsigned long hcr = vcpu_get_hcr(vcpu);
+
+ /*
+ * If this is the first time we do a S/W operation
+ * (i.e. HCR_TVM not set) flush the whole memory, and set the
+ * VM trapping.
+ *
+ * Otherwise, rely on the VM trapping to wait for the MMU +
+ * Caches to be turned off. At that point, we'll be able to
+ * clean the caches again.
+ */
+ if (!(hcr & HCR_TVM)) {
+ trace_kvm_set_way_flush(*vcpu_pc(vcpu),
+ vcpu_has_cache_enabled(vcpu));
+ stage2_flush_vm(vcpu->kvm);
+ vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+ }
+}
+
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
+{
+ bool now_enabled = vcpu_has_cache_enabled(vcpu);
+
+ /*
+ * If switching the MMU+caches on, need to invalidate the caches.
+ * If switching it off, need to clean the caches.
+ * Clean + invalidate does the trick always.
+ */
+ if (now_enabled != was_enabled)
+ stage2_flush_vm(vcpu->kvm);
+
+ /* Caches are now on, stop trapping VM ops (until a S/W op) */
+ if (now_enabled)
+ vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+
+ trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
+}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..02fa8eff6ae1 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,12 +15,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/preempt.h>
#include <linux/kvm_host.h>
#include <linux/wait.h>
#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_psci.h>
+#include <asm/kvm_host.h>
/*
* This is an implementation of the Power State Coordination Interface
@@ -65,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
- struct kvm_vcpu *vcpu = NULL, *tmp;
+ struct kvm_vcpu *vcpu = NULL;
wait_queue_head_t *wq;
unsigned long cpu_id;
unsigned long context_id;
- unsigned long mpidr;
phys_addr_t target_pc;
- int i;
- cpu_id = *vcpu_reg(source_vcpu, 1);
+ cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
- kvm_for_each_vcpu(i, tmp, kvm) {
- mpidr = kvm_vcpu_get_mpidr(tmp);
- if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
- vcpu = tmp;
- break;
- }
- }
+ vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
/*
* Make sure the caller requested a valid CPU and that the CPU is
@@ -154,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
* then ON else OFF
*/
kvm_for_each_vcpu(i, tmp, kvm) {
- mpidr = kvm_vcpu_get_mpidr(tmp);
+ mpidr = kvm_vcpu_get_mpidr_aff(tmp);
if (((mpidr & target_affinity_mask) == target_affinity) &&
!tmp->arch.pause) {
return PSCI_0_2_AFFINITY_LEVEL_ON;
@@ -166,6 +160,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
{
+ int i;
+ struct kvm_vcpu *tmp;
+
+ /*
+ * The KVM ABI specifies that a system event exit may call KVM_RUN
+ * again and may perform shutdown/reboot at a later time that when the
+ * actual request is made. Since we are implementing PSCI and a
+ * caller of PSCI reboot and shutdown expects that the system shuts
+ * down or reboots immediately, let's make sure that VCPUs are not run
+ * after this call is handled and before the VCPUs have been
+ * re-initialized.
+ */
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ tmp->arch.pause = true;
+ kvm_vcpu_kick(tmp);
+ }
+
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f78623..0ec35392d208 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,18 +25,22 @@ TRACE_EVENT(kvm_entry,
);
TRACE_EVENT(kvm_exit,
- TP_PROTO(unsigned long vcpu_pc),
- TP_ARGS(vcpu_pc),
+ TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
+ TP_ARGS(exit_reason, vcpu_pc),
TP_STRUCT__entry(
+ __field( unsigned int, exit_reason )
__field( unsigned long, vcpu_pc )
),
TP_fast_assign(
+ __entry->exit_reason = exit_reason;
__entry->vcpu_pc = vcpu_pc;
),
- TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+ TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
+ __entry->exit_reason,
+ __entry->vcpu_pc)
);
TRACE_EVENT(kvm_guest_fault,
@@ -64,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
__entry->hxfar, __entry->vcpu_pc)
);
+TRACE_EVENT(kvm_access_fault,
+ TP_PROTO(unsigned long ipa),
+ TP_ARGS(ipa),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, ipa )
+ ),
+
+ TP_fast_assign(
+ __entry->ipa = ipa;
+ ),
+
+ TP_printk("IPA: %lx", __entry->ipa)
+);
+
TRACE_EVENT(kvm_irq_line,
TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -140,19 +159,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp,
__entry->CRm, __entry->Op2)
);
-TRACE_EVENT(kvm_wfi,
- TP_PROTO(unsigned long vcpu_pc),
- TP_ARGS(vcpu_pc),
+TRACE_EVENT(kvm_wfx,
+ TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
+ TP_ARGS(vcpu_pc, is_wfe),
TP_STRUCT__entry(
__field( unsigned long, vcpu_pc )
+ __field( bool, is_wfe )
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
+ __entry->is_wfe = is_wfe;
),
- TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+ TP_printk("guest executed wf%c at: 0x%08lx",
+ __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
TRACE_EVENT(kvm_unmap_hva,
@@ -203,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
);
+TRACE_EVENT(kvm_age_hva,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned long, end )
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->end = end;
+ ),
+
+ TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+ __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
TRACE_EVENT(kvm_hvc,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
@@ -223,6 +278,45 @@ TRACE_EVENT(kvm_hvc,
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
+TRACE_EVENT(kvm_set_way_flush,
+ TP_PROTO(unsigned long vcpu_pc, bool cache),
+ TP_ARGS(vcpu_pc, cache),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( bool, cache )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->cache = cache;
+ ),
+
+ TP_printk("S/W flush at 0x%016lx (cache %s)",
+ __entry->vcpu_pc, __entry->cache ? "on" : "off")
+);
+
+TRACE_EVENT(kvm_toggle_cache,
+ TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
+ TP_ARGS(vcpu_pc, was, now),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( bool, was )
+ __field( bool, now )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->was = was;
+ __entry->now = now;
+ ),
+
+ TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
+ __entry->vcpu_pc, __entry->was ? "on" : "off",
+ __entry->now ? "on" : "off")
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH