From 07a9748c78cfc39b54f06125a216b67b9c8f09ed Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Nov 2014 08:33:55 +0000 Subject: arm/arm64: kvm: drop inappropriate use of kvm_is_mmio_pfn() Instead of using kvm_is_mmio_pfn() to decide whether a host region should be stage 2 mapped with device attributes, add a new static function kvm_is_device_pfn() that disregards RAM pages with the reserved bit set, as those should usually not be mapped as device memory. Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 57a403a5c22b..b007438242e2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -834,6 +834,11 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) return kvm_vcpu_dabt_iswrite(vcpu); } +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -904,7 +909,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; - if (kvm_is_mmio_pfn(pfn)) + if (kvm_is_device_pfn(pfn)) mem_type = PAGE_S2_DEVICE; spin_lock(&kvm->mmu_lock); -- cgit v1.2.3 From 840f4bfbe03f1ce94ade8fdf84e8cd925ef15a48 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 17 Nov 2014 14:58:52 +0000 Subject: arm, arm64: KVM: allow forced dcache flush on page faults To allow handling of incoherent memslots in a subsequent patch, this patch adds a paramater 'ipa_uncached' to cache_coherent_guest_page() so that we can instruct it to flush the page's contents to DRAM even if the guest has caching globally enabled. Signed-off-by: Laszlo Ersek Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 5 +++-- arch/arm/kvm/mmu.c | 9 +++++++-- arch/arm64/include/asm/kvm_mmu.h | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index acb0d5712716..f867060035ec 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -161,9 +161,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); /* diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b007438242e2..cb924c6d56a6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -852,6 +852,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct vm_area_struct *vma; pfn_t pfn; pgprot_t mem_type = PAGE_S2; + bool fault_ipa_uncached; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -918,6 +919,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + fault_ipa_uncached = false; + if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); new_pmd = pmd_mkhuge(new_pmd); @@ -925,7 +928,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE); + coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, + fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -933,7 +937,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); + coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, + fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 0caf7a59f6a1..123b521a9908 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -243,9 +243,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) } static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size) + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu)) + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) kvm_flush_dcache_to_poc((void *)hva, size); if (!icache_is_aliasing()) { /* PIPT */ -- cgit v1.2.3 From 849260c72c6b8bd53850cb00b80027db3a273c2c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Nov 2014 14:58:53 +0000 Subject: arm, arm64: KVM: handle potential incoherency of readonly memslots Readonly memslots are often used to implement emulation of ROMs and NOR flashes, in which case the guest may legally map these regions as uncached. To deal with the incoherency associated with uncached guest mappings, treat all readonly memslots as incoherent, and ensure that pages that belong to regions tagged as such are flushed to DRAM before being passed to the guest. Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index cb924c6d56a6..f2a9874ff5cb 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -919,7 +919,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); - fault_ipa_uncached = false; + fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT; if (hugetlb) { pmd_t new_pmd = pfn_pmd(pfn, mem_type); @@ -1298,11 +1298,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); - if (ret) { - spin_lock(&kvm->mmu_lock); + spin_lock(&kvm->mmu_lock); + if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); - spin_unlock(&kvm->mmu_lock); - } + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); return ret; } @@ -1314,6 +1315,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { + /* + * Readonly memslots are not incoherent with the caches by definition, + * but in practice, they are used mostly to emulate ROMs or NOR flashes + * that the guest may consider devices and hence map as uncached. + * To prevent incoherency issues in these cases, tag all readonly + * regions as incoherent. + */ + if (slot->flags & KVM_MEM_READONLY) + slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } -- cgit v1.2.3 From 5100f9833e1881c800bc088e70afa4b9a1409f51 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 6 Nov 2014 12:11:45 +0000 Subject: arm/arm64: KVM: avoid unnecessary guest register mangling on MMIO read Currently we mangle the endianness of the guest's register even on an MMIO _read_, where it is completely useless, because we will not use the value of that register. Rework the io_mem_abort() function to clearly separate between reads and writes and only do the endianness mangling on MMIO writes. Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmio.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 4cb5a93182e9..5d3bfc0eb3f0 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, } rt = vcpu->arch.mmio_decode.rt; - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); - trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : - KVM_TRACE_MMIO_READ_UNSATISFIED, - mmio.len, fault_ipa, - (mmio.is_write) ? data : 0); + if (mmio.is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), + mmio.len); - if (mmio.is_write) + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, + fault_ipa, data); mmio_write_buf(mmio.data, mmio.len, data); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + fault_ipa, 0); + } if (vgic_handle_mmio(vcpu, run, &mmio)) return 1; -- cgit v1.2.3 From 03f1d4c17edb31b41b14ca3a749ae38d2dd6639d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 Dec 2014 15:27:51 +0100 Subject: arm/arm64: KVM: Don't clear the VCPU_POWER_OFF flag If a VCPU was originally started with power off (typically to be brought up by PSCI in SMP configurations), there is no need to clear the POWER_OFF flag in the kernel, as this flag is only tested during the init ioctl itself. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9e193c8a959e..b160beadc956 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -661,7 +661,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, /* * Handle the "start in power-off" case by marking the VCPU as paused. */ - if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; return 0; -- cgit v1.2.3 From 3ad8b3de526a76fbe9466b366059e4958957b88f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 16 Oct 2014 16:14:43 +0200 Subject: arm/arm64: KVM: Correct KVM_ARM_VCPU_INIT power off option The implementation of KVM_ARM_VCPU_INIT is currently not doing what userspace expects, namely making sure that a vcpu which may have been turned off using PSCI is returned to its initial state, which would be powered on if userspace does not set the KVM_ARM_VCPU_POWER_OFF flag. Implement the expected functionality and clarify the ABI. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 3 ++- arch/arm/kvm/arm.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7610eaa4d491..bb82a906e51e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2455,7 +2455,8 @@ should be created before this ioctl is invoked. Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. - Depends on KVM_CAP_ARM_PSCI. + Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on + and execute guest code when KVM_RUN is called. - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode. Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b160beadc956..edc196412abe 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -663,6 +663,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) vcpu->arch.pause = true; + else + vcpu->arch.pause = false; return 0; } -- cgit v1.2.3 From b856a59141b1066d3c896a0d0231f84dabd040af Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 16 Oct 2014 17:21:16 +0200 Subject: arm/arm64: KVM: Reset the HCR on each vcpu when resetting the vcpu When userspace resets the vcpu using KVM_ARM_VCPU_INIT, we should also reset the HCR, because we now modify the HCR dynamically to enable/disable trapping of guest accesses to the VM registers. This is crucial for reboot of VMs working since otherwise we will not be doing the necessary cache maintenance operations when faulting in pages with the guest MMU off. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/arm.c | 2 ++ arch/arm/kvm/guest.c | 1 - arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/guest.c | 1 - 5 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index b9db269c6e61..66ce17655bb9 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr = HCR_GUEST_MASK; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index edc196412abe..24c9ca4076b2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -658,6 +658,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (ret) return ret; + vcpu_reset_hcr(vcpu); + /* * Handle the "start in power-off" case by marking the VCPU as paused. */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index cc0b78769bd8..8c97208b9b97 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr = HCR_GUEST_MASK; return 0; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5674a55b5518..8127e45e2637 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 76794692c20b..84d5959ff874 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; return 0; } -- cgit v1.2.3 From f7fa034dc8559c7d7326bfc8bd1a26175abd931a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 16 Oct 2014 16:40:53 +0200 Subject: arm/arm64: KVM: Clarify KVM_ARM_VCPU_INIT ABI It is not clear that this ioctl can be called multiple times for a given vcpu. Userspace already does this, so clarify the ABI. Also specify that userspace is expected to always make secondary and subsequent calls to the ioctl with the same parameters for the VCPU as the initial call (which userspace also already does). Add code to check that userspace doesn't violate that ABI in the future, and move the kvm_vcpu_set_target() function which is currently duplicated between the 32-bit and 64-bit versions in guest.c to a common static function in arm.c, shared between both architectures. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 5 +++++ arch/arm/include/asm/kvm_host.h | 2 -- arch/arm/kvm/arm.c | 43 +++++++++++++++++++++++++++++++++++++++ arch/arm/kvm/guest.c | 25 ----------------------- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/guest.c | 25 ----------------------- 6 files changed, 48 insertions(+), 54 deletions(-) (limited to 'arch/arm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bb82a906e51e..81f1b974c06a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2453,6 +2453,11 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Userspace can call this function multiple times for a given vcpu, including +after the vcpu has been run. This will reset the vcpu to its initial +state. All calls to this function after the initial call must use the same +target and same set of feature flags, otherwise EINVAL will be returned. + Possible features: - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 53036e21756b..254e0650e48b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,8 +150,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 24c9ca4076b2..4043769583e7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -649,6 +650,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + + static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 8c97208b9b97..384bab67c462 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -273,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void) } } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - - /* We can only cope with guest==host and only on A15/A7 (for now). */ - if (init->target != kvm_target_cpu()) - return -EINVAL; - - vcpu->arch.target = init->target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (test_bit(i, (void *)init->features)) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2012c4ba8d67..65c615294589 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -165,8 +165,6 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 84d5959ff874..9535bd555d1d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -296,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void) return -EINVAL; } -int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, - const struct kvm_vcpu_init *init) -{ - unsigned int i; - int phys_target = kvm_target_cpu(); - - if (init->target != phys_target) - return -EINVAL; - - vcpu->arch.target = phys_target; - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - - /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ - for (i = 0; i < sizeof(init->features) * 8; i++) { - if (init->features[i / 32] & (1 << (i % 32))) { - if (i >= KVM_VCPU_MAX_FEATURES) - return -ENOENT; - set_bit(i, vcpu->arch.features); - } - } - - /* Now we know what it is, we can reset it. */ - return kvm_reset_vcpu(vcpu); -} - int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { int target = kvm_target_cpu(); -- cgit v1.2.3 From cf5d318865e25f887d49a0c6083bbc6dcd1905b1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 16 Oct 2014 17:00:18 +0200 Subject: arm/arm64: KVM: Turn off vcpus on PSCI shutdown/reboot When a vcpu calls SYSTEM_OFF or SYSTEM_RESET with PSCI v0.2, the vcpus should really be turned off for the VM adhering to the suggestions in the PSCI spec, and it's the sane thing to do. Also, clarify the behavior and expectations for exits to user space with the KVM_EXIT_SYSTEM_EVENT case. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 9 +++++++++ arch/arm/kvm/psci.c | 18 ++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 1 + 3 files changed, 28 insertions(+) (limited to 'arch/arm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 81f1b974c06a..228f9cf5a5b5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2957,6 +2957,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes the system-level event type. The 'flags' field describes architecture specific flags for the system-level event. +Valid values for 'type' are: + KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the + VM. Userspace is not obliged to honour this, and if it does honour + this does not need to destroy the VM synchronously (ie it may call + KVM_RUN again before shutdown finally occurs). + KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. + As with SHUTDOWN, userspace can choose to ignore the request, or + to schedule the reset to occur in the future and may call KVM_RUN again. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 09cf37737ee2..58cb3248d277 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include @@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) { + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time that when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + tmp->arch.pause = true; + kvm_vcpu_kick(tmp); + } + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65c615294589..0b7dfdb931df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -198,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -- cgit v1.2.3 From 957db105c99792ae8ef61ffc9ae77d910f6471da Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Nov 2014 10:35:03 +0100 Subject: arm/arm64: KVM: Introduce stage2_unmap_vm Introduce a new function to unmap user RAM regions in the stage2 page tables. This is needed on reboot (or when the guest turns off the MMU) to ensure we fault in pages again and make the dcache, RAM, and icache coherent. Using unmap_stage2_range for the whole guest physical range does not work, because that unmaps IO regions (such as the GIC) which will not be recreated or in the best case faulted in on a page-by-page basis. Call this function on secondary and subsequent calls to the KVM_ARM_VCPU_INIT ioctl so that a reset VCPU will detect the guest Stage-1 MMU is off when faulting in pages and make the caches coherent. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/kvm/arm.c | 7 +++++ arch/arm/kvm/mmu.c | 65 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 1 + 4 files changed, 74 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index f867060035ec..63e0ecc04901 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 4043769583e7..da87c07d8577 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -701,6 +701,13 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (ret) return ret; + /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + */ + if (vcpu->arch.has_run_once) + stage2_unmap_vm(vcpu->kvm); + vcpu_reset_hcr(vcpu); /* diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f2a9874ff5cb..3756dd3e85c2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -611,6 +611,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) unmap_range(kvm, kvm->arch.pgd, start, size); } +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any reguler RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + /** * kvm_free_stage2_pgd - free all stage-2 tables * @kvm: The KVM struct pointer for the VM. diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 123b521a9908..14a74f136272 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); void free_hyp_pgds(void); +void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, -- cgit v1.2.3 From 6d3cfbe21bef5b66530b50ad16c88fdc71a04c35 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 4 Dec 2014 15:02:24 +0000 Subject: arm/arm64: KVM: vgic: move reset initialization into vgic_init_maps() VGIC initialization currently happens in three phases: (1) kvm_vgic_create() (triggered by userspace GIC creation) (2) vgic_init_maps() (triggered by userspace GIC register read/write requests, or from kvm_vgic_init() if not already run) (3) kvm_vgic_init() (triggered by first VM run) We were doing initialization of some state to correspond with the state of a freshly-reset GIC in kvm_vgic_init(); this is too late, since it will overwrite changes made by userspace using the register access APIs before the VM is run. Move this initialization earlier, into the vgic_init_maps() phase. This fixes a bug where QEMU could successfully restore a saved VM state snapshot into a VM that had already been run, but could not restore it "from cold" using the -loadvm command line option (the symptoms being that the restored VM would run but interrupts were ignored). Finally rename vgic_init_maps to vgic_init and renamed kvm_vgic_init to kvm_vgic_map_resources. [ This patch is originally written by Peter Maydell, but I have modified it somewhat heavily, renaming various bits and moving code around. If something is broken, I am to be blamed. - Christoffer ] Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Peter Maydell Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 6 ++-- include/kvm/arm_vgic.h | 4 +-- virt/kvm/arm/vgic.c | 77 +++++++++++++++++++++----------------------------- 3 files changed, 37 insertions(+), 50 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index da87c07d8577..fa4b97c5746c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -428,11 +428,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* - * Initialize the VGIC before running a vcpu the first time on - * this VM. + * Map the VGIC hardware resources before running a vcpu the first + * time on this VM. */ if (unlikely(!vgic_initialized(vcpu->kvm))) { - ret = kvm_vgic_init(vcpu->kvm); + ret = kvm_vgic_map_resources(vcpu->kvm); if (ret) return ret; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 206dcc3b3f7a..fe9783ba924c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -274,7 +274,7 @@ struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); -int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -321,7 +321,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, return -ENXIO; } -static inline int kvm_vgic_init(struct kvm *kvm) +static inline int kvm_vgic_map_resources(struct kvm *kvm) { return 0; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 21e035cc5460..1ce4e364c1e0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -91,6 +91,7 @@ #define ACCESS_WRITE_VALUE (3 << 1) #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) +static int vgic_init(struct kvm *kvm); static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); @@ -1732,39 +1733,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - return 0; -} - -/** - * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state - * @vcpu: pointer to the vcpu struct - * - * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to - * this vcpu and enable the VGIC for this VCPU - */ -static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int i; - - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, VGIC_CFG_EDGE); - - vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; - } + memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); /* * Store the number of LRs per vcpu, so we don't have to go @@ -1773,7 +1749,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) */ vgic_cpu->nr_lr = vgic->nr_lr; - vgic_enable(vcpu); + return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1810,12 +1786,12 @@ void kvm_vgic_destroy(struct kvm *kvm) * Allocate and initialize the various data structures. Must be called * with kvm->lock held! */ -static int vgic_init_maps(struct kvm *kvm) +static int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; int nr_cpus, nr_irqs; - int ret, i; + int ret, i, vcpu_id; if (dist->nr_cpus) /* Already allocated */ return 0; @@ -1865,16 +1841,28 @@ static int vgic_init_maps(struct kvm *kvm) if (ret) goto out; - kvm_for_each_vcpu(i, vcpu, kvm) { + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + + kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { ret = vgic_vcpu_init_maps(vcpu, nr_irqs); if (ret) { kvm_err("VGIC: Failed to allocate vcpu memory\n"); break; } - } - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + for (i = 0; i < dist->nr_irqs; i++) { + if (i < VGIC_NR_PPIS) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < VGIC_NR_PRIVATE_IRQS) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_EDGE); + } + + vgic_enable(vcpu); + } out: if (ret) @@ -1884,18 +1872,16 @@ out: } /** - * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct * * Map the virtual CPU interface into the VM before running any VCPUs. We * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. Also - * initialize the ITARGETSRn regs to 0 on the emulated distributor. + * virtual CPU interface address in the guest physical address space. */ -int kvm_vgic_init(struct kvm *kvm) +int kvm_vgic_map_resources(struct kvm *kvm) { - struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0; if (!irqchip_in_kernel(kvm)) return 0; @@ -1912,7 +1898,11 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - ret = vgic_init_maps(kvm); + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); goto out; @@ -1926,9 +1916,6 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_init(vcpu); - kvm->arch.vgic.ready = true; out: if (ret) @@ -2173,7 +2160,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - ret = vgic_init_maps(dev->kvm); + ret = vgic_init(dev->kvm); if (ret) goto out; -- cgit v1.2.3 From c52edf5f8caff878afc93c1b1e9a3d9490a9932f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 9 Dec 2014 14:28:09 +0100 Subject: arm/arm64: KVM: Rename vgic_initialized to vgic_ready The vgic_initialized() macro currently returns the state of the vgic->ready flag, which indicates if the vgic is ready to be used when running a VM, not specifically if its internal state has been initialized. Rename the macro accordingly in preparation for a more nuanced initialization flow. Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- include/kvm/arm_vgic.h | 4 ++-- virt/kvm/arm/vgic.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index fa4b97c5746c..c5a05f2c28ac 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -431,7 +431,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. */ - if (unlikely(!vgic_initialized(vcpu->kvm))) { + if (unlikely(!vgic_ready(vcpu->kvm))) { ret = kvm_vgic_map_resources(vcpu->kvm); if (ret) return ret; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index fe9783ba924c..3e262b9bbddf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -287,7 +287,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) ((k)->arch.vgic.ready) +#define vgic_ready(k) ((k)->arch.vgic.ready) int vgic_v2_probe(struct device_node *vgic_node, const struct vgic_ops **ops, @@ -369,7 +369,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return 0; } -static inline bool vgic_initialized(struct kvm *kvm) +static inline bool vgic_ready(struct kvm *kvm) { return true; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1ce4e364c1e0..4edb2572ea9a 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1696,7 +1696,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, { int vcpu_id; - if (likely(vgic_initialized(kvm))) { + if (likely(vgic_ready(kvm))) { vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); if (vcpu_id >= 0) /* kick the specified vcpu */ @@ -1888,7 +1888,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) mutex_lock(&kvm->lock); - if (vgic_initialized(kvm)) + if (vgic_ready(kvm)) goto out; if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || @@ -2282,7 +2282,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) mutex_lock(&dev->kvm->lock); - if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) ret = -EBUSY; else dev->kvm->arch.vgic.nr_irqs = val; -- cgit v1.2.3 From 716139df2517fbc3f2306dbe8eba0fa88dca0189 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 9 Dec 2014 14:33:45 +0100 Subject: arm/arm64: KVM: Don't allow creating VCPUs after vgic_initialized When the vgic initializes its internal state it does so based on the number of VCPUs available at the time. If we allow KVM to create more VCPUs after the VGIC has been initialized, we are likely to error out in unfortunate ways later, perform buffer overflows etc. Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c5a05f2c28ac..66f37c4cdf13 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) int err; struct kvm_vcpu *vcpu; + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) { + err = -EBUSY; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; -- cgit v1.2.3 From 05971120fca43e0357789a14b3386bb56eef2201 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 12 Dec 2014 21:19:23 +0100 Subject: arm/arm64: KVM: Require in-kernel vgic for the arch timers It is curently possible to run a VM with architected timers support without creating an in-kernel VGIC, which will result in interrupts from the virtual timer going nowhere. To address this issue, move the architected timers initialization to the time when we run a VCPU for the first time, and then only initialize (and enable) the architected timers if we have a properly created and initialized in-kernel VGIC. When injecting interrupts from the virtual timer to the vgic, the current setup should ensure that this never calls an on-demand init of the VGIC, which is the only call path that could return an error from kvm_vgic_inject_irq(), so capture the return value and raise a warning if there's an error there. We also change the kvm_timer_init() function from returning an int to be a void function, since the function always succeeds. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 13 +++++++++++-- include/kvm/arm_arch_timer.h | 10 ++++------ virt/kvm/arm/arch_timer.c | 30 ++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 16 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 66f37c4cdf13..2d6d91001062 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -425,6 +425,7 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; int ret; if (likely(vcpu->arch.has_run_once)) @@ -436,12 +437,20 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. */ - if (unlikely(!vgic_ready(vcpu->kvm))) { - ret = kvm_vgic_map_resources(vcpu->kvm); + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); if (ret) return ret; } + /* + * Enable the arch timers only if we have an in-kernel VGIC + * and it has been properly initialized, since we cannot handle + * interrupts from the virtual timer with a userspace gic. + */ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + kvm_timer_enable(kvm); + return 0; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ad9db6045b2f..b3f45a578344 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -60,7 +60,8 @@ struct arch_timer_cpu { #ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); -int kvm_timer_init(struct kvm *kvm); +void kvm_timer_enable(struct kvm *kvm); +void kvm_timer_init(struct kvm *kvm); void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); @@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void) return 0; }; -static inline int kvm_timer_init(struct kvm *kvm) -{ - return 0; -} - +static inline void kvm_timer_enable(struct kvm *kvm) {} +static inline void kvm_timer_init(struct kvm *kvm) {} static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq) {} static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 22fa819a9b6a..1c0772b340d8 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer) static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { + int ret; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq->irq, - timer->irq->level); + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer->irq->irq, + timer->irq->level); + WARN_ON(ret); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) timer_disarm(timer); } -int kvm_timer_init(struct kvm *kvm) +void kvm_timer_enable(struct kvm *kvm) { - if (timecounter && wqueue) { - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); + if (kvm->arch.timer.enabled) + return; + + /* + * There is a potential race here between VCPUs starting for the first + * time, which may be enabling the timer multiple times. That doesn't + * hurt though, because we're just setting a variable to the same + * variable that it already was. The important thing is that all + * VCPUs have the enabled variable set, before entering the guest, if + * the arch timers are enabled. + */ + if (timecounter && wqueue) kvm->arch.timer.enabled = 1; - } +} - return 0; +void kvm_timer_init(struct kvm *kvm) +{ + kvm->arch.timer.cntvoff = kvm_phys_timer_read(); } -- cgit v1.2.3