From edde99ce05290e50ce0b3495d209e54e6349ab47 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 25 Oct 2010 03:21:24 +0200 Subject: KVM: Write protect memory after slot swap I have observed the following bug trigger: 1. userspace calls GET_DIRTY_LOG 2. kvm_mmu_slot_remove_write_access is called and makes a page ro 3. page fault happens and makes the page writeable fault is logged in the bitmap appropriately 4. kvm_vm_ioctl_get_dirty_log swaps slot pointers a lot of time passes 5. guest writes into the page 6. userspace calls GET_DIRTY_LOG At point (5), bitmap is clean and page is writeable, thus, guest modification of memory is not logged and GET_DIRTY_LOG returns an empty bitmap. The rule is that all pages are either dirty in the current bitmap, or write-protected, which is violated here. It seems that just moving kvm_mmu_slot_remove_write_access down to after the slot pointer swap should fix this bug. KVM-Stable-Tag. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2288ad829b32..b0818f672064 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3169,10 +3169,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memslots *slots, *old_slots; unsigned long *dirty_bitmap; - spin_lock(&kvm->mmu_lock); - kvm_mmu_slot_remove_write_access(kvm, log->slot); - spin_unlock(&kvm->mmu_lock); - r = -ENOMEM; dirty_bitmap = vmalloc(n); if (!dirty_bitmap) @@ -3194,6 +3190,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; kfree(old_slots); + spin_lock(&kvm->mmu_lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + spin_unlock(&kvm->mmu_lock); + r = -EFAULT; if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { vfree(dirty_bitmap); -- cgit v1.2.3 From eb45fda45f915c7ca3e81e005e853cb770da2642 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 25 Oct 2010 11:58:22 -0200 Subject: KVM: MMU: fix rmap_remove on non present sptes drop_spte should not attempt to rmap_remove a non present shadow pte. This fixes a BUG_ON seen on kvm-autotest. Signed-off-by: Marcelo Tosatti Reported-by: Lucas Meneghel Rodrigues Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 908ea5464a51..fb8b376bf28c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -720,7 +720,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) } } -static void set_spte_track_bits(u64 *sptep, u64 new_spte) +static int set_spte_track_bits(u64 *sptep, u64 new_spte) { pfn_t pfn; u64 old_spte = *sptep; @@ -731,19 +731,20 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte) old_spte = __xchg_spte(sptep, new_spte); if (!is_rmap_spte(old_spte)) - return; + return 0; pfn = spte_to_pfn(old_spte); if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) kvm_set_pfn_dirty(pfn); + return 1; } static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) { - set_spte_track_bits(sptep, new_spte); - rmap_remove(kvm, sptep); + if (set_spte_track_bits(sptep, new_spte)) + rmap_remove(kvm, sptep); } static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) -- cgit v1.2.3 From d8cdddcd645766cd4d80fa222226ae6ebfb706af Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sat, 30 Oct 2010 13:04:24 +0400 Subject: KVM: PPC: fix information leak to userland Structure kvm_ppc_pvinfo is copied to userland with flags and pad fields unitialized. It leads to leaking of contents of kernel stack memory. Signed-off-by: Vasiliy Kulikov Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/powerpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2f87a1627f6c..38f756f25053 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -617,6 +617,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_PPC_GET_PVINFO: { struct kvm_ppc_pvinfo pvinfo; + memset(&pvinfo, 0, sizeof(pvinfo)); r = kvm_vm_ioctl_get_pvinfo(&pvinfo); if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) { r = -EFAULT; -- cgit v1.2.3 From 97e69aa62f8b5d338d6cff49be09e37cc1262838 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sat, 30 Oct 2010 22:54:47 +0400 Subject: KVM: x86: fix information leak to userland Structures kvm_vcpu_events, kvm_debugregs, kvm_pit_state2 and kvm_clock_data are copied to userland with some padding and reserved fields unitialized. It leads to leaking of contents of kernel stack memory. We have to initialize them to zero. In patch v1 Jan Kiszka suggested to fill reserved fields with zeros instead of memset'ting the whole struct. It makes sense as these fields are explicitly marked as padding. No more fields need zeroing. KVM-Stable-Tag. Signed-off-by: Vasiliy Kulikov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0818f672064..463c65b8f93f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2560,6 +2560,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; events->interrupt.injected = @@ -2573,12 +2574,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending; events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + events->nmi.pad = 0; events->sipi_vector = vcpu->arch.sipi_vector; events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); + memset(&events->reserved, 0, sizeof(events->reserved)); } static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, @@ -2623,6 +2626,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, dbgregs->dr6 = vcpu->arch.dr6; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; + memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -3106,6 +3110,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) sizeof(ps->channels)); ps->flags = kvm->arch.vpit->pit_state.flags; mutex_unlock(&kvm->arch.vpit->pit_state.lock); + memset(&ps->reserved, 0, sizeof(ps->reserved)); return r; } @@ -3486,6 +3491,7 @@ long kvm_arch_vm_ioctl(struct file *filp, user_ns.clock = kvm->arch.kvmclock_offset + now_ns; local_irq_enable(); user_ns.flags = 0; + memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; if (copy_to_user(argp, &user_ns, sizeof(user_ns))) -- cgit v1.2.3 From 453d9c57e27b4401bc3e98906bcac31ae8be0165 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 1 Nov 2010 14:01:13 +0100 Subject: KVM: x86: Issue smp_call_function_many with preemption disabled smp_call_function_many is specified to be called only with preemption disabled. Fulfill this requirement. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 463c65b8f93f..cdac9e592aa5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3978,8 +3978,10 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) return X86EMUL_CONTINUE; if (kvm_x86_ops->has_wbinvd_exit()) { + preempt_disable(); smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, wbinvd_ipi, NULL, 1); + preempt_enable(); cpumask_clear(vcpu->arch.wbinvd_dirty_mask); } wbinvd(); -- cgit v1.2.3 From a36be1003a80197714fc2b6e198df2f31f9eb270 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 18 Oct 2010 17:35:48 -0500 Subject: PPC: KVM: Book E doesn't have __end_interrupts. Fix an unresolved symbol with CONFIG_KVM_GUEST plus CONFIG_RELOCATABLE on Book E. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 428d0e538aec..b06bdae04064 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -127,7 +127,7 @@ static void kvm_patch_ins_nop(u32 *inst) static void kvm_patch_ins_b(u32 *inst, int addr) { -#ifdef CONFIG_RELOCATABLE +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S) /* On relocatable kernels interrupts handlers and our code can be in different regions, so we don't patch them */ -- cgit v1.2.3 From f22e2f049d4643ed3c2d498ca50f894ace87962b Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 5 Oct 2010 14:22:41 -0500 Subject: KVM: PPC: e500: Call kvm_vcpu_uninit() before kvmppc_e500_tlb_uninit(). The VCPU uninit calls some TLB functions, and the TLB uninit function frees the memory used by them. Signed-off-by: Scott Wood Acked-by: Liu Yu Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 71750f2dd5d3..e3768ee9b595 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -138,8 +138,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); free_page((unsigned long)vcpu->arch.shared); - kvmppc_e500_tlb_uninit(vcpu_e500); kvm_vcpu_uninit(vcpu); + kvmppc_e500_tlb_uninit(vcpu_e500); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } -- cgit v1.2.3 From bb59e9748f9bc95212c7fe21468ba184938c48cb Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 30 Sep 2010 14:28:50 -0500 Subject: KVM: PPC: BookE: fix sleep with interrupts disabled It is not legal to call mutex_lock() with interrupts disabled. This will assert with debug checks enabled. If there's a real need to disable interrupts here, it could be done after the mutex is acquired -- but I don't see why it's needed at all. Signed-off-by: Scott Wood Reviewed-by: Christian Ehrhardt Signed-off-by: Alexander Graf --- arch/powerpc/kvm/timing.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 46fa04f12a9b..a021f5827a33 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -35,7 +35,6 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) int i; /* pause guest execution to avoid concurrent updates */ - local_irq_disable(); mutex_lock(&vcpu->mutex); vcpu->arch.last_exit_type = 0xDEAD; @@ -51,7 +50,6 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_last_enter.tv64 = 0; mutex_unlock(&vcpu->mutex); - local_irq_enable(); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) -- cgit v1.2.3 From df8940eadf011db2d4bedecf6eb659d44494edb3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 30 Sep 2010 14:31:27 -0500 Subject: KVM: PPC: BookE: Load the lower half of MSR This was preventing the guest from setting any bits in the hardware MSR which aren't forced on, such as MSR[SPE]. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke_interrupts.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 049846911ce4..1cc471faac2d 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -416,7 +416,7 @@ lightweight_exit: lwz r3, VCPU_PC(r4) mtsrr0 r3 lwz r3, VCPU_SHARED(r4) - lwz r3, VCPU_SHARED_MSR(r3) + lwz r3, (VCPU_SHARED_MSR + 4)(r3) oris r3, r3, KVMPPC_MSR_MASK@h ori r3, r3, KVMPPC_MSR_MASK@l mtsrr1 r3 -- cgit v1.2.3