Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 140
1 file changed, 89 insertions, 51 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae231..ba0327e2d0d3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -594,6 +594,11 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
 			 */
 			gfn_range.arg = range->arg;
 			gfn_range.may_block = range->may_block;
+			/*
+			 * HVA-based notifications aren't relevant to private
+			 * mappings as they don't have a userspace mapping.
+			 */
+			gfn_range.attr_filter = KVM_FILTER_SHARED;
 
 			/*
 			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
@@ -1066,15 +1071,6 @@ out_err:
 }
 
 /*
- * Called after the VM is otherwise initialized, but just before adding it to
- * the vm_list.
- */
-int __weak kvm_arch_post_init_vm(struct kvm *kvm)
-{
-	return 0;
-}
-
-/*
  * Called just after removing the VM from the vm_list, but before doing any
  * other destruction.
  */
@@ -1194,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 	if (r)
 		goto out_err_no_debugfs;
 
-	r = kvm_arch_post_init_vm(kvm);
-	if (r)
-		goto out_err;
-
 	mutex_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	mutex_unlock(&kvm_lock);
@@ -1207,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 
 	return kvm;
 
-out_err:
-	kvm_destroy_vm_debugfs(kvm);
 out_err_no_debugfs:
 	kvm_coalesced_mmio_free(kvm);
 out_no_coalesced_mmio:
@@ -1926,16 +1916,8 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
 	return false;
 }
 
-/*
- * Allocate some memory and give it an address in the guest physical address
- * space.
- *
- * Discontiguous memory is allowed, mostly for framebuffers.
- *
- * Must be called holding kvm->slots_lock for write.
- */
-int __kvm_set_memory_region(struct kvm *kvm,
-			    const struct kvm_userspace_memory_region2 *mem)
+static int kvm_set_memory_region(struct kvm *kvm,
+				 const struct kvm_userspace_memory_region2 *mem)
 {
 	struct kvm_memory_slot *old, *new;
 	struct kvm_memslots *slots;
@@ -1945,6 +1927,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	int as_id, id;
 	int r;
 
+	lockdep_assert_held(&kvm->slots_lock);
+
 	r = check_memory_region_flags(kvm, mem);
 	if (r)
 		return r;
@@ -1972,7 +1956,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		return -EINVAL;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		return -EINVAL;
-	if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+
+	/*
+	 * The size of userspace-defined memory regions is restricted in order
+	 * to play nice with dirty bitmap operations, which are indexed with an
+	 * "unsigned int". KVM's internal memory regions don't support dirty
+	 * logging, and so are exempt.
+	 */
+	if (id < KVM_USER_MEM_SLOTS &&
+	    (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
 		return -EINVAL;
 
 	slots = __kvm_memslots(kvm, as_id);
@@ -2056,19 +2048,19 @@ out:
 	kfree(new);
 	return r;
 }
-EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
 
-int kvm_set_memory_region(struct kvm *kvm,
-			  const struct kvm_userspace_memory_region2 *mem)
+int kvm_set_internal_memslot(struct kvm *kvm,
+			     const struct kvm_userspace_memory_region2 *mem)
 {
-	int r;
+	if (WARN_ON_ONCE(mem->slot < KVM_USER_MEM_SLOTS))
+		return -EINVAL;
 
-	mutex_lock(&kvm->slots_lock);
-	r = __kvm_set_memory_region(kvm, mem);
-	mutex_unlock(&kvm->slots_lock);
-	return r;
+	if (WARN_ON_ONCE(mem->flags))
+		return -EINVAL;
+
+	return kvm_set_memory_region(kvm, mem);
 }
-EXPORT_SYMBOL_GPL(kvm_set_memory_region);
+EXPORT_SYMBOL_GPL(kvm_set_internal_memslot);
 
 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 					   struct kvm_userspace_memory_region2 *mem)
@@ -2076,6 +2068,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
 
+	guard(mutex)(&kvm->slots_lock);
 	return kvm_set_memory_region(kvm, mem);
 }
 
@@ -2408,6 +2401,14 @@ static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
 	gfn_range.arg = range->arg;
 	gfn_range.may_block = range->may_block;
 
+	/*
+	 * If/when KVM supports more attributes beyond private vs. shared, this
+	 * _could_ set KVM_FILTER_{SHARED,PRIVATE} appropriately if the entire target
+	 * range already has the desired private vs. shared state (it's unclear
+	 * if that is a net win). For now, KVM reaches this point if and only
+	 * if the private flag is being toggled, i.e. all mappings are in play.
+	 */
+
 	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
 		slots = __kvm_memslots(kvm, i);
 
@@ -2464,6 +2465,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 	struct kvm_mmu_notifier_range pre_set_range = {
 		.start = start,
 		.end = end,
+		.arg.attributes = attributes,
 		.handler = kvm_pre_set_memory_attributes,
 		.on_lock = kvm_mmu_invalidate_begin,
 		.flush_on_ret = true,
@@ -4116,32 +4118,30 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
 
 	mutex_lock(&kvm->lock);
 
-#ifdef CONFIG_LOCKDEP
-	/* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
-	mutex_lock(&vcpu->mutex);
-	mutex_unlock(&vcpu->mutex);
-#endif
-
 	if (kvm_get_vcpu_by_id(kvm, id)) {
 		r = -EEXIST;
 		goto unlock_vcpu_destroy;
 	}
 
 	vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
-	r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
+	r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+	WARN_ON_ONCE(r == -EBUSY);
 	if (r)
 		goto unlock_vcpu_destroy;
 
-	/* Now it's all set up, let userspace reach it */
+	/*
+	 * Now it's all set up, let userspace reach it. Grab the vCPU's mutex
+	 * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
+	 * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+	 * into a NULL-pointer dereference because KVM thinks the _current_
+	 * vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep
+	 * knows it's taken *inside* kvm->lock.
+	 */
+	mutex_lock(&vcpu->mutex);
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
 	if (r < 0)
-		goto kvm_put_xa_release;
-
-	if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
-		r = -EINVAL;
-		goto kvm_put_xa_release;
-	}
+		goto kvm_put_xa_erase;
 
 	/*
 	 * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu
@@ -4149,15 +4149,17 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
 	 */
 	smp_wmb();
 	atomic_inc(&kvm->online_vcpus);
 
+	mutex_unlock(&vcpu->mutex);
 	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_postcreate(vcpu);
 	kvm_create_vcpu_debugfs(vcpu);
 	return r;
 
-kvm_put_xa_release:
+kvm_put_xa_erase:
+	mutex_unlock(&vcpu->mutex);
 	kvm_put_kvm_no_destroy(kvm);
-	xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
+	xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
 unlock_vcpu_destroy:
 	mutex_unlock(&kvm->lock);
 	kvm_dirty_ring_free(&vcpu->dirty_ring);
@@ -4282,6 +4284,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 }
 #endif
 
+static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	/*
+	 * In practice, this happy path will always be taken, as a well-behaved
+	 * VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
+	 */
+	if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+		return 0;
+
+	/*
+	 * Acquire and release the vCPU's mutex to wait for vCPU creation to
+	 * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+	 * is fully online).
+	 */
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
+
+	mutex_unlock(&vcpu->mutex);
+
+	if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
+		return -EIO;
+
+	return 0;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -4298,6 +4327,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		return -EINVAL;
 
 	/*
+	 * Wait for the vCPU to be online before handling the ioctl(), as KVM
+	 * assumes the vCPU is reachable via vcpu_array, i.e. may dereference
+	 * a NULL pointer if userspace invokes an ioctl() before KVM is ready.
+	 */
+	r = kvm_wait_for_vcpu_online(vcpu);
+	if (r)
+		return r;
+
+	/*
 	 * Some architectures have vcpu ioctls that are asynchronous to vcpu
 	 * execution; mutex_lock() would break them.
 	 */
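The memslot changes above fold the old __kvm_set_memory_region()/kvm_set_memory_region() pair into a single static kvm_set_memory_region() that asserts slots_lock, and export kvm_set_internal_memslot() as the sole entry point for KVM-internal slots, which must use an internal slot ID and carry no flags. A rough kernel-side sketch, not part of the diff (the wrapper name and slot ID are purely illustrative), of how an architecture could install such a slot:

#include <linux/kvm_host.h>

static int example_install_internal_slot(struct kvm *kvm, gfn_t base_gfn,
					 unsigned long npages, unsigned long hva)
{
	/*
	 * Internal slots live at IDs >= KVM_USER_MEM_SLOTS and must not set
	 * flags; kvm_set_internal_memslot() WARNs and rejects anything else,
	 * and the underlying kvm_set_memory_region() asserts slots_lock.
	 */
	struct kvm_userspace_memory_region2 region = {
		.slot            = KVM_USER_MEM_SLOTS,	/* illustrative internal slot ID */
		.flags           = 0,
		.guest_phys_addr = base_gfn << PAGE_SHIFT,
		.memory_size     = npages << PAGE_SHIFT,
		.userspace_addr  = hva,
	};

	guard(mutex)(&kvm->slots_lock);
	return kvm_set_internal_memslot(kvm, &region);
}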
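The attr_filter additions and the new pre_set_range.arg.attributes assignment are driven by the existing KVM_SET_MEMORY_ATTRIBUTES ioctl(), which lands in kvm_vm_set_mem_attributes(). A minimal userspace sketch of that call, assuming vm_fd refers to a VM type that supports private memory and that KVM_CAP_MEMORY_ATTRIBUTES is advertised (error handling elided):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Flip a page-aligned GPA range to private; clearing .attributes flips it back. */
static int set_range_private(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_memory_attributes attrs = {
		.address    = gpa,
		.size       = size,
		.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
		.flags      = 0,
	};

	return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}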
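The vCPU-creation rework publishes the vCPU in vcpu_array before its file descriptor exists and holds vcpu->mutex until online_vcpus is bumped, so a vCPU ioctl() issued before KVM_CREATE_VCPU completes now waits in kvm_wait_for_vcpu_online() rather than racing with creation. A small userspace sketch of the ordinary flow whose first vCPU ioctl() goes through that path (KVM_GET_MP_STATE is just an arbitrary, architecture-dependent example of a vCPU ioctl):

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct kvm_mp_state mp;
	int sys_fd, vm_fd, vcpu_fd;

	sys_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (sys_fd < 0 || ioctl(sys_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
		return 1;

	vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);
	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	if (vm_fd < 0 || vcpu_fd < 0)
		return 1;

	/*
	 * First vCPU ioctl(): if it were to race with KVM_CREATE_VCPU (e.g. the
	 * fd observed by another thread before the creation ioctl returns), KVM
	 * now blocks in kvm_wait_for_vcpu_online() instead of risking a NULL
	 * vcpu_array dereference.
	 */
	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) == 0)
		printf("vCPU 0 mp_state = %u\n", mp.mp_state);

	close(vcpu_fd);
	close(vm_fd);
	close(sys_fd);
	return 0;
}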