path: root/virt/kvm/kvm_main.c
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--  virt/kvm/kvm_main.c  140
1 file changed, 89 insertions(+), 51 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae231..ba0327e2d0d3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -594,6 +594,11 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
*/
gfn_range.arg = range->arg;
gfn_range.may_block = range->may_block;
+ /*
+ * HVA-based notifications aren't relevant to private
+ * mappings as they don't have a userspace mapping.
+ */
+ gfn_range.attr_filter = KVM_FILTER_SHARED;
/*
* {gfn(page) | page intersects with [hva_start, hva_end)} =
@@ -1066,15 +1071,6 @@ out_err:
}
/*
- * Called after the VM is otherwise initialized, but just before adding it to
- * the vm_list.
- */
-int __weak kvm_arch_post_init_vm(struct kvm *kvm)
-{
- return 0;
-}
-
-/*
* Called just after removing the VM from the vm_list, but before doing any
* other destruction.
*/
@@ -1194,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (r)
goto out_err_no_debugfs;
- r = kvm_arch_post_init_vm(kvm);
- if (r)
- goto out_err;
-
mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
mutex_unlock(&kvm_lock);
@@ -1207,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
return kvm;
-out_err:
- kvm_destroy_vm_debugfs(kvm);
out_err_no_debugfs:
kvm_coalesced_mmio_free(kvm);
out_no_coalesced_mmio:
@@ -1926,16 +1916,8 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
return false;
}
-/*
- * Allocate some memory and give it an address in the guest physical address
- * space.
- *
- * Discontiguous memory is allowed, mostly for framebuffers.
- *
- * Must be called holding kvm->slots_lock for write.
- */
-int __kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region2 *mem)
+static int kvm_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region2 *mem)
{
struct kvm_memory_slot *old, *new;
struct kvm_memslots *slots;
@@ -1945,6 +1927,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
int as_id, id;
int r;
+ lockdep_assert_held(&kvm->slots_lock);
+
r = check_memory_region_flags(kvm, mem);
if (r)
return r;
@@ -1972,7 +1956,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
return -EINVAL;
- if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+
+ /*
+ * The size of userspace-defined memory regions is restricted in order
+ * to play nice with dirty bitmap operations, which are indexed with an
+ * "unsigned int". KVM's internal memory regions don't support dirty
+ * logging, and so are exempt.
+ */
+ if (id < KVM_USER_MEM_SLOTS &&
+ (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
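As a rough illustration of the size cap above (editor's sketch; the arithmetic assumes the usual KVM_MEM_MAX_NR_PAGES definition of ((1UL << 31) - 1) and 4 KiB pages, neither of which appears in this hunk):

	/* Illustrative only: the per-slot ceiling for userspace memslots. */
	u64 max_bytes = (u64)KVM_MEM_MAX_NR_PAGES << PAGE_SHIFT;
	/*
	 * (2^31 - 1) * 4096 = 8 TiB minus one page, so the number of
	 * dirty-bitmap page indices always fits in an "unsigned int".
	 */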
@@ -2056,19 +2048,19 @@ out:
kfree(new);
return r;
}
-EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
-int kvm_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region2 *mem)
+int kvm_set_internal_memslot(struct kvm *kvm,
+ const struct kvm_userspace_memory_region2 *mem)
{
- int r;
+ if (WARN_ON_ONCE(mem->slot < KVM_USER_MEM_SLOTS))
+ return -EINVAL;
- mutex_lock(&kvm->slots_lock);
- r = __kvm_set_memory_region(kvm, mem);
- mutex_unlock(&kvm->slots_lock);
- return r;
+ if (WARN_ON_ONCE(mem->flags))
+ return -EINVAL;
+
+ return kvm_set_memory_region(kvm, mem);
}
-EXPORT_SYMBOL_GPL(kvm_set_memory_region);
+EXPORT_SYMBOL_GPL(kvm_set_internal_memslot);
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region2 *mem)
@@ -2076,6 +2068,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
+ guard(mutex)(&kvm->slots_lock);
return kvm_set_memory_region(kvm, mem);
}
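For context, a minimal sketch of how a KVM-internal caller is expected to drive the new helper (editor's illustration, not part of the patch; the function name, slot id, and addresses are placeholders, and the caller is assumed to hold kvm->slots_lock since kvm_set_memory_region() now asserts it via lockdep):

	static int example_install_internal_slot(struct kvm *kvm, u32 id,
						 gpa_t gpa, u64 size, u64 hva)
	{
		struct kvm_userspace_memory_region2 region = {
			.slot = id,		/* must be >= KVM_USER_MEM_SLOTS */
			.flags = 0,		/* internal slots take no flags */
			.guest_phys_addr = gpa,
			.memory_size = size,
			.userspace_addr = hva,
		};
		int r;

		mutex_lock(&kvm->slots_lock);
		r = kvm_set_internal_memslot(kvm, &region);
		mutex_unlock(&kvm->slots_lock);
		return r;
	}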
@@ -2408,6 +2401,14 @@ static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
gfn_range.arg = range->arg;
gfn_range.may_block = range->may_block;
+ /*
+ * If/when KVM supports more attributes beyond private vs. shared, this
+ * _could_ set KVM_FILTER_{SHARED,PRIVATE} appropriately if the entire target
+ * range already has the desired private vs. shared state (it's unclear
+ * if that is a net win). For now, KVM reaches this point if and only
+ * if the private flag is being toggled, i.e. all mappings are in play.
+ */
+
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
slots = __kvm_memslots(kvm, i);
@@ -2464,6 +2465,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
struct kvm_mmu_notifier_range pre_set_range = {
.start = start,
.end = end,
+ .arg.attributes = attributes,
.handler = kvm_pre_set_memory_attributes,
.on_lock = kvm_mmu_invalidate_begin,
.flush_on_ret = true,
@@ -4116,32 +4118,30 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
mutex_lock(&kvm->lock);
-#ifdef CONFIG_LOCKDEP
- /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */
- mutex_lock(&vcpu->mutex);
- mutex_unlock(&vcpu->mutex);
-#endif
-
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
goto unlock_vcpu_destroy;
}
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
- r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
+ r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r == -EBUSY);
if (r)
goto unlock_vcpu_destroy;
- /* Now it's all set up, let userspace reach it */
+ /*
+ * Now it's all set up, let userspace reach it. Grab the vCPU's mutex
+ * so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
+ * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+ * into a NULL-pointer dereference because KVM thinks the _current_
+ * vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep
+ * knows it's taken *inside* kvm->lock.
+ */
+ mutex_lock(&vcpu->mutex);
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
if (r < 0)
- goto kvm_put_xa_release;
-
- if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
- r = -EINVAL;
- goto kvm_put_xa_release;
- }
+ goto kvm_put_xa_erase;
/*
* Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu
@@ -4149,15 +4149,17 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
*/
smp_wmb();
atomic_inc(&kvm->online_vcpus);
+ mutex_unlock(&vcpu->mutex);
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
kvm_create_vcpu_debugfs(vcpu);
return r;
-kvm_put_xa_release:
+kvm_put_xa_erase:
+ mutex_unlock(&vcpu->mutex);
kvm_put_kvm_no_destroy(kvm);
- xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
+ xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_dirty_ring_free(&vcpu->dirty_ring);
@@ -4282,6 +4284,33 @@ static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
}
#endif
+static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * In practice, this happy path will always be taken, as a well-behaved
+ * VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
+ */
+ if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+ return 0;
+
+ /*
+ * Acquire and release the vCPU's mutex to wait for vCPU creation to
+ * complete (kvm_vm_ioctl_create_vcpu() holds the mutex until the vCPU
+ * is fully online).
+ */
+ if (mutex_lock_killable(&vcpu->mutex))
+ return -EINTR;
+
+ mutex_unlock(&vcpu->mutex);
+
+ if (WARN_ON_ONCE(!kvm_get_vcpu(kvm, vcpu->vcpu_idx)))
+ return -EIO;
+
+ return 0;
+}
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4298,6 +4327,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
return -EINVAL;
/*
+ * Wait for the vCPU to be online before handling the ioctl(), as KVM
+ * assumes the vCPU is reachable via vcpu_array, i.e. may dereference
+ * a NULL pointer if userspace invokes an ioctl() before KVM is ready.
+ */
+ r = kvm_wait_for_vcpu_online(vcpu);
+ if (r)
+ return r;
+
+ /*
* Some architectures have vcpu ioctls that are asynchronous to vcpu
* execution; mutex_lock() would break them.
*/
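To make the fast path in kvm_wait_for_vcpu_online() concrete, a hedged userspace sketch (editor's illustration; error handling, guest memory, and register setup omitted): a well-behaved VMM only touches the vCPU fd after KVM_CREATE_VCPU returns, at which point the vCPU is already counted in online_vcpus and the new helper returns without ever taking vcpu->mutex.

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int run_first_vcpu(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int vm = ioctl(kvm, KVM_CREATE_VM, 0);
		/* The vCPU is fully published before this ioctl() returns. */
		int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

		/* Happy path: vcpu_idx < online_vcpus, so no waiting in KVM. */
		return ioctl(vcpu, KVM_RUN, 0);
	}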