diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 107 |
1 files changed, 63 insertions, 44 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9102ae172d2a..4fd482fb9260 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -72,11 +72,11 @@ module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* Default doubles per-vcpu halt_poll_ns. */ static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, int, S_IRUGO); +module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); /* Default resets per-vcpu halt_poll_ns . */ static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, int, S_IRUGO); +module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); /* * Ordering of locks: @@ -170,8 +170,8 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) kvm_make_request(req, vcpu); cpu = vcpu->cpu; - /* Set ->requests bit before we read ->mode */ - smp_mb(); + /* Set ->requests bit before we read ->mode. */ + smp_mb__after_atomic(); if (cpus != NULL && cpu != -1 && cpu != me && kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) @@ -191,9 +191,23 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { - long dirty_count = kvm->tlbs_dirty; + /* + * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in + * kvm_make_all_cpus_request. + */ + long dirty_count = smp_load_acquire(&kvm->tlbs_dirty); - smp_mb(); + /* + * We want to publish modifications to the page tables before reading + * mode. Pairs with a memory barrier in arch-specific code. + * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest + * and smp_mb in walk_shadow_page_lockless_begin/end. + * - powerpc: smp_mb in kvmppc_prepare_to_enter. + * + * There is already an smp_mb__after_atomic() before + * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that + * barrier here. + */ if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); @@ -216,8 +230,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->kvm = kvm; vcpu->vcpu_id = id; vcpu->pid = NULL; - vcpu->halt_poll_ns = 0; - init_waitqueue_head(&vcpu->wq); + init_swait_queue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); vcpu->pre_pcpu = -1; @@ -537,6 +550,16 @@ static struct kvm *kvm_create_vm(unsigned long type) if (!kvm) return ERR_PTR(-ENOMEM); + spin_lock_init(&kvm->mmu_lock); + atomic_inc(¤t->mm->mm_count); + kvm->mm = current->mm; + kvm_eventfd_init(kvm); + mutex_init(&kvm->lock); + mutex_init(&kvm->irq_lock); + mutex_init(&kvm->slots_lock); + atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_disable; @@ -569,16 +592,6 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err; } - spin_lock_init(&kvm->mmu_lock); - kvm->mm = current->mm; - atomic_inc(&kvm->mm->mm_count); - kvm_eventfd_init(kvm); - mutex_init(&kvm->lock); - mutex_init(&kvm->irq_lock); - mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); - INIT_LIST_HEAD(&kvm->devices); - r = kvm_init_mmu_notifier(kvm); if (r) goto out_err; @@ -603,6 +616,7 @@ out_err_no_disable: for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); + mmdrop(current->mm); return ERR_PTR(r); } @@ -620,13 +634,10 @@ void *kvm_kvzalloc(unsigned long size) static void kvm_destroy_devices(struct kvm *kvm) { - struct list_head *node, *tmp; - - list_for_each_safe(node, tmp, &kvm->devices) { - struct kvm_device *dev = - list_entry(node, struct kvm_device, vm_node); + struct kvm_device *dev, *tmp; - list_del(node); + list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { + list_del(&dev->vm_node); dev->ops->destroy(dev); } } @@ -1264,15 +1275,16 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w return gfn_to_hva_memslot_prot(slot, gfn, writable); } -static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int write, struct page **page) +static int get_user_page_nowait(unsigned long start, int write, + struct page **page) { int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; if (write) flags |= FOLL_WRITE; - return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); + return __get_user_pages(current, current->mm, start, 1, flags, page, + NULL, NULL); } static inline int check_user_page_hwpoison(unsigned long addr) @@ -1334,8 +1346,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, if (async) { down_read(¤t->mm->mmap_sem); - npages = get_user_page_nowait(current, current->mm, - addr, write_fault, page); + npages = get_user_page_nowait(addr, write_fault, page); up_read(¤t->mm->mmap_sem); } else npages = __get_user_pages_unlocked(current, current->mm, addr, 1, @@ -1437,11 +1448,17 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, { unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); - if (addr == KVM_HVA_ERR_RO_BAD) + if (addr == KVM_HVA_ERR_RO_BAD) { + if (writable) + *writable = false; return KVM_PFN_ERR_RO_FAULT; + } - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(addr)) { + if (writable) + *writable = false; return KVM_PFN_NOSLOT; + } /* Do not map writable pfn in the readonly memslot. */ if (writable && memslot_is_readonly(slot)) { @@ -1943,14 +1960,15 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { - int old, val; + unsigned int old, val, grow; old = val = vcpu->halt_poll_ns; + grow = READ_ONCE(halt_poll_ns_grow); /* 10us base */ - if (val == 0 && halt_poll_ns_grow) + if (val == 0 && grow) val = 10000; else - val *= halt_poll_ns_grow; + val *= grow; if (val > halt_poll_ns) val = halt_poll_ns; @@ -1961,13 +1979,14 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { - int old, val; + unsigned int old, val, shrink; old = val = vcpu->halt_poll_ns; - if (halt_poll_ns_shrink == 0) + shrink = READ_ONCE(halt_poll_ns_shrink); + if (shrink == 0) val = 0; else - val /= halt_poll_ns_shrink; + val /= shrink; vcpu->halt_poll_ns = val; trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); @@ -1993,7 +2012,7 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) void kvm_vcpu_block(struct kvm_vcpu *vcpu) { ktime_t start, cur; - DEFINE_WAIT(wait); + DECLARE_SWAITQUEUE(wait); bool waited = false; u64 block_ns; @@ -2018,7 +2037,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) kvm_arch_vcpu_blocking(vcpu); for (;;) { - prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); + prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); if (kvm_vcpu_check_block(vcpu) < 0) break; @@ -2027,7 +2046,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) schedule(); } - finish_wait(&vcpu->wq, &wait); + finish_swait(&vcpu->wq, &wait); cur = ktime_get(); kvm_arch_vcpu_unblocking(vcpu); @@ -2059,11 +2078,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { int me; int cpu = vcpu->cpu; - wait_queue_head_t *wqp; + struct swait_queue_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (waitqueue_active(wqp)) { - wake_up_interruptible(wqp); + if (swait_active(wqp)) { + swake_up(wqp); ++vcpu->stat.halt_wakeup; } @@ -2164,7 +2183,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (vcpu == me) continue; - if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) + if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; |