Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c |  36
-rw-r--r-- | virt/kvm/eventfd.c      |  11
-rw-r--r-- | virt/kvm/ioapic.c       |  48
-rw-r--r-- | virt/kvm/ioapic.h       |   4
-rw-r--r-- | virt/kvm/iommu.c        |  14
-rw-r--r-- | virt/kvm/irq_comm.c     | 118
-rw-r--r-- | virt/kvm/kvm_main.c     | 246
7 files changed, 327 insertions, 150 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 23a41a9f8db9..3642239252b0 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -105,6 +105,15 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) } #ifdef __KVM_HAVE_MSI +static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret = kvm_set_irq_inatomic(assigned_dev->kvm, + assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; +} + static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; @@ -117,6 +126,23 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) #endif #ifdef __KVM_HAVE_MSIX +static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int index = find_index_from_host_irq(assigned_dev, irq); + u32 vector; + int ret = 0; + + if (index >= 0) { + vector = assigned_dev->guest_msix_entries[index].vector; + ret = kvm_set_irq_inatomic(assigned_dev->kvm, + assigned_dev->irq_source_id, + vector, 1); + } + + return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; +} + static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; @@ -334,11 +360,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, } #ifdef __KVM_HAVE_MSI -static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) -{ - return IRQ_WAKE_THREAD; -} - static int assigned_device_enable_host_msi(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { @@ -363,11 +384,6 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, #endif #ifdef __KVM_HAVE_MSIX -static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) -{ - return IRQ_WAKE_THREAD; -} - static int assigned_device_enable_host_msix(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9718e98d6d2a..adb17f266b28 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,6 +35,7 @@ #include "iodev.h" +#ifdef __KVM_HAVE_IOAPIC /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -267,14 +268,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; - struct hlist_node *n; if (irqfd->gsi >= irq_rt->nr_rt_entries) { rcu_assign_pointer(irqfd->irq_entry, NULL); return; } - hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) { + hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. 
*/ if (e->type == KVM_IRQ_ROUTING_MSI) rcu_assign_pointer(irqfd->irq_entry, e); @@ -332,7 +332,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) mutex_lock(&kvm->irqfds.resampler_lock); list_for_each_entry(resampler, - &kvm->irqfds.resampler_list, list) { + &kvm->irqfds.resampler_list, link) { if (resampler->notifier.gsi == irqfd->gsi) { irqfd->resampler = resampler; break; @@ -425,17 +425,21 @@ fail: kfree(irqfd); return ret; } +#endif void kvm_eventfd_init(struct kvm *kvm) { +#ifdef __KVM_HAVE_IOAPIC spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); mutex_init(&kvm->irqfds.resampler_lock); +#endif INIT_LIST_HEAD(&kvm->ioeventfds); } +#ifdef __KVM_HAVE_IOAPIC /* * shutdown any irqfd's that match fd+gsi */ @@ -555,6 +559,7 @@ static void __exit irqfd_module_exit(void) module_init(irqfd_module_init); module_exit(irqfd_module_exit); +#endif /* * -------------------------------------------------------------------- diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index cfb7e4d52dc2..ce82b9401958 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -35,6 +35,7 @@ #include <linux/hrtimer.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/export.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/current.h> @@ -115,6 +116,42 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) smp_wmb(); } +void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, + u64 *eoi_exit_bitmap) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + struct kvm_lapic_irq irqe; + int index; + + spin_lock(&ioapic->lock); + /* traverse ioapic entry to set eoi exit bitmap*/ + for (index = 0; index < IOAPIC_NUM_PINS; index++) { + e = &ioapic->redirtbl[index]; + if (!e->fields.mask && + (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || + kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, + index))) { + irqe.dest_id = e->fields.dest_id; + irqe.vector = e->fields.vector; + irqe.dest_mode = e->fields.dest_mode; + irqe.delivery_mode = e->fields.delivery_mode << 8; + kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); + } + } + spin_unlock(&ioapic->lock); +} +EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); + +void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!kvm_apic_vid_enabled(kvm) || !ioapic) + return; + kvm_make_update_eoibitmap_request(kvm); +} + static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; @@ -156,6 +193,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index); + kvm_ioapic_make_eoibitmap_request(ioapic->kvm); break; } } @@ -179,15 +217,6 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.level = 1; irqe.shorthand = 0; -#ifdef CONFIG_X86 - /* Always delivery PIT interrupt to vcpu 0 */ - if (irq == 0) { - irqe.dest_mode = 0; /* Physical mode. 
*/ - /* need to read apic_id from apic regiest since - * it can be rewritten */ - irqe.dest_id = ioapic->kvm->bsp_vcpu_id; - } -#endif return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); } @@ -464,6 +493,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); + kvm_ioapic_make_eoibitmap_request(kvm); spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index a30abfe6ed16..0400a466c50c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -82,5 +82,9 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); +void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, + u64 *eoi_exit_bitmap); + #endif diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 037cb6730e68..72a130bc448a 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -52,7 +52,7 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, end_gfn = gfn + (size >> PAGE_SHIFT); gfn += 1; - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return pfn; while (gfn < end_gfn) @@ -76,7 +76,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) gfn = slot->base_gfn; end_gfn = gfn + slot->npages; - flags = IOMMU_READ | IOMMU_WRITE; + flags = IOMMU_READ; + if (!(slot->flags & KVM_MEM_READONLY)) + flags |= IOMMU_WRITE; if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) flags |= IOMMU_CACHE; @@ -106,7 +108,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) * important because we unmap and unpin in 4kb steps later. 
*/ pfn = kvm_pin_pages(slot, gfn, page_size); - if (is_error_pfn(pfn)) { + if (is_error_noslot_pfn(pfn)) { gfn += 1; continue; } @@ -168,11 +170,7 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x:%x.%x failed", - pci_domain_nr(pdev->bus), - pdev->bus->number, - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); + dev_err(&pdev->dev, "kvm assign device failed ret %d", r); return r; } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 2eb58af7ee99..e9073cf4d040 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <linux/slab.h> +#include <linux/export.h> #include <trace/events/kvm.h> #include <asm/msidef.h> @@ -102,6 +103,23 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, return r; } +static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq) +{ + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); + + irq->dest_id = (e->msi.address_lo & + MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + irq->vector = (e->msi.data & + MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; + irq->delivery_mode = e->msi.data & 0x700; + irq->level = 1; + irq->shorthand = 0; + /* TODO Deal with RH bit of MSI message address */ +} + int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level) { @@ -110,22 +128,26 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, if (!level) return -1; - trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); + kvm_set_msi_irq(e, &irq); - irq.dest_id = (e->msi.address_lo & - MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; - irq.vector = (e->msi.data & - MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; - irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; - irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; - irq.delivery_mode = e->msi.data & 0x700; - irq.level = 1; - irq.shorthand = 0; - - /* TODO Deal with RH bit of MSI message address */ return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } + +static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm) +{ + struct kvm_lapic_irq irq; + int r; + + kvm_set_msi_irq(e, &irq); + + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) + return r; + else + return -EWOULDBLOCK; +} + int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; @@ -151,7 +173,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; - struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -162,7 +183,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) + hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; rcu_read_unlock(); @@ -178,10 +199,67 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) return ret; } +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. 
+ * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) +{ + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + struct kvm_irq_routing_table *irq_rt; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. + */ + rcu_read_lock(); + irq_rt = rcu_dereference(kvm->irq_routing); + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, &irq_rt->map[irq], link) { + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + break; + } + rcu_read_unlock(); + return ret; +} + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + rcu_read_unlock(); + return true; + } + + rcu_read_unlock(); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; int gsi; trace_kvm_ack_irq(irqchip, pin); @@ -189,7 +267,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); @@ -202,6 +280,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); + kvm_ioapic_make_eoibitmap_request(kvm); } void kvm_unregister_irq_ack_notifier(struct kvm *kvm, @@ -211,6 +290,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); synchronize_rcu(); + kvm_ioapic_make_eoibitmap_request(kvm); } int kvm_request_irq_source_id(struct kvm *kvm) @@ -285,13 +365,12 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - struct hlist_node *n; int gsi; rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) + hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); rcu_read_unlock(); @@ -312,13 +391,12 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, int delta; unsigned max_pin; struct kvm_kernel_irq_routing_entry *ei; - struct hlist_node *n; /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and MSI. 
*/ - hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) if (ei->type == KVM_IRQ_ROUTING_MSI || ue->type == KVM_IRQ_ROUTING_MSI || ue->u.irqchip.irqchip == ei->irqchip.irqchip) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be70035fd42a..adc68feb5c5a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -212,6 +212,16 @@ void kvm_reload_remote_mmus(struct kvm *kvm) make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } +void kvm_make_mclock_inprogress_request(struct kvm *kvm) +{ + make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); +} + +void kvm_make_update_eoibitmap_request(struct kvm *kvm) +{ + make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); +} + int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { struct page *page; @@ -469,6 +479,8 @@ static struct kvm *kvm_create_vm(unsigned long type) INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif + BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + r = -ENOMEM; kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) @@ -665,7 +677,8 @@ static void sort_memslots(struct kvm_memslots *slots) slots->id_to_index[slots->memslots[i].id] = i; } -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) +void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, + u64 last_generation) { if (new) { int id = new->id; @@ -677,7 +690,7 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) sort_memslots(slots); } - slots->generation++; + slots->generation = last_generation + 1; } static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) @@ -694,6 +707,35 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) return 0; } +static struct kvm_memslots *install_new_memslots(struct kvm *kvm, + struct kvm_memslots *slots, struct kvm_memory_slot *new) +{ + struct kvm_memslots *old_memslots = kvm->memslots; + + update_memslots(slots, new, kvm->memslots->generation); + rcu_assign_pointer(kvm->memslots, slots); + synchronize_srcu_expedited(&kvm->srcu); + return old_memslots; +} + +/* + * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: + * - create a new memory slot + * - delete an existing memory slot + * - modify an existing memory slot + * -- move it in the guest physical memory space + * -- just change its flags + * + * Since flags can be changed by some of these operations, the following + * differentiation is the best we can do for __kvm_set_memory_region(): + */ +enum kvm_mr_change { + KVM_MR_CREATE, + KVM_MR_DELETE, + KVM_MR_MOVE, + KVM_MR_FLAGS_ONLY, +}; + /* * Allocate some memory and give it an address in the guest physical address * space. 
@@ -704,15 +746,15 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) */ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int r; gfn_t base_gfn; unsigned long npages; - unsigned long i; - struct kvm_memory_slot *memslot; + struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; - struct kvm_memslots *slots, *old_memslots; + struct kvm_memslots *slots = NULL, *old_memslots; + enum kvm_mr_change change; r = check_memory_region_flags(mem); if (r) @@ -736,7 +778,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = id_to_memslot(kvm->memslots, mem->slot); + slot = id_to_memslot(kvm->memslots, mem->slot); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -747,28 +789,48 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!npages) mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; - new = old = *memslot; + new = old = *slot; new.id = mem->slot; new.base_gfn = base_gfn; new.npages = npages; new.flags = mem->flags; - /* Disallow changing a memory slot's size. */ r = -EINVAL; - if (npages && old.npages && npages != old.npages) - goto out_free; + if (npages) { + if (!old.npages) + change = KVM_MR_CREATE; + else { /* Modify an existing slot. */ + if ((mem->userspace_addr != old.userspace_addr) || + (npages != old.npages) || + ((new.flags ^ old.flags) & KVM_MEM_READONLY)) + goto out; - /* Check for overlaps */ - r = -EEXIST; - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; + if (base_gfn != old.base_gfn) + change = KVM_MR_MOVE; + else if (new.flags != old.flags) + change = KVM_MR_FLAGS_ONLY; + else { /* Nothing to change. */ + r = 0; + goto out; + } + } + } else if (old.npages) { + change = KVM_MR_DELETE; + } else /* Modify a non-existent slot: disallowed. 
*/ + goto out; - if (s == memslot || !s->npages) - continue; - if (!((base_gfn + npages <= s->base_gfn) || - (base_gfn >= s->base_gfn + s->npages))) - goto out_free; + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + /* Check for overlaps */ + r = -EEXIST; + kvm_for_each_memslot(slot, kvm->memslots) { + if ((slot->id >= KVM_USER_MEM_SLOTS) || + (slot->id == mem->slot)) + continue; + if (!((base_gfn + npages <= slot->base_gfn) || + (base_gfn >= slot->base_gfn + slot->npages))) + goto out; + } } /* Free page dirty bitmap if unneeded */ @@ -776,10 +838,7 @@ int __kvm_set_memory_region(struct kvm *kvm, new.dirty_bitmap = NULL; r = -ENOMEM; - - /* Allocate if a slot is being created */ - if (npages && !old.npages) { - new.user_alloc = user_alloc; + if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; if (kvm_arch_create_memslot(&new, npages)) @@ -790,12 +849,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { if (kvm_create_dirty_bitmap(&new) < 0) goto out_free; - /* destroy any largepage mappings for dirty tracking */ } - if (!npages || base_gfn != old.base_gfn) { - struct kvm_memory_slot *slot; - + if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { r = -ENOMEM; slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), GFP_KERNEL); @@ -804,11 +860,10 @@ int __kvm_set_memory_region(struct kvm *kvm, slot = id_to_memslot(slots, mem->slot); slot->flags |= KVM_MEMSLOT_INVALID; - update_memslots(slots, NULL); + old_memslots = install_new_memslots(kvm, slots, NULL); - old_memslots = kvm->memslots; - rcu_assign_pointer(kvm->memslots, slots); - synchronize_srcu_expedited(&kvm->srcu); + /* slot was deleted or moved, clear iommu mapping */ + kvm_iommu_unmap_pages(kvm, &old); /* From this point no new shadow pages pointing to a deleted, * or moved, memslot will be created. * @@ -817,37 +872,48 @@ int __kvm_set_memory_region(struct kvm *kvm, * - kvm_is_visible_gfn (mmu_check_roots) */ kvm_arch_flush_shadow_memslot(kvm, slot); - kfree(old_memslots); + slots = old_memslots; } r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); if (r) - goto out_free; + goto out_slots; - /* map/unmap the pages in iommu page table */ - if (npages) { - r = kvm_iommu_map_pages(kvm, &new); - if (r) + r = -ENOMEM; + /* + * We can re-use the old_memslots from above, the only difference + * from the currently installed memslots is the invalid flag. This + * will get overwritten by update_memslots anyway. + */ + if (!slots) { + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); + if (!slots) goto out_free; - } else - kvm_iommu_unmap_pages(kvm, &old); + } - r = -ENOMEM; - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); - if (!slots) - goto out_free; + /* + * IOMMU mapping: New slots need to be mapped. Old slots need to be + * un-mapped and re-mapped if their base changes. Since base change + * unmapping is handled above with slot deletion, mapping alone is + * needed here. Anything else the iommu might care about for existing + * slots (size changes, userspace addr changes and read-only flag + * changes) is disallowed above, so any other attribute changes getting + * here can be skipped. 
+ */ + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + r = kvm_iommu_map_pages(kvm, &new); + if (r) + goto out_slots; + } /* actual memory is freed via old in kvm_free_physmem_slot below */ - if (!npages) { + if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); } - update_memslots(slots, &new); - old_memslots = kvm->memslots; - rcu_assign_pointer(kvm->memslots, slots); - synchronize_srcu_expedited(&kvm->srcu); + old_memslots = install_new_memslots(kvm, slots, &new); kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); @@ -856,17 +922,18 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; +out_slots: + kfree(slots); out_free: kvm_free_physmem_slot(&new, &old); out: return r; - } EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int r; @@ -880,9 +947,9 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { - if (mem->slot >= KVM_MEMORY_SLOTS) + if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; return kvm_set_memory_region(kvm, mem, user_alloc); } @@ -896,7 +963,7 @@ int kvm_get_dirty_log(struct kvm *kvm, unsigned long any = 0; r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); @@ -942,7 +1009,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); - if (!memslot || memslot->id >= KVM_MEMORY_SLOTS || + if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || memslot->flags & KVM_MEMSLOT_INVALID) return 0; @@ -1208,7 +1275,7 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, return KVM_PFN_ERR_RO_FAULT; if (kvm_is_error_hva(addr)) - return KVM_PFN_ERR_BAD; + return KVM_PFN_NOSLOT; /* Do not map writable pfn in the readonly memslot. 
*/ if (writable && memslot_is_readonly(slot)) { @@ -1290,7 +1357,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); static struct page *kvm_pfn_to_page(pfn_t pfn) { - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return KVM_ERR_PTR_BAD_PAGE; if (kvm_is_mmio_pfn(pfn)) { @@ -1322,7 +1389,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -1639,6 +1706,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; struct task_struct *task = NULL; + bool ret = false; rcu_read_lock(); pid = rcu_dereference(target->pid); @@ -1646,17 +1714,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) task = get_pid_task(target->pid, PIDTYPE_PID); rcu_read_unlock(); if (!task) - return false; + return ret; if (task->flags & PF_VCPU) { put_task_struct(task); - return false; - } - if (yield_to(task, 1)) { - put_task_struct(task); - return true; + return ret; } + ret = yield_to(task, 1); put_task_struct(task); - return false; + + return ret; } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); @@ -1697,12 +1763,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) return eligible; } #endif + void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; int last_boosted_vcpu = me->kvm->last_boosted_vcpu; int yielded = 0; + int try = 3; int pass; int i; @@ -1714,7 +1782,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) * VCPU is holding the lock that we need and will release it. * We approximate round-robin by starting at the last boosted VCPU. */ - for (pass = 0; pass < 2 && !yielded; pass++) { + for (pass = 0; pass < 2 && !yielded && try; pass++) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!pass && i <= last_boosted_vcpu) { i = last_boosted_vcpu; @@ -1727,10 +1795,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; - if (kvm_vcpu_yield_to(vcpu)) { + + yielded = kvm_vcpu_yield_to(vcpu); + if (yielded > 0) { kvm->last_boosted_vcpu = i; - yielded = 1; break; + } else if (yielded < 0) { + try--; + if (!try) + break; } } } @@ -1848,6 +1921,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) atomic_inc(&kvm->online_vcpus); mutex_unlock(&kvm->lock); + kvm_arch_vcpu_postcreate(vcpu); return r; unlock_vcpu_destroy: @@ -1929,10 +2003,6 @@ out_free1: goto out; } r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); - if (r) - goto out_free2; - r = 0; -out_free2: kfree(kvm_regs); break; } @@ -1954,12 +2024,10 @@ out_free2: kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); if (IS_ERR(kvm_sregs)) { r = PTR_ERR(kvm_sregs); + kvm_sregs = NULL; goto out; } r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); - if (r) - goto out; - r = 0; break; } case KVM_GET_MP_STATE: { @@ -1981,9 +2049,6 @@ out_free2: if (copy_from_user(&mp_state, argp, sizeof mp_state)) goto out; r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); - if (r) - goto out; - r = 0; break; } case KVM_TRANSLATE: { @@ -2008,9 +2073,6 @@ out_free2: if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); - if (r) - goto out; - r = 0; break; } case KVM_SET_SIGNAL_MASK: { @@ -2054,12 +2116,10 @@ out_free2: fpu = memdup_user(argp, sizeof(*fpu)); if (IS_ERR(fpu)) { r = PTR_ERR(fpu); + fpu = NULL; goto out; } r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); - if (r) - goto out; - r = 
0; break; } default: @@ -2129,8 +2189,6 @@ static long kvm_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_VCPU: r = kvm_vm_ioctl_create_vcpu(kvm, arg); - if (r < 0) - goto out; break; case KVM_SET_USER_MEMORY_REGION: { struct kvm_userspace_memory_region kvm_userspace_mem; @@ -2140,9 +2198,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); - if (r) - goto out; + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); break; } case KVM_GET_DIRTY_LOG: { @@ -2152,8 +2208,6 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&log, argp, sizeof log)) goto out; r = kvm_vm_ioctl_get_dirty_log(kvm, &log); - if (r) - goto out; break; } #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -2163,9 +2217,6 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&zone, argp, sizeof zone)) goto out; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); - if (r) - goto out; - r = 0; break; } case KVM_UNREGISTER_COALESCED_MMIO: { @@ -2174,9 +2225,6 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&zone, argp, sizeof zone)) goto out; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); - if (r) - goto out; - r = 0; break; } #endif @@ -2285,8 +2333,6 @@ static long kvm_vm_compat_ioctl(struct file *filp, log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); r = kvm_vm_ioctl_get_dirty_log(kvm, &log); - if (r) - goto out; break; } default: |
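
The assigned-dev.c hunks above move MSI and MSI-X injection into the hard-IRQ handlers and fall back to the threaded handler only when kvm_set_irq_inatomic() reports -EWOULDBLOCK (as the irq_comm.c comment explains, only a 1:1 MSI route can be delivered atomically). The following is a minimal userspace sketch of that control flow, not kernel code; every *_model identifier is a hypothetical stand-in.

/*
 * Userspace model of the MSI fast path: the hard-IRQ half tries an atomic
 * injection and wakes the IRQ thread only when that attempt punts.
 */
#include <errno.h>
#include <stdio.h>

enum irqreturn_model { IRQ_HANDLED_MODEL, IRQ_WAKE_THREAD_MODEL };

/* Stand-in for kvm_set_irq_inatomic(): only 1:1 MSI routes succeed here. */
static int set_irq_inatomic_model(int route_is_msi)
{
	return route_is_msi ? 0 : -EWOULDBLOCK;
}

/* Stand-in for kvm_assigned_dev_msi(): the hard-IRQ half of the handler. */
static enum irqreturn_model hard_irq_model(int route_is_msi)
{
	int ret = set_irq_inatomic_model(route_is_msi);

	/* Defer to the threaded handler only when the atomic path punts. */
	return ret == -EWOULDBLOCK ? IRQ_WAKE_THREAD_MODEL : IRQ_HANDLED_MODEL;
}

int main(void)
{
	printf("MSI route:     %s\n", hard_irq_model(1) == IRQ_HANDLED_MODEL ?
	       "injected from hard IRQ" : "deferred to thread");
	printf("non-MSI route: %s\n", hard_irq_model(0) == IRQ_HANDLED_MODEL ?
	       "injected from hard IRQ" : "deferred to thread");
	return 0;
}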
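
The __kvm_set_memory_region() rework classifies each KVM_SET_USER_MEMORY_REGION request into a kvm_mr_change case up front instead of open-coding the checks throughout. Below is a standalone sketch of that decision ladder, mirroring the hunk above under hypothetical *_model names; the early "nothing to change" exit is represented as its own value here.

/*
 * Userspace model of the change classification added to
 * __kvm_set_memory_region().  The decisions mirror the hunk above.
 */
#include <stdio.h>

enum kvm_mr_change_model {
	MR_CREATE, MR_DELETE, MR_MOVE, MR_FLAGS_ONLY, MR_NOTHING, MR_INVALID
};

struct slot_model {
	unsigned long npages;
	unsigned long base_gfn;
	unsigned long userspace_addr;
	unsigned int flags;		/* e.g. a read-only bit */
};

#define READONLY_FLAG_MODEL 0x2U	/* stand-in for KVM_MEM_READONLY */

static enum kvm_mr_change_model
classify_change_model(const struct slot_model *old_s,
		      const struct slot_model *new_s)
{
	if (new_s->npages) {
		if (!old_s->npages)
			return MR_CREATE;
		/* Size, userspace address and read-only flips are disallowed. */
		if (new_s->userspace_addr != old_s->userspace_addr ||
		    new_s->npages != old_s->npages ||
		    ((new_s->flags ^ old_s->flags) & READONLY_FLAG_MODEL))
			return MR_INVALID;
		if (new_s->base_gfn != old_s->base_gfn)
			return MR_MOVE;
		if (new_s->flags != old_s->flags)
			return MR_FLAGS_ONLY;
		return MR_NOTHING;	/* nothing to change */
	}
	if (old_s->npages)
		return MR_DELETE;
	return MR_INVALID;		/* modifying a non-existent slot */
}

int main(void)
{
	struct slot_model none = { 0 };
	struct slot_model a = { .npages = 16, .base_gfn = 0x100,
				.userspace_addr = 0x700000, .flags = 0 };
	struct slot_model moved = a;

	moved.base_gfn = 0x200;
	printf("create: %d\n", classify_change_model(&none, &a) == MR_CREATE);
	printf("move:   %d\n", classify_change_model(&a, &moved) == MR_MOVE);
	printf("delete: %d\n", classify_change_model(&a, &none) == MR_DELETE);
	return 0;
}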
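
kvm_ioapic_calculate_eoi_exitmap() in the ioapic.c hunk walks the redirection table and treats an unmasked pin as an EOI-exit candidate when it is level-triggered or has an ack notifier registered, handing each candidate to kvm_calculate_eoi_exitmap(). The sketch below models only the candidate selection and sets the vector bit directly; the real helper also checks whether the vCPU is actually a destination. All *_model names are hypothetical.

/*
 * Simplified model of the EOI-exit bitmap calculation: mark the vector of
 * every unmasked pin that is level-triggered or has an ack notifier.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_PINS_MODEL 24	/* IOAPIC_NUM_PINS on x86 */

struct redir_entry_model {
	uint8_t vector;
	uint8_t mask;		/* 1 = pin masked */
	uint8_t level_trig;	/* 1 = level-triggered */
	uint8_t has_notifier;	/* 1 = ack notifier registered on this pin */
};

static void calc_eoi_exitmap_model(const struct redir_entry_model *rt,
				   uint64_t eoi_exit_bitmap[4])
{
	int i;

	for (i = 0; i < 4; i++)
		eoi_exit_bitmap[i] = 0;

	for (i = 0; i < NUM_PINS_MODEL; i++) {
		const struct redir_entry_model *e = &rt[i];

		if (!e->mask && (e->level_trig || e->has_notifier))
			eoi_exit_bitmap[e->vector / 64] |=
				1ULL << (e->vector % 64);
	}
}

int main(void)
{
	struct redir_entry_model rt[NUM_PINS_MODEL] = { 0 };
	uint64_t map[4];

	rt[10] = (struct redir_entry_model){ .vector = 0x3c, .level_trig = 1 };
	rt[11] = (struct redir_entry_model){ .vector = 0x41, .has_notifier = 1 };
	calc_eoi_exitmap_model(rt, map);
	printf("bitmap[0] = %#llx\n", (unsigned long long)map[0]);
	printf("bitmap[1] = %#llx\n", (unsigned long long)map[1]);
	return 0;
}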
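
In kvm_main.c, kvm_vcpu_on_spin() now captures the sign of kvm_vcpu_yield_to() and gives up after three failed yields (try = 3) instead of scanning every vCPU. Below is a compact model of that loop structure with a made-up table of per-vCPU yield outcomes; it is an illustration of the control flow, not the kernel routine.

/*
 * Model of the bounded directed-yield retry: two round-robin passes over
 * the vCPUs, stopping on the first successful yield or after three
 * attempts fail with a negative result.
 */
#include <stdio.h>

#define NR_VCPUS_MODEL 8

/* Pretend outcome of yield_to() per candidate vCPU: >0 ok, <0 failed. */
static int yield_result_model[NR_VCPUS_MODEL] = { -1, -1, 0, -1, 1, 0, -1, -1 };

static int spin_loop_model(int last_boosted)
{
	int try = 3, yielded = 0, pass, i;

	for (pass = 0; pass < 2 && !yielded && try; pass++) {
		for (i = 0; i < NR_VCPUS_MODEL; i++) {
			if (!pass && i <= last_boosted)
				continue;
			if (pass && i > last_boosted)
				break;
			yielded = yield_result_model[i];
			if (yielded > 0)
				return i;	/* new last_boosted_vcpu */
			if (yielded < 0 && !--try)
				return -1;	/* give up after 3 failures */
		}
	}
	return -1;
}

int main(void)
{
	printf("boosted vcpu: %d\n", spin_loop_model(1));
	return 0;
}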