diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 184 |
1 files changed, 101 insertions, 83 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a5a08298aab..605697e9c4dd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,10 +47,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -#ifdef CONFIG_X86 -#include <asm/msidef.h> -#endif - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT - -#ifdef CONFIG_X86 -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) -{ - int vcpu_id; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); - int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&dev->guest_msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) - kvm_apic_set_irq(vcpu, vector, trig_mode); - } - break; - default: - printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); - } -} -#else -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} -#endif - static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { @@ -162,18 +107,14 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - else if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { - assigned_device_msi_dispatch(assigned_dev); + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { enable_irq(assigned_dev->host_irq); assigned_dev->host_irq_disabled = false; } mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) @@ -181,8 +122,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); @@ -213,6 +152,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) } } +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ static void kvm_free_assigned_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { @@ -228,11 +168,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm, if (!assigned_dev->irq_requested_type) return; - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); free_irq(assigned_dev->host_irq, (void *)assigned_dev); @@ -285,8 +238,8 @@ static int assigned_device_update_intx(struct kvm *kvm, if (irqchip_in_kernel(kvm)) { if (!msi2intx && - adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { - free_irq(adev->host_irq, (void *)kvm); + (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { + free_irq(adev->host_irq, (void *)adev); pci_disable_msi(adev->dev); } @@ -320,18 +273,24 @@ static int assigned_device_update_msi(struct kvm *kvm, { int r; + adev->guest_irq = airq->guest_irq; if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { /* x86 don't care upper address of guest msi message addr */ adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->guest_msi.address_lo = airq->guest_msi.addr_lo; - adev->guest_msi.data = airq->guest_msi.data; adev->ack_notifier.gsi = -1; } else if (msi2intx) { adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->guest_irq = airq->guest_irq; adev->ack_notifier.gsi = airq->guest_irq; + } else { + /* + * Guest require to disable device MSI, we disable MSI and + * re-enable INTx by default again. Notice it's only for + * non-msi2intx. + */ + assigned_device_update_intx(kvm, adev, airq); + return 0; } if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) @@ -368,6 +327,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, { int r = 0; struct kvm_assigned_dev_kernel *match; + u32 current_flags = 0, changed_flags; mutex_lock(&kvm->lock); @@ -405,8 +365,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, } } - if ((!msi2intx && - (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) || + if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && + (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) + current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; + + changed_flags = assigned_irq->flags ^ current_flags; + + if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || (msi2intx && match->dev->msi_enabled)) { #ifdef CONFIG_X86 r = assigned_device_update_msi(kvm, match, assigned_irq); @@ -455,6 +420,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + down_read(&kvm->slots_lock); mutex_lock(&kvm->lock); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, @@ -516,6 +482,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, out: mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; out_list_del: list_del(&match->list); @@ -527,6 +494,7 @@ out_put: out_free: kfree(match); mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); return r; } #endif @@ -549,7 +517,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); @@ -567,8 +535,10 @@ static inline int valid_vcpu(int n) inline int kvm_is_mmio_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + if (pfn_valid(pfn)) { + struct page *page = compound_head(pfn_to_page(pfn)); + return PageReserved(page); + } return true; } @@ -789,11 +759,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, return young; } +static void kvm_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + kvm_arch_flush_shadow(kvm); +} + static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { .invalidate_page = kvm_mmu_notifier_invalidate_page, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, + .release = kvm_mmu_notifier_release, }; #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ @@ -806,6 +784,10 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_LIST_HEAD(&kvm->irq_routing); + INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -883,9 +865,11 @@ static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -1732,13 +1716,13 @@ out_free2: r = 0; break; } - case KVM_DEBUG_GUEST: { - struct kvm_debug_guest dbg; + case KVM_SET_GUEST_DEBUG: { + struct kvm_guest_debug dbg; r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); + r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); if (r) goto out; r = 0; @@ -1906,6 +1890,36 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -1972,6 +1986,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: return 1; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif default: break; } |