diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-17 02:36:00 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-17 02:36:00 +0400 |
commit | 08d19f51f05a68ce89a289320ce4ed96e757df72 (patch) | |
tree | 31c5d718d0aeaff5083fe533cd6e1f9fbbe846bb /virt | |
parent | 1c95e1b69073cff5ff179e592fa1a1e182c78a17 (diff) | |
parent | 2381ad241d0bea1253a37f314b270848067640bb (diff) | |
download | linux-08d19f51f05a68ce89a289320ce4ed96e757df72.tar.xz |
Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits)
KVM: ia64: Add intel iommu support for guests.
KVM: ia64: add directed mmio range support for kvm guests
KVM: ia64: Make pmt table be able to hold physical mmio entries.
KVM: Move irqchip_in_kernel() from ioapic.h to irq.h
KVM: Separate irq ack notification out of arch/x86/kvm/irq.c
KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs
KVM: Move device assignment logic to common code
KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/
KVM: VMX: enable invlpg exiting if EPT is disabled
KVM: x86: Silence various LAPIC-related host kernel messages
KVM: Device Assignment: Map mmio pages into VT-d page table
KVM: PIC: enhance IPI avoidance
KVM: MMU: add "oos_shadow" parameter to disable oos
KVM: MMU: speed up mmu_unsync_walk
KVM: MMU: out of sync shadow core
KVM: MMU: mmu_convert_notrap helper
KVM: MMU: awareness of new kvm_mmu_zap_page behaviour
KVM: MMU: mmu_parent_walk
KVM: x86: trap invlpg
KVM: MMU: sync roots on mmu reload
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/ioapic.c | 22 | ||||
-rw-r--r-- | virt/kvm/ioapic.h | 10 | ||||
-rw-r--r-- | virt/kvm/irq_comm.c | 60 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 382 | ||||
-rw-r--r-- | virt/kvm/kvm_trace.c | 30 | ||||
-rw-r--r-- | virt/kvm/vtd.c | 191 |
6 files changed, 620 insertions, 75 deletions
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c0d22870ee9c..53772bb46320 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -39,6 +39,7 @@ #include "ioapic.h" #include "lapic.h" +#include "irq.h" #if 0 #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) @@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi) +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, + int trigger_mode) { union ioapic_redir_entry *ent; ent = &ioapic->redirtbl[gsi]; - ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); - ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) - ioapic_service(ioapic, gsi); + kvm_notify_acked_irq(ioapic->kvm, gsi); + + if (trigger_mode == IOAPIC_LEVEL_TRIG) { + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) + ioapic_service(ioapic, gsi); + } } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; int i; for (i = 0; i < IOAPIC_NUM_PINS; i++) if (ioapic->redirtbl[i].fields.vector == vector) - __kvm_ioapic_update_eoi(ioapic, i); + __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); } static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, @@ -380,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - kvm_ioapic_update_eoi(ioapic->kvm, data); + kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); break; #endif diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7f16675fe783..cd7ae7691c9d 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -58,6 +58,7 @@ struct kvm_ioapic { } redirtbl[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; + void (*ack_notifier)(void *opaque, int irq); }; #ifdef DEBUG @@ -78,16 +79,9 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } -#ifdef CONFIG_IA64 -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} -#endif - struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, unsigned long bitmap); -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c new file mode 100644 index 000000000000..d0169f5e6047 --- /dev/null +++ b/virt/kvm/irq_comm.c @@ -0,0 +1,60 @@ +/* + * irq_comm.c: Common API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + */ + +#include <linux/kvm_host.h> +#include "irq.h" + +#include "ioapic.h" + +/* This should be called with the kvm->lock mutex held */ +void kvm_set_irq(struct kvm *kvm, int irq, int level) +{ + /* Not possible to detect if the guest uses the PIC or the + * IOAPIC. So set the bit in both. The guest will ignore + * writes to the unused one. + */ + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); +#ifdef CONFIG_X86 + kvm_pic_set_irq(pic_irqchip(kvm), irq, level); +#endif +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) +{ + struct kvm_irq_ack_notifier *kian; + struct hlist_node *n; + + hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + hlist_del(&kian->link); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7dd9b0b85e4e..cf0ab8ed3845 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,12 @@ #include "coalesced_mmio.h" #endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +#include <linux/pci.h> +#include <linux/interrupt.h> +#include "irq.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -71,11 +77,253 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, bool kvm_rebooting; +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + + /* This is taken to safely inject irq inside the guest. When + * the interrupt injection (or the ioapic code) uses a + * finer-grained lock, update this + */ + mutex_lock(&assigned_dev->kvm->lock); + kvm_set_irq(assigned_dev->kvm, + assigned_dev->guest_irq, 1); + mutex_unlock(&assigned_dev->kvm->lock); + kvm_put_kvm(assigned_dev->kvm); +} + +/* FIXME: Implement the OR logic needed to make shared interrupts on + * this line behave properly + */ +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + kvm_set_irq(dev->kvm, dev->guest_irq, 0); + enable_irq(dev->host_irq); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + + if (cancel_work_sync(&assigned_dev->interrupt_work)) + /* We had pending work. That means we will have to take + * care of kvm_put_kvm. + */ + kvm_put_kvm(kvm); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + if (match->irq_requested) { + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + mutex_unlock(&kvm->lock); + return 0; + } + + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + if (irqchip_in_kernel(kvm)) { + if (!capable(CAP_SYS_RAWIO)) { + r = -EPERM; + goto out_release; + } + + if (assigned_irq->host_irq) + match->host_irq = assigned_irq->host_irq; + else + match->host_irq = match->dev->irq; + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_device", (void *)match)) { + r = -EIO; + goto out_release; + } + } + + match->irq_requested = true; + mutex_unlock(&kvm->lock); + return r; +out_release: + mutex_unlock(&kvm->lock); + kvm_free_assigned_device(kvm, match); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EINVAL; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->dev = dev; + + match->kvm = kvm; + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + r = kvm_iommu_map_guest(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + return r; +} +#endif + static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); } +inline int kvm_is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); + + return true; +} + /* * Switches to specified vcpu, until a matching vcpu_put() */ @@ -570,6 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(&old, &new); +#ifdef CONFIG_DMAR + /* map the pages in iommu page table */ + r = kvm_iommu_map_pages(kvm, base_gfn, npages); + if (r) + goto out; +#endif return 0; out_free: @@ -708,9 +962,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -/* - * Requires current->mm->mmap_sem to be held - */ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { struct page *page[1]; @@ -726,21 +977,24 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) return page_to_pfn(bad_page); } - npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, - NULL); + npages = get_user_pages_fast(addr, 1, 1, page); if (unlikely(npages != 1)) { struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); + if (vma == NULL || addr < vma->vm_start || !(vma->vm_flags & VM_PFNMAP)) { + up_read(¤t->mm->mmap_sem); get_page(bad_page); return page_to_pfn(bad_page); } pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - BUG_ON(pfn_valid(pfn)); + up_read(¤t->mm->mmap_sem); + BUG_ON(!kvm_is_mmio_pfn(pfn)); } else pfn = page_to_pfn(page[0]); @@ -754,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) pfn_t pfn; pfn = gfn_to_pfn(kvm, gfn); - if (pfn_valid(pfn)) + if (!kvm_is_mmio_pfn(pfn)) return pfn_to_page(pfn); - WARN_ON(!pfn_valid(pfn)); + WARN_ON(kvm_is_mmio_pfn(pfn)); get_page(bad_page); return bad_page; @@ -773,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -799,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { - if (pfn_valid(pfn)) { + if (!kvm_is_mmio_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) SetPageDirty(page); @@ -809,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!kvm_is_mmio_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - if (pfn_valid(pfn)) + if (!kvm_is_mmio_pfn(pfn)) get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); @@ -972,12 +1226,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - if (kvm_cpu_has_interrupt(vcpu)) - break; - if (kvm_cpu_has_pending_timer(vcpu)) - break; - if (kvm_arch_vcpu_runnable(vcpu)) + if (kvm_cpu_has_interrupt(vcpu) || + kvm_cpu_has_pending_timer(vcpu) || + kvm_arch_vcpu_runnable(vcpu)) { + set_bit(KVM_REQ_UNHALT, &vcpu->requests); break; + } if (signal_pending(current)) break; @@ -1074,12 +1328,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) r = kvm_arch_vcpu_setup(vcpu); if (r) - goto vcpu_destroy; + return r; mutex_lock(&kvm->lock); if (kvm->vcpus[n]) { r = -EEXIST; - mutex_unlock(&kvm->lock); goto vcpu_destroy; } kvm->vcpus[n] = vcpu; @@ -1095,8 +1348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) unlink: mutex_lock(&kvm->lock); kvm->vcpus[n] = NULL; - mutex_unlock(&kvm->lock); vcpu_destroy: + mutex_unlock(&kvm->lock); kvm_arch_vcpu_destroy(vcpu); return r; } @@ -1118,6 +1371,8 @@ static long kvm_vcpu_ioctl(struct file *filp, struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; int r; + struct kvm_fpu *fpu = NULL; + struct kvm_sregs *kvm_sregs = NULL; if (vcpu->kvm->mm != current->mm) return -EIO; @@ -1165,25 +1420,28 @@ out_free2: break; } case KVM_GET_SREGS: { - struct kvm_sregs kvm_sregs; - - memset(&kvm_sregs, 0, sizeof kvm_sregs); - r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs); + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_sregs) + goto out; + r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs)) + if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) goto out; r = 0; break; } case KVM_SET_SREGS: { - struct kvm_sregs kvm_sregs; - + kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_sregs) + goto out; r = -EFAULT; - if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) + if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) goto out; - r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs); + r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); if (r) goto out; r = 0; @@ -1264,25 +1522,28 @@ out_free2: break; } case KVM_GET_FPU: { - struct kvm_fpu fpu; - - memset(&fpu, 0, sizeof fpu); - r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu); + fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + r = -ENOMEM; + if (!fpu) + goto out; + r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &fpu, sizeof fpu)) + if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) goto out; r = 0; break; } case KVM_SET_FPU: { - struct kvm_fpu fpu; - + fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + r = -ENOMEM; + if (!fpu) + goto out; r = -EFAULT; - if (copy_from_user(&fpu, argp, sizeof fpu)) + if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) goto out; - r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu); + r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); if (r) goto out; r = 0; @@ -1292,6 +1553,8 @@ out_free2: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } out: + kfree(fpu); + kfree(kvm_sregs); return r; } @@ -1360,6 +1623,30 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -1369,17 +1656,22 @@ out: static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { + struct page *page[1]; + unsigned long addr; + int npages; + gfn_t gfn = vmf->pgoff; struct kvm *kvm = vma->vm_file->private_data; - struct page *page; - if (!kvm_is_visible_gfn(kvm, vmf->pgoff)) + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) return VM_FAULT_SIGBUS; - page = gfn_to_page(kvm, vmf->pgoff); - if (is_error_page(page)) { - kvm_release_page_clean(page); + + npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, + NULL); + if (unlikely(npages != 1)) return VM_FAULT_SIGBUS; - } - vmf->page = page; + + vmf->page = page[0]; return 0; } diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 58141f31ea8f..41dcc845f78c 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/relay.h> #include <linux/debugfs.h> +#include <linux/ktime.h> #include <linux/kvm_host.h> @@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace; struct kvm_trace_probe { const char *name; const char *format; - u32 cycle_in; + u32 timestamp_in; marker_probe_func *probe_func; }; -static inline int calc_rec_size(int cycle, int extra) +static inline int calc_rec_size(int timestamp, int extra) { int rec_size = KVM_TRC_HEAD_SIZE; rec_size += extra; - return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; + return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; } static void kvm_add_trace(void *probe_private, void *call_data, @@ -54,12 +55,13 @@ static void kvm_add_trace(void *probe_private, void *call_data, struct kvm_trace *kt = kvm_trace; struct kvm_trace_rec rec; struct kvm_vcpu *vcpu; - int i, extra, size; + int i, size; + u32 extra; if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) return; - rec.event = va_arg(*args, u32); + rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); vcpu = va_arg(*args, struct kvm_vcpu *); rec.pid = current->tgid; rec.vcpu_id = vcpu->vcpu_id; @@ -67,21 +69,21 @@ static void kvm_add_trace(void *probe_private, void *call_data, extra = va_arg(*args, u32); WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.extra_u32 = extra; - rec.cycle_in = p->cycle_in; + rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) + | TRACE_REC_NUM_DATA_ARGS(extra); - if (rec.cycle_in) { - rec.u.cycle.cycle_u64 = get_cycles(); + if (p->timestamp_in) { + rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); - for (i = 0; i < rec.extra_u32; i++) - rec.u.cycle.extra_u32[i] = va_arg(*args, u32); + for (i = 0; i < extra; i++) + rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); } else { - for (i = 0; i < rec.extra_u32; i++) - rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); + for (i = 0; i < extra; i++) + rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); + size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c new file mode 100644 index 000000000000..a770874f3a3a --- /dev/null +++ b/virt/kvm/vtd.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Author: Allen M. Kay <allen.m.kay@intel.com> + * Author: Weidong Han <weidong.han@intel.com> + * Author: Ben-Ami Yassour <benami@il.ibm.com> + */ + +#include <linux/list.h> +#include <linux/kvm_host.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/intel-iommu.h> + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, r = 0; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + for (i = 0; i < npages; i++) { + /* check if already mapped */ + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + if (pfn) + continue; + + pfn = gfn_to_pfn(kvm, gfn); + r = intel_iommu_page_mapping(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (r) { + printk(KERN_ERR "kvm_iommu_map_pages:" + "iommu failed to map pfn=%lx\n", pfn); + goto unmap_pages; + } + gfn++; + } + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, base_gfn, i); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, r; + + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (r) + break; + } + up_read(&kvm->slots_lock); + return r; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct pci_dev *pdev = NULL; + int r; + + if (!intel_iommu_found()) { + printk(KERN_ERR "%s: intel iommu not found\n", __func__); + return -ENODEV; + } + + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + pdev = assigned_dev->dev; + + if (pdev == NULL) { + if (kvm->arch.intel_iommu_domain) { + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); + kvm->arch.intel_iommu_domain = NULL; + } + return -ENODEV; + } + + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); + if (!kvm->arch.intel_iommu_domain) + return -ENODEV; + + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, + pdev->bus->number, pdev->devfn); + + r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, + pdev); + if (r) { + printk(KERN_ERR "Domain context map for %s failed", + pci_name(pdev)); + goto out_unmap; + } + return 0; + +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + int i; + + for (i = 0; i < npages; i++) { + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + kvm_release_pfn_clean(pfn); + gfn++; + } +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int i; + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + } + up_read(&kvm->slots_lock); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct kvm_assigned_dev_kernel *entry; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", + entry->host_busnr, + PCI_SLOT(entry->host_devfn), + PCI_FUNC(entry->host_devfn)); + + /* detach kvm dmar domain */ + intel_iommu_detach_dev(domain, entry->host_busnr, + entry->host_devfn); + } + kvm_iommu_unmap_memslots(kvm); + intel_iommu_domain_exit(domain); + return 0; +} |