summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-17 02:36:00 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-17 02:36:00 +0400
commit08d19f51f05a68ce89a289320ce4ed96e757df72 (patch)
tree31c5d718d0aeaff5083fe533cd6e1f9fbbe846bb /virt
parent1c95e1b69073cff5ff179e592fa1a1e182c78a17 (diff)
parent2381ad241d0bea1253a37f314b270848067640bb (diff)
downloadlinux-08d19f51f05a68ce89a289320ce4ed96e757df72.tar.xz
Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits) KVM: ia64: Add intel iommu support for guests. KVM: ia64: add directed mmio range support for kvm guests KVM: ia64: Make pmt table be able to hold physical mmio entries. KVM: Move irqchip_in_kernel() from ioapic.h to irq.h KVM: Separate irq ack notification out of arch/x86/kvm/irq.c KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs KVM: Move device assignment logic to common code KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/ KVM: VMX: enable invlpg exiting if EPT is disabled KVM: x86: Silence various LAPIC-related host kernel messages KVM: Device Assignment: Map mmio pages into VT-d page table KVM: PIC: enhance IPI avoidance KVM: MMU: add "oos_shadow" parameter to disable oos KVM: MMU: speed up mmu_unsync_walk KVM: MMU: out of sync shadow core KVM: MMU: mmu_convert_notrap helper KVM: MMU: awareness of new kvm_mmu_zap_page behaviour KVM: MMU: mmu_parent_walk KVM: x86: trap invlpg KVM: MMU: sync roots on mmu reload ...
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/ioapic.c22
-rw-r--r--virt/kvm/ioapic.h10
-rw-r--r--virt/kvm/irq_comm.c60
-rw-r--r--virt/kvm/kvm_main.c382
-rw-r--r--virt/kvm/kvm_trace.c30
-rw-r--r--virt/kvm/vtd.c191
6 files changed, 620 insertions, 75 deletions
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c0d22870ee9c..53772bb46320 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -39,6 +39,7 @@
#include "ioapic.h"
#include "lapic.h"
+#include "irq.h"
#if 0
#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
@@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
}
}
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+ int trigger_mode)
{
union ioapic_redir_entry *ent;
ent = &ioapic->redirtbl[gsi];
- ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
- ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_service(ioapic, gsi);
+ kvm_notify_acked_irq(ioapic->kvm, gsi);
+
+ if (trigger_mode == IOAPIC_LEVEL_TRIG) {
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_service(ioapic, gsi);
+ }
}
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
if (ioapic->redirtbl[i].fields.vector == vector)
- __kvm_ioapic_update_eoi(ioapic, i);
+ __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
}
static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
@@ -380,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
- kvm_ioapic_update_eoi(ioapic->kvm, data);
+ kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
break;
#endif
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7f16675fe783..cd7ae7691c9d 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -58,6 +58,7 @@ struct kvm_ioapic {
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
+ void (*ack_notifier)(void *opaque, int irq);
};
#ifdef DEBUG
@@ -78,16 +79,9 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
}
-#ifdef CONFIG_IA64
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
- return 1;
-}
-#endif
-
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
new file mode 100644
index 000000000000..d0169f5e6047
--- /dev/null
+++ b/virt/kvm/irq_comm.c
@@ -0,0 +1,60 @@
+/*
+ * irq_comm.c: Common API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ * Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include "irq.h"
+
+#include "ioapic.h"
+
+/* This should be called with the kvm->lock mutex held */
+void kvm_set_irq(struct kvm *kvm, int irq, int level)
+{
+ /* Not possible to detect if the guest uses the PIC or the
+ * IOAPIC. So set the bit in both. The guest will ignore
+ * writes to the unused one.
+ */
+ kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+#ifdef CONFIG_X86
+ kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+#endif
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+ struct kvm_irq_ack_notifier *kian;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_del(&kian->link);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7dd9b0b85e4e..cf0ab8ed3845 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,12 @@
#include "coalesced_mmio.h"
#endif
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "irq.h"
+#endif
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -71,11 +77,253 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
bool kvm_rebooting;
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+ int assigned_dev_id)
+{
+ struct list_head *ptr;
+ struct kvm_assigned_dev_kernel *match;
+
+ list_for_each(ptr, head) {
+ match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+ if (match->assigned_dev_id == assigned_dev_id)
+ return match;
+ }
+ return NULL;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+ struct kvm_assigned_dev_kernel *assigned_dev;
+
+ assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+ interrupt_work);
+
+ /* This is taken to safely inject irq inside the guest. When
+ * the interrupt injection (or the ioapic code) uses a
+ * finer-grained lock, update this
+ */
+ mutex_lock(&assigned_dev->kvm->lock);
+ kvm_set_irq(assigned_dev->kvm,
+ assigned_dev->guest_irq, 1);
+ mutex_unlock(&assigned_dev->kvm->lock);
+ kvm_put_kvm(assigned_dev->kvm);
+}
+
+/* FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+ struct kvm_assigned_dev_kernel *assigned_dev =
+ (struct kvm_assigned_dev_kernel *) dev_id;
+
+ kvm_get_kvm(assigned_dev->kvm);
+ schedule_work(&assigned_dev->interrupt_work);
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+ struct kvm_assigned_dev_kernel *dev;
+
+ if (kian->gsi == -1)
+ return;
+
+ dev = container_of(kian, struct kvm_assigned_dev_kernel,
+ ack_notifier);
+ kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+ enable_irq(dev->host_irq);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel
+ *assigned_dev)
+{
+ if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+ free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+ kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+
+ if (cancel_work_sync(&assigned_dev->interrupt_work))
+ /* We had pending work. That means we will have to take
+ * care of kvm_put_kvm.
+ */
+ kvm_put_kvm(kvm);
+
+ pci_release_regions(assigned_dev->dev);
+ pci_disable_device(assigned_dev->dev);
+ pci_dev_put(assigned_dev->dev);
+
+ list_del(&assigned_dev->list);
+ kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+ struct list_head *ptr, *ptr2;
+ struct kvm_assigned_dev_kernel *assigned_dev;
+
+ list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+ assigned_dev = list_entry(ptr,
+ struct kvm_assigned_dev_kernel,
+ list);
+
+ kvm_free_assigned_device(kvm, assigned_dev);
+ }
+}
+
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+ struct kvm_assigned_irq
+ *assigned_irq)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *match;
+
+ mutex_lock(&kvm->lock);
+
+ match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ assigned_irq->assigned_dev_id);
+ if (!match) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
+
+ if (match->irq_requested) {
+ match->guest_irq = assigned_irq->guest_irq;
+ match->ack_notifier.gsi = assigned_irq->guest_irq;
+ mutex_unlock(&kvm->lock);
+ return 0;
+ }
+
+ INIT_WORK(&match->interrupt_work,
+ kvm_assigned_dev_interrupt_work_handler);
+
+ if (irqchip_in_kernel(kvm)) {
+ if (!capable(CAP_SYS_RAWIO)) {
+ r = -EPERM;
+ goto out_release;
+ }
+
+ if (assigned_irq->host_irq)
+ match->host_irq = assigned_irq->host_irq;
+ else
+ match->host_irq = match->dev->irq;
+ match->guest_irq = assigned_irq->guest_irq;
+ match->ack_notifier.gsi = assigned_irq->guest_irq;
+ match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+ kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+
+ /* Even though this is PCI, we don't want to use shared
+ * interrupts. Sharing host devices with guest-assigned devices
+ * on the same interrupt line is not a happy situation: there
+ * are going to be long delays in accepting, acking, etc.
+ */
+ if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+ "kvm_assigned_device", (void *)match)) {
+ r = -EIO;
+ goto out_release;
+ }
+ }
+
+ match->irq_requested = true;
+ mutex_unlock(&kvm->lock);
+ return r;
+out_release:
+ mutex_unlock(&kvm->lock);
+ kvm_free_assigned_device(kvm, match);
+ return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+ struct kvm_assigned_pci_dev *assigned_dev)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *match;
+ struct pci_dev *dev;
+
+ mutex_lock(&kvm->lock);
+
+ match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ assigned_dev->assigned_dev_id);
+ if (match) {
+ /* device already assigned */
+ r = -EINVAL;
+ goto out;
+ }
+
+ match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+ if (match == NULL) {
+ printk(KERN_INFO "%s: Couldn't allocate memory\n",
+ __func__);
+ r = -ENOMEM;
+ goto out;
+ }
+ dev = pci_get_bus_and_slot(assigned_dev->busnr,
+ assigned_dev->devfn);
+ if (!dev) {
+ printk(KERN_INFO "%s: host device not found\n", __func__);
+ r = -EINVAL;
+ goto out_free;
+ }
+ if (pci_enable_device(dev)) {
+ printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+ r = -EBUSY;
+ goto out_put;
+ }
+ r = pci_request_regions(dev, "kvm_assigned_device");
+ if (r) {
+ printk(KERN_INFO "%s: Could not get access to device regions\n",
+ __func__);
+ goto out_disable;
+ }
+ match->assigned_dev_id = assigned_dev->assigned_dev_id;
+ match->host_busnr = assigned_dev->busnr;
+ match->host_devfn = assigned_dev->devfn;
+ match->dev = dev;
+
+ match->kvm = kvm;
+
+ list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+ if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+ r = kvm_iommu_map_guest(kvm, match);
+ if (r)
+ goto out_list_del;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+out_list_del:
+ list_del(&match->list);
+ pci_release_regions(dev);
+out_disable:
+ pci_disable_device(dev);
+out_put:
+ pci_dev_put(dev);
+out_free:
+ kfree(match);
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+#endif
+
static inline int valid_vcpu(int n)
{
return likely(n >= 0 && n < KVM_MAX_VCPUS);
}
+inline int kvm_is_mmio_pfn(pfn_t pfn)
+{
+ if (pfn_valid(pfn))
+ return PageReserved(pfn_to_page(pfn));
+
+ return true;
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
@@ -570,6 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
kvm_free_physmem_slot(&old, &new);
+#ifdef CONFIG_DMAR
+ /* map the pages in iommu page table */
+ r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+ if (r)
+ goto out;
+#endif
return 0;
out_free:
@@ -708,9 +962,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-/*
- * Requires current->mm->mmap_sem to be held
- */
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
struct page *page[1];
@@ -726,21 +977,24 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return page_to_pfn(bad_page);
}
- npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
- NULL);
+ npages = get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1)) {
struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
+
if (vma == NULL || addr < vma->vm_start ||
!(vma->vm_flags & VM_PFNMAP)) {
+ up_read(&current->mm->mmap_sem);
get_page(bad_page);
return page_to_pfn(bad_page);
}
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- BUG_ON(pfn_valid(pfn));
+ up_read(&current->mm->mmap_sem);
+ BUG_ON(!kvm_is_mmio_pfn(pfn));
} else
pfn = page_to_pfn(page[0]);
@@ -754,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
pfn_t pfn;
pfn = gfn_to_pfn(kvm, gfn);
- if (pfn_valid(pfn))
+ if (!kvm_is_mmio_pfn(pfn))
return pfn_to_page(pfn);
- WARN_ON(!pfn_valid(pfn));
+ WARN_ON(kvm_is_mmio_pfn(pfn));
get_page(bad_page);
return bad_page;
@@ -773,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(pfn_t pfn)
{
- if (pfn_valid(pfn))
+ if (!kvm_is_mmio_pfn(pfn))
put_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -799,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
void kvm_set_pfn_dirty(pfn_t pfn)
{
- if (pfn_valid(pfn)) {
+ if (!kvm_is_mmio_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
SetPageDirty(page);
@@ -809,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(pfn_t pfn)
{
- if (pfn_valid(pfn))
+ if (!kvm_is_mmio_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
void kvm_get_pfn(pfn_t pfn)
{
- if (pfn_valid(pfn))
+ if (!kvm_is_mmio_pfn(pfn))
get_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_get_pfn);
@@ -972,12 +1226,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
for (;;) {
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
- if (kvm_cpu_has_interrupt(vcpu))
- break;
- if (kvm_cpu_has_pending_timer(vcpu))
- break;
- if (kvm_arch_vcpu_runnable(vcpu))
+ if (kvm_cpu_has_interrupt(vcpu) ||
+ kvm_cpu_has_pending_timer(vcpu) ||
+ kvm_arch_vcpu_runnable(vcpu)) {
+ set_bit(KVM_REQ_UNHALT, &vcpu->requests);
break;
+ }
if (signal_pending(current))
break;
@@ -1074,12 +1328,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
r = kvm_arch_vcpu_setup(vcpu);
if (r)
- goto vcpu_destroy;
+ return r;
mutex_lock(&kvm->lock);
if (kvm->vcpus[n]) {
r = -EEXIST;
- mutex_unlock(&kvm->lock);
goto vcpu_destroy;
}
kvm->vcpus[n] = vcpu;
@@ -1095,8 +1348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
unlink:
mutex_lock(&kvm->lock);
kvm->vcpus[n] = NULL;
- mutex_unlock(&kvm->lock);
vcpu_destroy:
+ mutex_unlock(&kvm->lock);
kvm_arch_vcpu_destroy(vcpu);
return r;
}
@@ -1118,6 +1371,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int r;
+ struct kvm_fpu *fpu = NULL;
+ struct kvm_sregs *kvm_sregs = NULL;
if (vcpu->kvm->mm != current->mm)
return -EIO;
@@ -1165,25 +1420,28 @@ out_free2:
break;
}
case KVM_GET_SREGS: {
- struct kvm_sregs kvm_sregs;
-
- memset(&kvm_sregs, 0, sizeof kvm_sregs);
- r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
+ kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!kvm_sregs)
+ goto out;
+ r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
+ if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
goto out;
r = 0;
break;
}
case KVM_SET_SREGS: {
- struct kvm_sregs kvm_sregs;
-
+ kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!kvm_sregs)
+ goto out;
r = -EFAULT;
- if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
+ if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
goto out;
- r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
+ r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
if (r)
goto out;
r = 0;
@@ -1264,25 +1522,28 @@ out_free2:
break;
}
case KVM_GET_FPU: {
- struct kvm_fpu fpu;
-
- memset(&fpu, 0, sizeof fpu);
- r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
+ fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!fpu)
+ goto out;
+ r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &fpu, sizeof fpu))
+ if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
goto out;
r = 0;
break;
}
case KVM_SET_FPU: {
- struct kvm_fpu fpu;
-
+ fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!fpu)
+ goto out;
r = -EFAULT;
- if (copy_from_user(&fpu, argp, sizeof fpu))
+ if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
goto out;
- r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
+ r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
if (r)
goto out;
r = 0;
@@ -1292,6 +1553,8 @@ out_free2:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
+ kfree(fpu);
+ kfree(kvm_sregs);
return r;
}
@@ -1360,6 +1623,30 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+ case KVM_ASSIGN_PCI_DEVICE: {
+ struct kvm_assigned_pci_dev assigned_dev;
+
+ r = -EFAULT;
+ if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+ goto out;
+ r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+ if (r)
+ goto out;
+ break;
+ }
+ case KVM_ASSIGN_IRQ: {
+ struct kvm_assigned_irq assigned_irq;
+
+ r = -EFAULT;
+ if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+ goto out;
+ r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+ if (r)
+ goto out;
+ break;
+ }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
@@ -1369,17 +1656,22 @@ out:
static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ struct page *page[1];
+ unsigned long addr;
+ int npages;
+ gfn_t gfn = vmf->pgoff;
struct kvm *kvm = vma->vm_file->private_data;
- struct page *page;
- if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
return VM_FAULT_SIGBUS;
- page = gfn_to_page(kvm, vmf->pgoff);
- if (is_error_page(page)) {
- kvm_release_page_clean(page);
+
+ npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
+ NULL);
+ if (unlikely(npages != 1))
return VM_FAULT_SIGBUS;
- }
- vmf->page = page;
+
+ vmf->page = page[0];
return 0;
}
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 58141f31ea8f..41dcc845f78c 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/relay.h>
#include <linux/debugfs.h>
+#include <linux/ktime.h>
#include <linux/kvm_host.h>
@@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace;
struct kvm_trace_probe {
const char *name;
const char *format;
- u32 cycle_in;
+ u32 timestamp_in;
marker_probe_func *probe_func;
};
-static inline int calc_rec_size(int cycle, int extra)
+static inline int calc_rec_size(int timestamp, int extra)
{
int rec_size = KVM_TRC_HEAD_SIZE;
rec_size += extra;
- return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
+ return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
}
static void kvm_add_trace(void *probe_private, void *call_data,
@@ -54,12 +55,13 @@ static void kvm_add_trace(void *probe_private, void *call_data,
struct kvm_trace *kt = kvm_trace;
struct kvm_trace_rec rec;
struct kvm_vcpu *vcpu;
- int i, extra, size;
+ int i, size;
+ u32 extra;
if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
return;
- rec.event = va_arg(*args, u32);
+ rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
vcpu = va_arg(*args, struct kvm_vcpu *);
rec.pid = current->tgid;
rec.vcpu_id = vcpu->vcpu_id;
@@ -67,21 +69,21 @@ static void kvm_add_trace(void *probe_private, void *call_data,
extra = va_arg(*args, u32);
WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
- rec.extra_u32 = extra;
- rec.cycle_in = p->cycle_in;
+ rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
+ | TRACE_REC_NUM_DATA_ARGS(extra);
- if (rec.cycle_in) {
- rec.u.cycle.cycle_u64 = get_cycles();
+ if (p->timestamp_in) {
+ rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
- for (i = 0; i < rec.extra_u32; i++)
- rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
+ for (i = 0; i < extra; i++)
+ rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
} else {
- for (i = 0; i < rec.extra_u32; i++)
- rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
+ for (i = 0; i < extra; i++)
+ rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
}
- size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+ size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
relay_write(kt->rchan, &rec, size);
}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
new file mode 100644
index 000000000000..a770874f3a3a
--- /dev/null
+++ b/virt/kvm/vtd.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ int i, r = 0;
+ struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return 0;
+
+ for (i = 0; i < npages; i++) {
+ /* check if already mapped */
+ pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+ gfn_to_gpa(gfn));
+ if (pfn)
+ continue;
+
+ pfn = gfn_to_pfn(kvm, gfn);
+ r = intel_iommu_page_mapping(domain,
+ gfn_to_gpa(gfn),
+ pfn_to_hpa(pfn),
+ PAGE_SIZE,
+ DMA_PTE_READ |
+ DMA_PTE_WRITE);
+ if (r) {
+ printk(KERN_ERR "kvm_iommu_map_pages:"
+ "iommu failed to map pfn=%lx\n", pfn);
+ goto unmap_pages;
+ }
+ gfn++;
+ }
+ return 0;
+
+unmap_pages:
+ kvm_iommu_put_pages(kvm, base_gfn, i);
+ return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+ int i, r;
+
+ down_read(&kvm->slots_lock);
+ for (i = 0; i < kvm->nmemslots; i++) {
+ r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+ kvm->memslots[i].npages);
+ if (r)
+ break;
+ }
+ up_read(&kvm->slots_lock);
+ return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *assigned_dev)
+{
+ struct pci_dev *pdev = NULL;
+ int r;
+
+ if (!intel_iommu_found()) {
+ printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+ return -ENODEV;
+ }
+
+ printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+ assigned_dev->host_busnr,
+ PCI_SLOT(assigned_dev->host_devfn),
+ PCI_FUNC(assigned_dev->host_devfn));
+
+ pdev = assigned_dev->dev;
+
+ if (pdev == NULL) {
+ if (kvm->arch.intel_iommu_domain) {
+ intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+ kvm->arch.intel_iommu_domain = NULL;
+ }
+ return -ENODEV;
+ }
+
+ kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+ if (!kvm->arch.intel_iommu_domain)
+ return -ENODEV;
+
+ r = kvm_iommu_map_memslots(kvm);
+ if (r)
+ goto out_unmap;
+
+ intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+ pdev->bus->number, pdev->devfn);
+
+ r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+ pdev);
+ if (r) {
+ printk(KERN_ERR "Domain context map for %s failed",
+ pci_name(pdev));
+ goto out_unmap;
+ }
+ return 0;
+
+out_unmap:
+ kvm_iommu_unmap_memslots(kvm);
+ return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+ gfn_t base_gfn, unsigned long npages)
+{
+ gfn_t gfn = base_gfn;
+ pfn_t pfn;
+ struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+ int i;
+
+ for (i = 0; i < npages; i++) {
+ pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+ gfn_to_gpa(gfn));
+ kvm_release_pfn_clean(pfn);
+ gfn++;
+ }
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+ int i;
+ down_read(&kvm->slots_lock);
+ for (i = 0; i < kvm->nmemslots; i++) {
+ kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+ kvm->memslots[i].npages);
+ }
+ up_read(&kvm->slots_lock);
+
+ return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+ struct kvm_assigned_dev_kernel *entry;
+ struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+ /* check if iommu exists and in use */
+ if (!domain)
+ return 0;
+
+ list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+ printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+ entry->host_busnr,
+ PCI_SLOT(entry->host_devfn),
+ PCI_FUNC(entry->host_devfn));
+
+ /* detach kvm dmar domain */
+ intel_iommu_detach_dev(domain, entry->host_busnr,
+ entry->host_devfn);
+ }
+ kvm_iommu_unmap_memslots(kvm);
+ intel_iommu_domain_exit(domain);
+ return 0;
+}