diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c | 93 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 12 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 17 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 27 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 204 |
5 files changed, 253 insertions, 100 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 3ad0925d23a9..758e3b36d4cf 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -17,6 +17,8 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> #include "irq.h" static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, @@ -480,12 +482,76 @@ out: return r; } +/* + * We want to test whether the caller has been granted permissions to + * use this device. To be able to configure and control the device, + * the user needs access to PCI configuration space and BAR resources. + * These are accessed through PCI sysfs. PCI config space is often + * passed to the process calling this ioctl via file descriptor, so we + * can't rely on access to that file. We can check for permissions + * on each of the BAR resource files, which is a pretty clear + * indicator that the user has been granted access to the device. + */ +static int probe_sysfs_permissions(struct pci_dev *dev) +{ +#ifdef CONFIG_SYSFS + int i; + bool bar_found = false; + + for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { + char *kpath, *syspath; + struct path path; + struct inode *inode; + int r; + + if (!pci_resource_len(dev, i)) + continue; + + kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); + if (!kpath) + return -ENOMEM; + + /* Per sysfs-rules, sysfs is always at /sys */ + syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); + kfree(kpath); + if (!syspath) + return -ENOMEM; + + r = kern_path(syspath, LOOKUP_FOLLOW, &path); + kfree(syspath); + if (r) + return r; + + inode = path.dentry->d_inode; + + r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); + path_put(&path); + if (r) + return r; + + bar_found = true; + } + + /* If no resources, probably something special */ + if (!bar_found) + return -EPERM; + + return 0; +#else + return -EINVAL; /* No way to control the device without sysfs */ +#endif +} + static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + u8 header_type; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) + return -EINVAL; mutex_lock(&kvm->lock); idx = srcu_read_lock(&kvm->srcu); @@ -513,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -EINVAL; goto out_free; } + + /* Don't allow bridges to be assigned */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { + r = -EPERM; + goto out_put; + } + + r = probe_sysfs_permissions(dev); + if (r) + goto out_put; + if (pci_enable_device(dev)) { printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); r = -EBUSY; @@ -544,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); if (r) goto out_list_del; } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; out: srcu_read_unlock(&kvm->srcu, idx); @@ -593,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index a6ec206f36ba..88b2fe3ddf42 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -28,9 +28,15 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, * (addr,len) is fully included in * (zone->addr, zone->size) */ - - return (dev->zone.addr <= addr && - addr + len <= dev->zone.addr + dev->zone.size); + if (len < 0) + return 0; + if (addr + len < addr) + return 0; + if (addr < dev->zone.addr) + return 0; + if (addr + len > dev->zone.addr + dev->zone.size) + return 0; + return 1; } static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 3eed61eb4867..dcaf272c26c0 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -185,7 +185,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.dest_mode = 0; /* Physical mode. */ /* need to read apic_id from apic regiest since * it can be rewritten */ - irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; + irqe.dest_id = ioapic->kvm->bsp_vcpu_id; } #endif return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); @@ -332,9 +332,18 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ - if (len == 4 || len == 8) + switch (len) { + case 8: + case 4: data = *(u32 *) val; - else { + break; + case 2: + data = *(u16 *) val; + break; + case 1: + data = *(u8 *) val; + break; + default: printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); return 0; } @@ -343,7 +352,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: - ioapic->ioregsel = data; + ioapic->ioregsel = data & 0xFF; /* 8-bit register */ break; case IOAPIC_REG_WINDOW: diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index a195c07fa829..a457d2138f49 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -32,7 +32,7 @@ #include <linux/iommu.h> #include <linux/intel-iommu.h> -static int allow_unsafe_assigned_interrupts; +static bool allow_unsafe_assigned_interrupts; module_param_named(allow_unsafe_assigned_interrupts, allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, @@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) /* Map into IO address space */ r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - get_order(page_size), flags); + page_size, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); @@ -134,14 +134,15 @@ unmap_pages: static int kvm_iommu_map_memslots(struct kvm *kvm) { - int i, idx, r = 0; + int idx, r = 0; struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); - for (i = 0; i < slots->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); + kvm_for_each_memslot(memslot, slots) { + r = kvm_iommu_map_pages(kvm, memslot); if (r) break; } @@ -292,15 +293,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm, while (gfn < end_gfn) { unsigned long unmap_pages; - int order; + size_t size; /* Get physical address */ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); pfn = phys >> PAGE_SHIFT; /* Unmap address from IO address space */ - order = iommu_unmap(domain, gfn_to_gpa(gfn), 0); - unmap_pages = 1ULL << order; + size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); + unmap_pages = 1ULL << get_order(size); /* Unpin all pages we just unmapped to not leak any memory */ kvm_unpin_pages(kvm, pfn, unmap_pages); @@ -311,16 +312,16 @@ static void kvm_iommu_put_pages(struct kvm *kvm, static int kvm_iommu_unmap_memslots(struct kvm *kvm) { - int i, idx; + int idx; struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); - for (i = 0; i < slots->nmemslots; i++) { - kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, - slots->memslots[i].npages); - } + kvm_for_each_memslot(memslot, slots) + kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages); + srcu_read_unlock(&kvm->srcu, idx); return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d9cfb782cb81..7287bf5d1c9e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -440,6 +440,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ +static void kvm_init_memslots_id(struct kvm *kvm) +{ + int i; + struct kvm_memslots *slots = kvm->memslots; + + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + slots->id_to_index[i] = slots->memslots[i].id = i; +} + static struct kvm *kvm_create_vm(void) { int r, i; @@ -465,6 +474,7 @@ static struct kvm *kvm_create_vm(void) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err_nosrcu; + kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) goto out_err_nosrcu; for (i = 0; i < KVM_NR_BUSES; i++) { @@ -547,11 +557,11 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, void kvm_free_physmem(struct kvm *kvm) { - int i; struct kvm_memslots *slots = kvm->memslots; + struct kvm_memory_slot *memslot; - for (i = 0; i < slots->nmemslots; ++i) - kvm_free_physmem_slot(&slots->memslots[i], NULL); + kvm_for_each_memslot(memslot, slots) + kvm_free_physmem_slot(memslot, NULL); kfree(kvm->memslots); } @@ -625,10 +635,69 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) return -ENOMEM; memslot->dirty_bitmap_head = memslot->dirty_bitmap; + memslot->nr_dirty_pages = 0; return 0; } #endif /* !CONFIG_S390 */ +static struct kvm_memory_slot * +search_memslots(struct kvm_memslots *slots, gfn_t gfn) +{ + struct kvm_memory_slot *memslot; + + kvm_for_each_memslot(memslot, slots) + if (gfn >= memslot->base_gfn && + gfn < memslot->base_gfn + memslot->npages) + return memslot; + + return NULL; +} + +static int cmp_memslot(const void *slot1, const void *slot2) +{ + struct kvm_memory_slot *s1, *s2; + + s1 = (struct kvm_memory_slot *)slot1; + s2 = (struct kvm_memory_slot *)slot2; + + if (s1->npages < s2->npages) + return 1; + if (s1->npages > s2->npages) + return -1; + + return 0; +} + +/* + * Sort the memslots base on its size, so the larger slots + * will get better fit. + */ +static void sort_memslots(struct kvm_memslots *slots) +{ + int i; + + sort(slots->memslots, KVM_MEM_SLOTS_NUM, + sizeof(struct kvm_memory_slot), cmp_memslot, NULL); + + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + slots->id_to_index[slots->memslots[i].id] = i; +} + +void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) +{ + if (new) { + int id = new->id; + struct kvm_memory_slot *old = id_to_memslot(slots, id); + unsigned long npages = old->npages; + + *old = *new; + if (new->npages != npages) + sort_memslots(slots); + } + + slots->generation++; +} + /* * Allocate some memory and give it an address in the guest physical address * space. @@ -662,12 +731,12 @@ int __kvm_set_memory_region(struct kvm *kvm, (void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) + if (mem->slot >= KVM_MEM_SLOTS_NUM) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = &kvm->memslots->memslots[mem->slot]; + memslot = id_to_memslot(kvm->memslots, mem->slot); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -774,15 +843,17 @@ skip_lpage: #endif /* not defined CONFIG_S390 */ if (!npages) { + struct kvm_memory_slot *slot; + r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; - slots->generation++; - slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; + slot = id_to_memslot(slots, mem->slot); + slot->flags |= KVM_MEMSLOT_INVALID; + + update_memslots(slots, NULL); old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); @@ -810,13 +881,10 @@ skip_lpage: } r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; - slots->generation++; /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { @@ -826,7 +894,7 @@ skip_lpage: new.lpage_info[i] = NULL; } - slots->memslots[mem->slot] = new; + update_memslots(slots, &new); old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); @@ -888,7 +956,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_MEMORY_SLOTS) goto out; - memslot = &kvm->memslots->memslots[log->slot]; + memslot = id_to_memslot(kvm->memslots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -966,16 +1034,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) { - int i; - - for (i = 0; i < slots->nmemslots; ++i) { - struct kvm_memory_slot *memslot = &slots->memslots[i]; - - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - return memslot; - } - return NULL; + return search_memslots(slots, gfn); } struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) @@ -986,20 +1045,13 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { - int i; - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *memslot = &slots->memslots[i]; - - if (memslot->flags & KVM_MEMSLOT_INVALID) - continue; + if (!memslot || memslot->id >= KVM_MEMORY_SLOTS || + memslot->flags & KVM_MEMSLOT_INVALID) + return 0; - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - return 1; - } - return 0; + return 1; } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); @@ -1491,7 +1543,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; - __set_bit_le(rel_gfn, memslot->dirty_bitmap); + if (!__test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) + memslot->nr_dirty_pages++; } } @@ -1690,10 +1743,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) smp_wmb(); atomic_inc(&kvm->online_vcpus); -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - if (kvm->bsp_vcpu_id == id) - kvm->bsp_vcpu = vcpu; -#endif mutex_unlock(&kvm->lock); return r; @@ -1768,12 +1817,11 @@ out_free1: struct kvm_regs *kvm_regs; r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); - if (!kvm_regs) + kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); + if (IS_ERR(kvm_regs)) { + r = PTR_ERR(kvm_regs); goto out; - r = -EFAULT; - if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) - goto out_free2; + } r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); if (r) goto out_free2; @@ -1797,13 +1845,11 @@ out_free2: break; } case KVM_SET_SREGS: { - kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); - r = -ENOMEM; - if (!kvm_sregs) - goto out; - r = -EFAULT; - if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) + kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); + if (IS_ERR(kvm_sregs)) { + r = PTR_ERR(kvm_sregs); goto out; + } r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); if (r) goto out; @@ -1899,13 +1945,11 @@ out_free2: break; } case KVM_SET_FPU: { - fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); - r = -ENOMEM; - if (!fpu) - goto out; - r = -EFAULT; - if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) + fpu = memdup_user(argp, sizeof(*fpu)); + if (IS_ERR(fpu)) { + r = PTR_ERR(fpu); goto out; + } r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); if (r) goto out; @@ -2520,10 +2564,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, if (bus->dev_count > NR_IOBUS_DEVS-1) return -ENOSPC; - new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); if (!new_bus) return -ENOMEM; - memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); @@ -2539,13 +2582,12 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, int i, r; struct kvm_io_bus *new_bus, *bus; - new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + bus = kvm->buses[bus_idx]; + + new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); if (!new_bus) return -ENOMEM; - bus = kvm->buses[bus_idx]; - memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); - r = -ENOENT; for (i = 0; i < new_bus->dev_count; i++) if (new_bus->range[i].dev == dev) { @@ -2612,15 +2654,29 @@ static const struct file_operations *stat_fops[] = { [KVM_STAT_VM] = &vm_stat_fops, }; -static void kvm_init_debug(void) +static int kvm_init_debug(void) { + int r = -EFAULT; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); - for (p = debugfs_entries; p->name; ++p) + if (kvm_debugfs_dir == NULL) + goto out; + + for (p = debugfs_entries; p->name; ++p) { p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); + if (p->dentry == NULL) + goto out_dir; + } + + return 0; + +out_dir: + debugfs_remove_recursive(kvm_debugfs_dir); +out: + return r; } static void kvm_exit_debug(void) @@ -2764,10 +2820,16 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; - kvm_init_debug(); + r = kvm_init_debug(); + if (r) { + printk(KERN_ERR "kvm: create debugfs files failed\n"); + goto out_undebugfs; + } return 0; +out_undebugfs: + unregister_syscore_ops(&kvm_syscore_ops); out_unreg: kvm_async_pf_deinit(); out_free: |