diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Makefile | 2 | ||||
-rw-r--r-- | drivers/xen/acpi.c | 2 | ||||
-rw-r--r-- | drivers/xen/efi.c | 30 | ||||
-rw-r--r-- | drivers/xen/events/events_base.c | 5 | ||||
-rw-r--r-- | drivers/xen/events/events_fifo.c | 23 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 123 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 207 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 4 | ||||
-rw-r--r-- | drivers/xen/pcpu.c | 8 | ||||
-rw-r--r-- | drivers/xen/time.c | 88 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-cpuhotplug.c | 2 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-pad.c | 4 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-processor.c | 8 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pciback.h | 1 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pciback_ops.c | 75 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/xenbus.c | 4 | ||||
-rw-r--r-- | drivers/xen/xen-scsiback.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenfs/xensyms.c | 4 |
18 files changed, 518 insertions, 74 deletions
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa8a7f71f310..9b7a35c9e51d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_X86) += fallback.o -obj-y += grant-table.o features.o balloon.o manage.o preempt.o +obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o obj-y += events/ obj-y += xenbus/ diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 90307c0b630c..6893c79fd2a1 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -58,7 +58,7 @@ static int xen_acpi_notify_hypervisor_state(u8 sleep_state, bits, val_a, val_b)) return -1; - HYPERVISOR_dom0_op(&op); + HYPERVISOR_platform_op(&op); return 1; } diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index f745db270171..be7e56a338e8 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -42,7 +42,7 @@ static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; if (tm) { @@ -67,7 +67,7 @@ static efi_status_t xen_efi_set_time(efi_time_t *tm) BUILD_BUG_ON(sizeof(*tm) != sizeof(efi_data(op).u.set_time)); memcpy(&efi_data(op).u.set_time, tm, sizeof(*tm)); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -79,7 +79,7 @@ static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; if (tm) { @@ -108,7 +108,7 @@ static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) else efi_data(op).misc |= XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -129,7 +129,7 @@ static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_data(op).u.get_variable.size = *data_size; set_xen_guest_handle(efi_data(op).u.get_variable.data, data); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *data_size = efi_data(op).u.get_variable.size; @@ -152,7 +152,7 @@ static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, memcpy(&efi_data(op).u.get_next_variable_name.vendor_guid, vendor, sizeof(*vendor)); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *name_size = efi_data(op).u.get_next_variable_name.size; @@ -178,7 +178,7 @@ static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_data(op).u.set_variable.size = data_size; set_xen_guest_handle(efi_data(op).u.set_variable.data, data); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -196,7 +196,7 @@ static efi_status_t xen_efi_query_variable_info(u32 attr, efi_data(op).u.query_variable_info.attr = attr; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *storage_space = efi_data(op).u.query_variable_info.max_store_size; @@ -210,7 +210,7 @@ static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *count = efi_data(op).misc; @@ -232,7 +232,7 @@ static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, efi_data(op).u.update_capsule.capsule_count = count; efi_data(op).u.update_capsule.sg_list = sg_list; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -252,7 +252,7 @@ static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, capsules); efi_data(op).u.query_capsule_capabilities.capsule_count = count; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *max_size = efi_data(op).u.query_capsule_capabilities.max_capsule_size; @@ -331,7 +331,7 @@ efi_system_table_t __init *xen_efi_probe(void) }; union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; - if (!xen_initial_domain() || HYPERVISOR_dom0_op(&op) < 0) + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) return NULL; /* Here we know that Xen runs on EFI platform. */ @@ -347,7 +347,7 @@ efi_system_table_t __init *xen_efi_probe(void) info->vendor.bufsz = sizeof(vendor); set_xen_guest_handle(info->vendor.name, vendor); - if (HYPERVISOR_dom0_op(&op) == 0) { + if (HYPERVISOR_platform_op(&op) == 0) { efi_systab_xen.fw_vendor = __pa_symbol(vendor); efi_systab_xen.fw_revision = info->vendor.revision; } else @@ -357,14 +357,14 @@ efi_system_table_t __init *xen_efi_probe(void) op.u.firmware_info.type = XEN_FW_EFI_INFO; op.u.firmware_info.index = XEN_FW_EFI_VERSION; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) efi_systab_xen.hdr.revision = info->version; op.cmd = XENPF_firmware_info; op.u.firmware_info.type = XEN_FW_EFI_INFO; op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) efi.runtime_version = info->version; return &efi_systab_xen; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 849500e4e14d..524c22146429 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -39,6 +39,7 @@ #include <asm/irq.h> #include <asm/idle.h> #include <asm/io_apic.h> +#include <asm/i8259.h> #include <asm/xen/pci.h> #endif #include <asm/sync_bitops.h> @@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) return xen_allocate_irq_dynamic(); /* Legacy IRQ descriptors are already allocated by the arch. */ - if (gsi < NR_IRQS_LEGACY) + if (gsi < nr_legacy_irqs()) irq = gsi; else irq = irq_alloc_desc_at(gsi, -1); @@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq) kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < NR_IRQS_LEGACY) + if (irq < nr_legacy_irqs()) return; irq_free_desc(irq); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index e3e9e3d46d1b..96a1b8da5371 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port) static void consume_one_event(unsigned cpu, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready) + unsigned priority, unsigned long *ready, + bool drop) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu, if (head == 0) clear_bit(priority, ready); - if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) - handle_irq_for_port(port); + if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { + if (unlikely(drop)) + pr_warn("Dropping pending event for port %u\n", port); + else + handle_irq_for_port(port); + } q->head[priority] = head; } -static void evtchn_fifo_handle_events(unsigned cpu) +static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready); + consume_one_event(cpu, control_block, q, &ready, drop); ready |= xchg(&control_block->ready, 0); } } +static void evtchn_fifo_handle_events(unsigned cpu) +{ + __evtchn_fifo_handle_events(cpu, false); +} + static void evtchn_fifo_resume(void) { unsigned cpu; @@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self, if (!per_cpu(cpu_control_block, cpu)) ret = evtchn_fifo_alloc_control_block(cpu); break; + case CPU_DEAD: + __evtchn_fifo_handle_events(cpu, true); + break; default: break; } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 00f40f051d95..38272ad24551 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -49,6 +49,8 @@ #include <linux/init.h> #include <linux/mutex.h> #include <linux/cpu.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> #include <xen/xen.h> #include <xen/events.h> @@ -58,10 +60,10 @@ struct per_user_data { struct mutex bind_mutex; /* serialize bind/unbind operations */ struct rb_root evtchns; + unsigned int nr_evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ -#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) -#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + unsigned int ring_size; evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct mutex ring_cons_mutex; /* protect against concurrent readers */ @@ -80,10 +82,41 @@ struct user_evtchn { bool enabled; }; +static evtchn_port_t *evtchn_alloc_ring(unsigned int size) +{ + evtchn_port_t *ring; + size_t s = size * sizeof(*ring); + + ring = kmalloc(s, GFP_KERNEL); + if (!ring) + ring = vmalloc(s); + + return ring; +} + +static void evtchn_free_ring(evtchn_port_t *ring) +{ + kvfree(ring); +} + +static unsigned int evtchn_ring_offset(struct per_user_data *u, + unsigned int idx) +{ + return idx & (u->ring_size - 1); +} + +static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u, + unsigned int idx) +{ + return u->ring + evtchn_ring_offset(u, idx); +} + static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; + u->nr_evtchns++; + while (*new) { struct user_evtchn *this; @@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { + u->nr_evtchns--; rb_erase(&evtchn->node, &u->evtchns); kfree(evtchn); } @@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&u->ring_prod_lock); - if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; + if ((u->ring_prod - u->ring_cons) < u->ring_size) { + *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); @@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ - if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { - bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + if (((c ^ p) & u->ring_size) != 0) { + bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) * sizeof(evtchn_port_t); - bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t); } else { bytes1 = (p - c) * sizeof(evtchn_port_t); bytes2 = 0; @@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, rc = -EFAULT; rmb(); /* Ensure that we see the port before we copy it. */ - if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) goto unlock_out; @@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, return rc; } +static int evtchn_resize_ring(struct per_user_data *u) +{ + unsigned int new_size; + evtchn_port_t *new_ring, *old_ring; + unsigned int p, c; + + /* + * Ensure the ring is large enough to capture all possible + * events. i.e., one free slot for each bound event. + */ + if (u->nr_evtchns <= u->ring_size) + return 0; + + if (u->ring_size == 0) + new_size = 64; + else + new_size = 2 * u->ring_size; + + new_ring = evtchn_alloc_ring(new_size); + if (!new_ring) + return -ENOMEM; + + old_ring = u->ring; + + /* + * Access to the ring contents is serialized by either the + * prod /or/ cons lock so take both when resizing. + */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&u->ring_prod_lock); + + /* + * Copy the old ring contents to the new ring. + * + * If the ring contents crosses the end of the current ring, + * it needs to be copied in two chunks. + * + * +---------+ +------------------+ + * |34567 12| -> | 1234567 | + * +-----p-c-+ +------------------+ + */ + p = evtchn_ring_offset(u, u->ring_prod); + c = evtchn_ring_offset(u, u->ring_cons); + if (p < c) { + memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring)); + memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring)); + } else + memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring)); + + u->ring = new_ring; + u->ring_size = new_size; + + spin_unlock_irq(&u->ring_prod_lock); + mutex_unlock(&u->ring_cons_mutex); + + evtchn_free_ring(old_ring); + + return 0; +} + static int evtchn_bind_to_user(struct per_user_data *u, int port) { struct user_evtchn *evtchn; @@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; + rc = evtchn_resize_ring(u); + if (rc < 0) + goto err; + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, u->name, evtchn); if (rc < 0) @@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp) init_waitqueue_head(&u->evtchn_wait); - u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); - if (u->ring == NULL) { - kfree(u->name); - kfree(u); - return -ENOMEM; - } - mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); spin_lock_init(&u->ring_prod_lock); @@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp) evtchn_unbind_from_user(u, evtchn); } - free_page((unsigned long)u->ring); + evtchn_free_ring(u->ring); kfree(u->name); kfree(u); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 2ea0b3b2a91d..dc495383ad73 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -518,7 +518,7 @@ static void mn_release(struct mmu_notifier *mn, mutex_unlock(&priv->lock); } -static struct mmu_notifier_ops gntdev_mmu_ops = { +static const struct mmu_notifier_ops gntdev_mmu_ops = { .release = mn_release, .invalidate_page = mn_invl_page, .invalidate_range_start = mn_invl_range_start, @@ -748,6 +748,206 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) return rc; } +#define GNTDEV_COPY_BATCH 24 + +struct gntdev_copy_batch { + struct gnttab_copy ops[GNTDEV_COPY_BATCH]; + struct page *pages[GNTDEV_COPY_BATCH]; + s16 __user *status[GNTDEV_COPY_BATCH]; + unsigned int nr_ops; + unsigned int nr_pages; +}; + +static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, + bool writeable, unsigned long *gfn) +{ + unsigned long addr = (unsigned long)virt; + struct page *page; + unsigned long xen_pfn; + int ret; + + ret = get_user_pages_fast(addr, 1, writeable, &page); + if (ret < 0) + return ret; + + batch->pages[batch->nr_pages++] = page; + + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK); + *gfn = pfn_to_gfn(xen_pfn); + + return 0; +} + +static void gntdev_put_pages(struct gntdev_copy_batch *batch) +{ + unsigned int i; + + for (i = 0; i < batch->nr_pages; i++) + put_page(batch->pages[i]); + batch->nr_pages = 0; +} + +static int gntdev_copy(struct gntdev_copy_batch *batch) +{ + unsigned int i; + + gnttab_batch_copy(batch->ops, batch->nr_ops); + gntdev_put_pages(batch); + + /* + * For each completed op, update the status if the op failed + * and all previous ops for the segment were successful. + */ + for (i = 0; i < batch->nr_ops; i++) { + s16 status = batch->ops[i].status; + s16 old_status; + + if (status == GNTST_okay) + continue; + + if (__get_user(old_status, batch->status[i])) + return -EFAULT; + + if (old_status != GNTST_okay) + continue; + + if (__put_user(status, batch->status[i])) + return -EFAULT; + } + + batch->nr_ops = 0; + return 0; +} + +static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, + struct gntdev_grant_copy_segment *seg, + s16 __user *status) +{ + uint16_t copied = 0; + + /* + * Disallow local -> local copies since there is only space in + * batch->pages for one page per-op and this would be a very + * expensive memcpy(). + */ + if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref))) + return -EINVAL; + + /* Can't cross page if source/dest is a grant ref. */ + if (seg->flags & GNTCOPY_source_gref) { + if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE) + return -EINVAL; + } + if (seg->flags & GNTCOPY_dest_gref) { + if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE) + return -EINVAL; + } + + if (put_user(GNTST_okay, status)) + return -EFAULT; + + while (copied < seg->len) { + struct gnttab_copy *op; + void __user *virt; + size_t len, off; + unsigned long gfn; + int ret; + + if (batch->nr_ops >= GNTDEV_COPY_BATCH) { + ret = gntdev_copy(batch); + if (ret < 0) + return ret; + } + + len = seg->len - copied; + + op = &batch->ops[batch->nr_ops]; + op->flags = 0; + + if (seg->flags & GNTCOPY_source_gref) { + op->source.u.ref = seg->source.foreign.ref; + op->source.domid = seg->source.foreign.domid; + op->source.offset = seg->source.foreign.offset + copied; + op->flags |= GNTCOPY_source_gref; + } else { + virt = seg->source.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); + + ret = gntdev_get_page(batch, virt, false, &gfn); + if (ret < 0) + return ret; + + op->source.u.gmfn = gfn; + op->source.domid = DOMID_SELF; + op->source.offset = off; + } + + if (seg->flags & GNTCOPY_dest_gref) { + op->dest.u.ref = seg->dest.foreign.ref; + op->dest.domid = seg->dest.foreign.domid; + op->dest.offset = seg->dest.foreign.offset + copied; + op->flags |= GNTCOPY_dest_gref; + } else { + virt = seg->dest.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); + + ret = gntdev_get_page(batch, virt, true, &gfn); + if (ret < 0) + return ret; + + op->dest.u.gmfn = gfn; + op->dest.domid = DOMID_SELF; + op->dest.offset = off; + } + + op->len = len; + copied += len; + + batch->status[batch->nr_ops] = status; + batch->nr_ops++; + } + + return 0; +} + +static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u) +{ + struct ioctl_gntdev_grant_copy copy; + struct gntdev_copy_batch batch; + unsigned int i; + int ret = 0; + + if (copy_from_user(©, u, sizeof(copy))) + return -EFAULT; + + batch.nr_ops = 0; + batch.nr_pages = 0; + + for (i = 0; i < copy.count; i++) { + struct gntdev_grant_copy_segment seg; + + if (copy_from_user(&seg, ©.segments[i], sizeof(seg))) { + ret = -EFAULT; + goto out; + } + + ret = gntdev_grant_copy_seg(&batch, &seg, ©.segments[i].status); + if (ret < 0) + goto out; + + cond_resched(); + } + if (batch.nr_ops) + ret = gntdev_copy(&batch); + return ret; + + out: + gntdev_put_pages(&batch); + return ret; +} + static long gntdev_ioctl(struct file *flip, unsigned int cmd, unsigned long arg) { @@ -767,6 +967,9 @@ static long gntdev_ioctl(struct file *flip, case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: return gntdev_ioctl_notify(priv, ptr); + case IOCTL_GNTDEV_GRANT_COPY: + return gntdev_ioctl_grant_copy(priv, ptr); + default: pr_debug("priv %p, unknown cmd %x\n", priv, cmd); return -ENOIOCTLCMD; @@ -804,7 +1007,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c49f79ed58c5..effbaf91791f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -128,7 +128,7 @@ struct unmap_refs_callback_data { int result; }; -static struct gnttab_ops *gnttab_interface; +static const struct gnttab_ops *gnttab_interface; static int grant_table_version; static int grefs_per_grant_frame; @@ -1013,7 +1013,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return rc; } -static struct gnttab_ops gnttab_v1_ops = { +static const struct gnttab_ops gnttab_v1_ops = { .map_frames = gnttab_map_frames_v1, .unmap_frames = gnttab_unmap_frames_v1, .update_entry = gnttab_update_entry_v1, diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 49e88f2ce7a1..cdc6daa7a9f6 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -78,7 +78,7 @@ static int xen_pcpu_down(uint32_t cpu_id) .u.cpu_ol.cpuid = cpu_id, }; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static int xen_pcpu_up(uint32_t cpu_id) @@ -89,7 +89,7 @@ static int xen_pcpu_up(uint32_t cpu_id) .u.cpu_ol.cpuid = cpu_id, }; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static ssize_t show_online(struct device *dev, @@ -277,7 +277,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) .u.pcpu_info.xen_cpuid = cpu, }; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return ret; @@ -364,7 +364,7 @@ int xen_pcpu_id(uint32_t acpi_id) op.cmd = XENPF_get_cpuinfo; while (cpu_id <= max_id) { op.u.pcpu_info.xen_cpuid = cpu_id; - if (HYPERVISOR_dom0_op(&op)) { + if (HYPERVISOR_platform_op(&op)) { cpu_id++; continue; } diff --git a/drivers/xen/time.c b/drivers/xen/time.c new file mode 100644 index 000000000000..71078425c9ea --- /dev/null +++ b/drivers/xen/time.c @@ -0,0 +1,88 @@ +/* + * Xen stolen ticks accounting. + */ +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/math64.h> +#include <linux/gfp.h> + +#include <asm/xen/hypervisor.h> +#include <asm/xen/hypercall.h> + +#include <xen/events.h> +#include <xen/features.h> +#include <xen/interface/xen.h> +#include <xen/interface/vcpu.h> +#include <xen/xen-ops.h> + +/* runstate info updated by Xen */ +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); + +/* return an consistent snapshot of 64-bit time/counter value */ +static u64 get64(const u64 *p) +{ + u64 ret; + + if (BITS_PER_LONG < 64) { + u32 *p32 = (u32 *)p; + u32 h, l, h2; + + /* + * Read high then low, and then make sure high is + * still the same; this will only loop if low wraps + * and carries into high. + * XXX some clean way to make this endian-proof? + */ + do { + h = READ_ONCE(p32[1]); + l = READ_ONCE(p32[0]); + h2 = READ_ONCE(p32[1]); + } while(h2 != h); + + ret = (((u64)h) << 32) | l; + } else + ret = READ_ONCE(*p); + + return ret; +} + +/* + * Runstate accounting + */ +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + u64 state_time; + struct vcpu_runstate_info *state; + + BUG_ON(preemptible()); + + state = this_cpu_ptr(&xen_runstate); + + /* + * The runstate info is always updated by the hypervisor on + * the current CPU, so there's no need to use anything + * stronger than a compiler barrier when fetching it. + */ + do { + state_time = get64(&state->state_entry_time); + *res = READ_ONCE(*state); + } while (get64(&state->state_entry_time) != state_time); +} + +/* return true when a vcpu could run but has no real cpu to run on */ +bool xen_vcpu_stolen(int vcpu) +{ + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; +} + +void xen_setup_runstate_info(int cpu) +{ + struct vcpu_register_runstate_memory_area area; + + area.addr.v = &per_cpu(xen_runstate, cpu); + + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, + cpu, &area)) + BUG(); +} + diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c index f4a369429553..fdc9e67b842d 100644 --- a/drivers/xen/xen-acpi-cpuhotplug.c +++ b/drivers/xen/xen-acpi-cpuhotplug.c @@ -206,7 +206,7 @@ static int xen_hotadd_cpu(struct acpi_processor *pr) op.u.cpu_add.acpi_id = pr->acpi_id; op.u.cpu_add.pxm = pxm; - cpu_id = HYPERVISOR_dom0_op(&op); + cpu_id = HYPERVISOR_platform_op(&op); if (cpu_id < 0) pr_err(PREFIX "Failed to hotadd CPU for acpi_id %d\n", pr->acpi_id); diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index f83b754505f8..23d1808fe027 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -36,7 +36,7 @@ static int xen_acpi_pad_idle_cpus(unsigned int idle_nums) op.u.core_parking.type = XEN_CORE_PARKING_SET; op.u.core_parking.idle_nums = idle_nums; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static int xen_acpi_pad_idle_cpus_num(void) @@ -46,7 +46,7 @@ static int xen_acpi_pad_idle_cpus_num(void) op.cmd = XENPF_core_parking; op.u.core_parking.type = XEN_CORE_PARKING_GET; - return HYPERVISOR_dom0_op(&op) + return HYPERVISOR_platform_op(&op) ?: op.u.core_parking.idle_nums; } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 70fa438000af..076970a54f89 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -116,7 +116,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr) set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states); if (!no_hypercall) - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (!ret) { pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id); @@ -244,7 +244,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr) } if (!no_hypercall) - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (!ret) { struct acpi_processor_performance *perf; @@ -302,7 +302,7 @@ static unsigned int __init get_max_acpi_id(void) info = &op.u.pcpu_info; info->xen_cpuid = 0; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return NR_CPUS; @@ -310,7 +310,7 @@ static unsigned int __init get_max_acpi_id(void) last_cpu = op.u.pcpu_info.max_present; for (i = 0; i <= last_cpu; i++) { info->xen_cpuid = i; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) continue; max_acpi_id = max(info->acpi_id, max_acpi_id); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 58e38d586f52..4d529f3e40df 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -37,6 +37,7 @@ struct xen_pcibk_device { struct xen_pci_sharedinfo *sh_info; unsigned long flags; struct work_struct op_work; + struct xen_pci_op op; }; struct xen_pcibk_dev_data { diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index c4a0666de6f5..73dafdc494aa 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -70,6 +70,13 @@ static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) enable ? "enable" : "disable"); if (enable) { + /* + * The MSI or MSI-X should not have an IRQ handler. Otherwise + * if the guest terminates we BUG_ON in free_msi_irqs. + */ + if (dev->msi_enabled || dev->msix_enabled) + goto out; + rc = request_irq(dev_data->irq, xen_pcibk_guest_interrupt, IRQF_SHARED, dev_data->irq_name, dev); @@ -144,7 +151,12 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); - status = pci_enable_msi(dev); + if (dev->msi_enabled) + status = -EALREADY; + else if (dev->msix_enabled) + status = -ENXIO; + else + status = pci_enable_msi(dev); if (status) { pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", @@ -173,20 +185,23 @@ static int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; - if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", pci_name(dev)); - pci_disable_msi(dev); + if (dev->msi_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msi(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; return 0; } @@ -197,13 +212,26 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, struct xen_pcibk_dev_data *dev_data; int i, result; struct msix_entry *entries; + u16 cmd; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; + if (dev->msix_enabled) + return -EALREADY; + + /* + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able + * to access the BARs where the MSI-X entries reside. + */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) + return -ENXIO; + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); if (entries == NULL) return -ENOMEM; @@ -245,23 +273,27 @@ static int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", pci_name(dev)); - pci_disable_msix(dev); + if (dev->msix_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msix(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } /* * SR-IOV devices (which don't have any legacy IRQ) have * an undefined IRQ value of zero. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; + printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", + pci_name(dev), op->value); return 0; } #endif @@ -298,9 +330,11 @@ void xen_pcibk_do_op(struct work_struct *data) container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; + struct xen_pci_op *op = &pdev->op; int test_intx = 0; + *op = pdev->sh_info->op; + barrier(); dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) @@ -342,6 +376,17 @@ void xen_pcibk_do_op(struct work_struct *data) if ((dev_data->enable_intx != test_intx)) xen_pcibk_control_isr(dev, 0 /* no reset */); } + pdev->sh_info->op.err = op->err; + pdev->sh_info->op.value = op->value; +#ifdef CONFIG_PCI_MSI + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { + unsigned int i; + + for (i = 0; i < op->value; i++) + pdev->sh_info->op.msix_entries[i].vector = + op->msix_entries[i].vector; + } +#endif /* Tell the driver domain that we're done. */ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 98bc345f296e..4843741e703a 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -44,7 +44,6 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); pdev->xdev = xdev; - dev_set_drvdata(&xdev->dev, pdev); mutex_init(&pdev->dev_lock); @@ -58,6 +57,9 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) kfree(pdev); pdev = NULL; } + + dev_set_drvdata(&xdev->dev, pdev); + out: return pdev; } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 43bcae852546..ad4eb1024d1f 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -726,7 +726,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) if (!pending_req) return 1; - ring_req = *RING_GET_REQUEST(ring, rc); + RING_COPY_REQUEST(ring, rc, &ring_req); ring->req_cons = ++rc; err = prepare_pending_reqs(info, &ring_req, pending_req); diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c index f8b12856753f..a03f261b12d8 100644 --- a/drivers/xen/xenfs/xensyms.c +++ b/drivers/xen/xenfs/xensyms.c @@ -31,7 +31,7 @@ static int xensyms_next_sym(struct xensyms *xs) symnum = symdata->symnum; - ret = HYPERVISOR_dom0_op(&xs->op); + ret = HYPERVISOR_platform_op(&xs->op); if (ret < 0) return ret; @@ -50,7 +50,7 @@ static int xensyms_next_sym(struct xensyms *xs) set_xen_guest_handle(symdata->name, xs->name); symdata->symnum--; /* Rewind */ - ret = HYPERVISOR_dom0_op(&xs->op); + ret = HYPERVISOR_platform_op(&xs->op); if (ret < 0) return ret; } |