Diffstat (limited to 'drivers/xen')
38 files changed, 1739 insertions, 532 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5f7ff8e2fc14..a1ced521cf74 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -86,6 +86,7 @@ config XEN_BACKEND
 
 config XENFS
 	tristate "Xen filesystem"
+	select XEN_PRIVCMD
 	default y
 	help
 	  The xen filesystem provides a way for domains to share
@@ -137,16 +138,6 @@ config XEN_GRANT_DEV_ALLOC
 	  to other domains. This can be used to implement frontend drivers
 	  or as part of an inter-domain shared memory channel.
 
-config XEN_PLATFORM_PCI
-	tristate "xen platform pci device driver"
-	depends on XEN_PVHVM && PCI
-	default m
-	help
-	  Driver for the Xen PCI Platform device: it is responsible for
-	  initializing xenbus and grant_table when running in a Xen HVM
-	  domain. As a consequence this driver is required to run any Xen PV
-	  frontend on Xen HVM.
-
 config SWIOTLB_XEN
 	def_bool y
 	depends on PCI
@@ -181,4 +172,10 @@ config XEN_PCIDEV_BACKEND
 	  xen-pciback.hide=(03:00.0)(04:00.0)
 
 	  If in doubt, say m.
+
+config XEN_PRIVCMD
+	tristate
+	depends on XEN
+	default m
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 72bbb27d7a68..aa31337192cc 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -14,14 +14,14 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM)		+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)		+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)		+= pci.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD)	+= xen-privcmd.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y			:= gntdev.o
 xen-gntalloc-y			:= gntalloc.o
-
-xen-platform-pci-y		:= platform-pci.o
+xen-privcmd-y			:= privcmd.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8ff07f..31ab82fda38a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/pagemap.h>
@@ -93,8 +94,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 #define inc_totalhigh_pages() (totalhigh_pages++)
 #define dec_totalhigh_pages() (totalhigh_pages--)
 #else
-#define inc_totalhigh_pages() do {} while(0)
-#define dec_totalhigh_pages() do {} while(0)
+#define inc_totalhigh_pages() do {} while (0)
+#define dec_totalhigh_pages() do {} while (0)
 #endif
 
 /* List of ballooned pages, threaded through the mem_map array. */
@@ -154,8 +155,7 @@ static struct page *balloon_retrieve(bool prefer_highmem)
 	if (PageHighMem(page)) {
 		balloon_stats.balloon_high--;
 		inc_totalhigh_pages();
-	}
-	else
+	} else
 		balloon_stats.balloon_low--;
 
 	totalram_pages++;
@@ -422,7 +422,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 			(unsigned long)__va(pfn << PAGE_SHIFT),
 			__pte_ma(0), 0);
 		BUG_ON(ret);
-        }
+	}
 
 }
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: allow highmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
 	int pgno = 0;
-	struct page* page;
+	struct page *page;
 	mutex_lock(&balloon_mutex);
 	while (pgno < nr_pages) {
-		page = balloon_retrieve(true);
-		if (page) {
+		page = balloon_retrieve(highmem);
+		if (page && (highmem || !PageHighMem(page))) {
 			pages[pgno++] = page;
 		} else {
 			enum bp_state st;
-			st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+			if (page)
+				balloon_append(page);
+			st = decrease_reservation(nr_pages - pgno,
+					highmem ? GFP_HIGHUSER : GFP_USER);
 			if (st != BP_DONE)
 				goto out_undo;
 		}
@@ -536,7 +540,7 @@ EXPORT_SYMBOL(alloc_xenballooned_pages);
  * @nr_pages: Number of pages
  * @pages: pages to return
  */
-void free_xenballooned_pages(int nr_pages, struct page** pages)
+void free_xenballooned_pages(int nr_pages, struct page **pages)
 {
 	int i;
 
@@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+				      unsigned long pages)
 {
 	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
+	/*
+	 * If the amount of usable memory has been limited (e.g., with
+	 * the 'mem' command line parameter), don't add pages beyond
+	 * this limit.
+	 */
+	extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+		page = pfn_to_page(pfn);
+		/* totalram_pages and totalhigh_pages do not
+		   include the boot-time balloon extension, so
+		   don't subtract from it. */
+		__balloon_append(page);
+	}
+}
+
+static int __init balloon_init(void)
+{
+	int i;
+
 	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("xen/balloon: Initialising balloon driver.\n");
 
-	balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+	balloon_stats.current_pages = xen_pv_domain()
+		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+		: max_pfn;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -584,24 +611,13 @@ static int __init balloon_init(void)
 #endif
 
 	/*
-	 * Initialise the balloon with excess memory space. We need
-	 * to make sure we don't add memory which doesn't exist or
-	 * logically exist. The E820 map can be trimmed to be smaller
-	 * than the amount of physical memory due to the mem= command
-	 * line parameter. And if this is a 32-bit non-HIGHMEM kernel
-	 * on a system with memory which requires highmem to access,
-	 * don't try to use it.
+	 * Initialize the balloon with pages from the extra memory
+	 * regions (see arch/x86/xen/setup.c).
 	 */
-	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-	for (pfn = PFN_UP(xen_extra_mem_start);
-	     pfn < extra_pfn_end;
-	     pfn++) {
-		page = pfn_to_page(pfn);
-		/* totalram_pages and totalhigh_pages do not include the boot-time
-		   balloon extension, so don't subtract from it. */
-		__balloon_append(page);
-	}
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+		if (xen_extra_mem[i].size)
+			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+					   PFN_DOWN(xen_extra_mem[i].size));
 
 	return 0;
 }
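[Illustrative sketch — not part of the patch. alloc_xenballooned_pages() above gains a third "highmem" argument; passing false guarantees lowmem pages and makes the fallback reservation use GFP_USER. A hypothetical caller (the names my_pages/my_get_lowmem_pages are made up) might use it like this, mirroring the gntdev change later in this diff:]

#include <linux/mm.h>
#include <xen/balloon.h>

#define MY_NR_PAGES 16

static struct page *my_pages[MY_NR_PAGES];

static int my_get_lowmem_pages(void)
{
	/* false: refuse highmem, so PageHighMem() is never true below */
	int rc = alloc_xenballooned_pages(MY_NR_PAGES, my_pages, false);

	if (rc)
		return rc;	/* balloon could not supply enough pages */

	/* ... safe to set up kernel-side PTEs for my_pages here ... */

	free_xenballooned_pages(MY_NR_PAGES, my_pages);
	return 0;
}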
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7523719bf8a4..e5e5812a1014 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -85,9 +85,9 @@ enum xen_irq_type {
  *    IPI - IPI vector
  *    EVTCHN -
  */
-struct irq_info
-{
+struct irq_info {
 	struct list_head list;
+	int refcnt;
 	enum xen_irq_type type;	/* type */
 	unsigned irq;
 	unsigned short evtchn;	/* event channel */
@@ -282,9 +282,9 @@ static inline unsigned long active_evtchns(unsigned int cpu,
 					   struct shared_info *sh,
 					   unsigned int idx)
 {
-	return (sh->evtchn_pending[idx] &
+	return sh->evtchn_pending[idx] &
 		per_cpu(cpu_evtchn_mask, cpu)[idx] &
-		~sh->evtchn_mask[idx]);
+		~sh->evtchn_mask[idx];
 }
 
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
@@ -407,6 +407,7 @@ static void xen_irq_init(unsigned irq)
 		panic("Unable to allocate metadata for IRQ%d\n", irq);
 
 	info->type = IRQT_UNBOUND;
+	info->refcnt = -1;
 
 	irq_set_handler_data(irq, info);
 
@@ -432,7 +433,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
 
 	irq = irq_alloc_desc_from(first, -1);
 
-	xen_irq_init(irq);
+	if (irq >= 0)
+		xen_irq_init(irq);
 
 	return irq;
 }
@@ -469,6 +471,8 @@ static void xen_free_irq(unsigned irq)
 
 	irq_set_handler_data(irq, NULL);
 
+	WARN_ON(info->refcnt > 0);
+
 	kfree(info);
 
 	/* Legacy IRQ descriptors are managed by the arch. */
@@ -637,7 +641,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	if (irq != -1) {
 		printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
 		       irq, gsi);
-		goto out;	/* XXX need refcount? */
+		goto out;
 	}
 
 	irq = xen_allocate_irq_gsi(gsi);
@@ -713,7 +717,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
-	if (irq == -1)
+	if (irq < 0)
 		goto out;
 
 	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,7 +733,7 @@ out:
 error_irq:
 	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
-	return -1;
+	return ret;
 }
 #endif
 
@@ -779,7 +783,7 @@ int xen_irq_from_pirq(unsigned pirq)
 	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
-		if (info == NULL || info->type != IRQT_PIRQ)
+		if (info->type != IRQT_PIRQ)
 			continue;
 		irq = info->irq;
 		if (info->u.pirq.pirq == pirq)
@@ -872,11 +876,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
 	mutex_lock(&irq_mapping_update_lock);
 
@@ -892,10 +917,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 
@@ -912,9 +943,16 @@ static void unbind_from_irq(unsigned int irq)
 {
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
+	struct irq_info *info = irq_get_handler_data(irq);
 
 	mutex_lock(&irq_mapping_update_lock);
 
+	if (info->refcnt > 0) {
+		info->refcnt--;
+		if (info->refcnt != 0)
+			goto done;
+	}
+
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
 		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
@@ -943,6 +981,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
+ done:
 	mutex_unlock(&irq_mapping_update_lock);
 }
 
@@ -1021,7 +1060,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
 	if (irq < 0)
 		return irq;
 
-	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
+	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
 	if (retval != 0) {
 		unbind_from_irq(irq);
@@ -1038,6 +1077,69 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
 }
 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
 
+int evtchn_make_refcounted(unsigned int evtchn)
+{
+	int irq = evtchn_to_irq[evtchn];
+	struct irq_info *info;
+
+	if (irq == -1)
+		return -ENOENT;
+
+	info = irq_get_handler_data(irq);
+
+	if (!info)
+		return -ENOENT;
+
+	WARN_ON(info->refcnt != -1);
+
+	info->refcnt = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
+
+int evtchn_get(unsigned int evtchn)
+{
+	int irq;
+	struct irq_info *info;
+	int err = -ENOENT;
+
+	if (evtchn >= NR_EVENT_CHANNELS)
+		return -EINVAL;
+
+	mutex_lock(&irq_mapping_update_lock);
+
+	irq = evtchn_to_irq[evtchn];
+	if (irq == -1)
+		goto done;
+
+	info = irq_get_handler_data(irq);
+
+	if (!info)
+		goto done;
+
+	err = -EINVAL;
+	if (info->refcnt <= 0)
+		goto done;
+
+	info->refcnt++;
+	err = 0;
+ done:
+	mutex_unlock(&irq_mapping_update_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(evtchn_get);
+
+void evtchn_put(unsigned int evtchn)
+{
+	int irq = evtchn_to_irq[evtchn];
+	if (WARN_ON(irq == -1))
+		return;
+	unbind_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(evtchn_put);
+
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
 	int irq = per_cpu(ipi_to_irq, cpu)[vector];
@@ -1152,7 +1254,7 @@ static void __xen_evtchn_do_upcall(void)
 	int cpu = get_cpu();
 	struct shared_info *s = HYPERVISOR_shared_info;
 	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
- 	unsigned count;
+	unsigned count;
 
 	do {
 		unsigned long pending_words;
@@ -1670,6 +1772,7 @@ void __init xen_init_IRQ(void)
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
+	BUG_ON(!evtchn_to_irq);
 	for (i = 0; i < NR_EVENT_CHANNELS; i++)
 		evtchn_to_irq[i] = -1;
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index dbc13e94b612..b1f60a0c0bea 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
 				       u->name, (void *)(unsigned long)port);
 	if (rc >= 0)
-		rc = 0;
+		rc = evtchn_make_refcounted(port);
 
 	return rc;
 }
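[Illustrative sketch — not part of the patch. The refcounting added above lets a driver pin an event channel so that unbind_from_irq() only closes it when the last user drops its reference. Hypothetical usage ("port", "my_handler" and "demo" are placeholders):]

/* At bind time: opt the channel into refcounting (refcnt: -1 -> 1),
 * as the evtchn device now does in evtchn_bind_to_user(). */
rc = bind_evtchn_to_irqhandler(port, my_handler, 0, "demo", NULL);
if (rc >= 0)
	rc = evtchn_make_refcounted(port);

/* Later, before using the port from another context: */
if (evtchn_get(port) == 0) {	/* refcnt: 1 -> 2; fails if not refcounted */
	notify_remote_via_evtchn(port);
	evtchn_put(port);	/* refcnt: 2 -> 1; the IRQ is freed at 0 */
}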
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index f6832f46aea4..934985d14c24 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
 		"the gntalloc device");
 
 static LIST_HEAD(gref_list);
-static DEFINE_SPINLOCK(gref_lock);
+static DEFINE_MUTEX(gref_mutex);
 static int gref_size;
 
 struct notify_info {
@@ -99,6 +99,12 @@ struct gntalloc_file_private_data {
 	uint64_t index;
 };
 
+struct gntalloc_vma_private_data {
+	struct gntalloc_gref *gref;
+	int users;
+	int count;
+};
+
 static void __del_gref(struct gntalloc_gref *gref);
 
 static void do_cleanup(void)
@@ -135,7 +141,7 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
 		/* Grant foreign access to the page. */
 		gref->gref_id = gnttab_grant_foreign_access(op->domid,
			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
-		if (gref->gref_id < 0) {
+		if ((int)gref->gref_id < 0) {
 			rc = gref->gref_id;
 			goto undo;
 		}
@@ -143,15 +149,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
 	}
 
 	/* Add to gref lists. */
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	list_splice_tail(&queue_gref, &gref_list);
 	list_splice_tail(&queue_file, &priv->list);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	return 0;
 
 undo:
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	gref_size -= (op->count - i);
 
 	list_for_each_entry(gref, &queue_file, next_file) {
@@ -167,7 +173,7 @@ undo:
 	 */
 	if (unlikely(!list_empty(&queue_gref)))
 		list_splice_tail(&queue_gref, &gref_list);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rc;
 }
 
@@ -178,8 +184,10 @@ static void __del_gref(struct gntalloc_gref *gref)
 		tmp[gref->notify.pgoff] = 0;
 		kunmap(gref->page);
 	}
-	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(gref->notify.event);
+		evtchn_put(gref->notify.event);
+	}
 
 	gref->notify.flags = 0;
 
@@ -189,6 +197,8 @@ static void __del_gref(struct gntalloc_gref *gref)
 
 		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
 			return;
+
+		gnttab_free_grant_reference(gref->gref_id);
 	}
 
 	gref_size--;
@@ -251,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
 
 	pr_debug("%s: priv %p\n", __func__, priv);
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	while (!list_empty(&priv->list)) {
 		gref = list_entry(priv->list.next,
 			struct gntalloc_gref, next_file);
@@ -261,7 +271,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
 			__del_gref(gref);
 	}
 	kfree(priv);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	return 0;
 }
@@ -280,27 +290,27 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
 		goto out;
 	}
 
-	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+	gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_TEMPORARY);
 	if (!gref_ids) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	/* Clean up pages that were at zero (local) users but were still mapped
 	 * by remote domains. Since those pages count towards the limit that we
 	 * are about to enforce, removing them here is a good idea.
 	 */
 	do_cleanup();
 	if (gref_size + op.count > limit) {
-		spin_unlock(&gref_lock);
+		mutex_unlock(&gref_mutex);
 		rc = -ENOSPC;
 		goto out_free;
 	}
 	gref_size += op.count;
 	op.index = priv->index;
 	priv->index += op.count * PAGE_SIZE;
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	rc = add_grefs(&op, gref_ids, priv);
 	if (rc < 0)
@@ -343,7 +353,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
 		goto dealloc_grant_out;
 	}
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	gref = find_grefs(priv, op.index, op.count);
 	if (gref) {
 		/* Remove from the file list only, and decrease reference count.
@@ -363,7 +373,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
 
 	do_cleanup();
 
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 dealloc_grant_out:
 	return rc;
 }
@@ -383,7 +393,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
 	index = op.index & ~(PAGE_SIZE - 1);
 	pgoff = op.index & (PAGE_SIZE - 1);
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 
 	gref = find_grefs(priv, index, 1);
 	if (!gref) {
@@ -396,12 +406,30 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
 		goto unlock_out;
 	}
 
+	/* We need to grab a reference to the event channel we are going to use
+	 * to send the notify before releasing the reference we may already have
+	 * (if someone has called this ioctl twice). This is required so that
+	 * it is possible to change the clear_byte part of the notification
+	 * without disturbing the event channel part, which may now be the last
+	 * reference to that event channel.
+	 */
+	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+		if (evtchn_get(op.event_channel_port)) {
+			rc = -EINVAL;
+			goto unlock_out;
+		}
+	}
+
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+		evtchn_put(gref->notify.event);
+
 	gref->notify.flags = op.action;
 	gref->notify.pgoff = pgoff;
 	gref->notify.event = op.event_channel_port;
 	rc = 0;
+
 unlock_out:
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rc;
 }
 
@@ -429,26 +457,40 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 
 static void gntalloc_vma_open(struct vm_area_struct *vma)
 {
-	struct gntalloc_gref *gref = vma->vm_private_data;
-	if (!gref)
+	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+
+	if (!priv)
 		return;
 
-	spin_lock(&gref_lock);
-	gref->users++;
-	spin_unlock(&gref_lock);
+	mutex_lock(&gref_mutex);
+	priv->users++;
+	mutex_unlock(&gref_mutex);
 }
 
 static void gntalloc_vma_close(struct vm_area_struct *vma)
 {
-	struct gntalloc_gref *gref = vma->vm_private_data;
-	if (!gref)
+	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+	struct gntalloc_gref *gref, *next;
+	int i;
+
+	if (!priv)
 		return;
 
-	spin_lock(&gref_lock);
-	gref->users--;
-	if (gref->users == 0)
-		__del_gref(gref);
-	spin_unlock(&gref_lock);
+	mutex_lock(&gref_mutex);
+	priv->users--;
+	if (priv->users == 0) {
+		gref = priv->gref;
+		for (i = 0; i < priv->count; i++) {
+			gref->users--;
+			next = list_entry(gref->next_gref.next,
+					  struct gntalloc_gref, next_gref);
+			if (gref->users == 0)
+				__del_gref(gref);
+			gref = next;
+		}
+		kfree(priv);
+	}
+	mutex_unlock(&gref_mutex);
 }
 
 static struct vm_operations_struct gntalloc_vmops = {
@@ -459,30 +501,41 @@ static struct vm_operations_struct gntalloc_vmops = {
 static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_vma_private_data *vm_priv;
 	struct gntalloc_gref *gref;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	int rv, i;
 
-	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
-		       priv, vma->vm_pgoff, count);
-
 	if (!(vma->vm_flags & VM_SHARED)) {
 		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
 		return -EINVAL;
 	}
 
-	spin_lock(&gref_lock);
+	vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL);
+	if (!vm_priv)
+		return -ENOMEM;
+
+	mutex_lock(&gref_mutex);
+
+	pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
+		       priv, vm_priv, vma->vm_pgoff, count);
+
 	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
 	if (gref == NULL) {
 		rv = -ENOENT;
 		pr_debug("%s: Could not find grant reference",
 				__func__);
+		kfree(vm_priv);
 		goto out_unlock;
 	}
 
-	vma->vm_private_data = gref;
+	vm_priv->gref = gref;
+	vm_priv->users = 1;
+	vm_priv->count = count;
+
+	vma->vm_private_data = vm_priv;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
 
 	vma->vm_ops = &gntalloc_vmops;
 
@@ -499,7 +552,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 	rv = 0;
 
 out_unlock:
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rv;
 }
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26cf0c2..99d8151c824a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@ struct grant_map {
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
+	struct gnttab_map_grant_ref   *kmap_ops;
 	struct page **pages;
 };
 
@@ -113,22 +114,25 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	if (NULL == add)
 		return NULL;
 
-	add->grants    = kzalloc(sizeof(add->grants[0])    * count, GFP_KERNEL);
-	add->map_ops   = kzalloc(sizeof(add->map_ops[0])   * count, GFP_KERNEL);
-	add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
-	add->pages     = kzalloc(sizeof(add->pages[0])     * count, GFP_KERNEL);
+	add->grants    = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+	add->map_ops   = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+	add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+	add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+	add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
+	    NULL == add->kmap_ops  ||
 	    NULL == add->pages)
 		goto err;
 
-	if (alloc_xenballooned_pages(count, add->pages))
+	if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
 		goto err;
 
 	for (i = 0; i < count; i++) {
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
+		add->kmap_ops[i].handle = -1;
 	}
 
 	add->index = 0;
@@ -142,6 +146,7 @@ err:
 	kfree(add->grants);
 	kfree(add->map_ops);
 	kfree(add->unmap_ops);
+	kfree(add->kmap_ops);
 	kfree(add);
 	return NULL;
 }
@@ -190,6 +195,7 @@ static void gntdev_put_map(struct grant_map *map)
 
 	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
+		evtchn_put(map->notify.event);
 	}
 
 	if (map->pages) {
@@ -243,10 +249,35 @@ static int map_grant_pages(struct grant_map *map)
 			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
 				map->flags, -1 /* handle */);
 		}
+	} else {
+		/*
+		 * Setup the map_ops corresponding to the pte entries pointing
+		 * to the kernel linear addresses of the struct pages.
+		 * These ptes are completely different from the user ptes dealt
+		 * with find_grant_ptes.
+		 */
+		for (i = 0; i < map->count; i++) {
+			unsigned level;
+			unsigned long address = (unsigned long)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			pte_t *ptep;
+			u64 pte_maddr = 0;
+			BUG_ON(PageHighMem(map->pages[i]));
+
+			ptep = lookup_address(address, &level);
+			pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+			gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+				map->flags |
+				GNTMAP_host_map |
+				GNTMAP_contains_pte,
+				map->grants[i].ref,
+				map->grants[i].domid);
+		}
 	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
-	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+	err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+			map->pages, map->count);
 	if (err)
 		return err;
 
@@ -283,7 +314,8 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
+	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
+				pages, true);
 	if (err)
 		return err;
 
@@ -462,13 +494,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 
 	pr_debug("priv %p\n", priv);
 
-	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
 		list_del(&map->next);
 		gntdev_put_map(map);
 	}
-	spin_unlock(&priv->lock);
 
 	if (use_ptemod)
 		mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +562,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 	if (map) {
 		list_del(&map->next);
-		gntdev_put_map(map);
 		err = 0;
 	}
 	spin_unlock(&priv->lock);
+	if (map)
+		gntdev_put_map(map);
 	return err;
 }
 
@@ -571,6 +602,8 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	struct ioctl_gntdev_unmap_notify op;
 	struct grant_map *map;
 	int rc;
+	int out_flags;
+	unsigned int out_event;
 
 	if (copy_from_user(&op, u, sizeof(op)))
 		return -EFAULT;
@@ -578,6 +611,21 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
 		return -EINVAL;
 
+	/* We need to grab a reference to the event channel we are going to use
+	 * to send the notify before releasing the reference we may already have
+	 * (if someone has called this ioctl twice). This is required so that
+	 * it is possible to change the clear_byte part of the notification
+	 * without disturbing the event channel part, which may now be the last
+	 * reference to that event channel.
+	 */
+	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+		if (evtchn_get(op.event_channel_port))
+			return -EINVAL;
+	}
+
+	out_flags = op.action;
+	out_event = op.event_channel_port;
+
 	spin_lock(&priv->lock);
 
 	list_for_each_entry(map, &priv->maps, next) {
@@ -596,12 +644,22 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 		goto unlock_out;
 	}
 
+	out_flags = map->notify.flags;
+	out_event = map->notify.event;
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
+
 	rc = 0;
+
 unlock_out:
 	spin_unlock(&priv->lock);
+
+	/* Drop the reference to the event channel we did not save in the map */
+	if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
+		evtchn_put(out_event);
+
 	return rc;
 }
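[Illustrative sketch — not part of the patch. Both notify ioctls above (gntalloc and gntdev) now follow the same ordering when UNMAP_NOTIFY_SEND_EVENT is re-armed; schematically, with old_*/new_* as placeholder names:]

/* Take the reference on the new event channel first ... */
if (new_action & UNMAP_NOTIFY_SEND_EVENT)
	if (evtchn_get(new_port))
		return -EINVAL;	/* invalid or non-refcounted port */

/* ... then drop the old one; safe even when old_port == new_port,
 * because the reference taken above keeps the channel alive. */
if (old_action & UNMAP_NOTIFY_SEND_EVENT)
	evtchn_put(old_port);

/* Finally commit new_action/new_port into the saved notify state. */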
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b347b24a..1cd94daa71db 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -44,16 +44,19 @@
 #include <xen/page.h>
 #include <xen/grant_table.h>
 #include <xen/interface/memory.h>
+#include <xen/hvc-console.h>
 #include <asm/xen/hypercall.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
 
-
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
 #define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ?                      \
+(PAGE_SIZE / sizeof(struct grant_entry_v1)) :    \
+(PAGE_SIZE / sizeof(union grant_entry_v2)))
 
 static grant_ref_t **gnttab_list;
 static unsigned int nr_grant_frames;
@@ -64,13 +67,97 @@ static DEFINE_SPINLOCK(gnttab_list_lock);
 unsigned long xen_hvm_resume_frames;
 EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
 
-static struct grant_entry *shared;
+static union {
+	struct grant_entry_v1 *v1;
+	union grant_entry_v2 *v2;
+	void *addr;
+} gnttab_shared;
+
+/*This is a structure of function pointers for grant table*/
+struct gnttab_ops {
+	/*
+	 * Mapping a list of frames for storing grant entries. Frames parameter
+	 * is used to store grant table address when grant table being setup,
+	 * nr_gframes is the number of frames to map grant table. Returning
+	 * GNTST_okay means success and negative value means failure.
+	 */
+	int (*map_frames)(unsigned long *frames, unsigned int nr_gframes);
+	/*
+	 * Release a list of frames which are mapped in map_frames for grant
+	 * entry status.
+	 */
+	void (*unmap_frames)(void);
+	/*
+	 * Introducing a valid entry into the grant table, granting the frame of
+	 * this grant entry to domain for accessing or transfering. Ref
+	 * parameter is reference of this introduced grant entry, domid is id of
+	 * granted domain, frame is the page frame to be granted, and flags is
+	 * status of the grant entry to be updated.
+	 */
+	void (*update_entry)(grant_ref_t ref, domid_t domid,
+			     unsigned long frame, unsigned flags);
+	/*
+	 * Stop granting a grant entry to domain for accessing. Ref parameter is
+	 * reference of a grant entry whose grant access will be stopped,
+	 * readonly is not in use in this function. If the grant entry is
+	 * currently mapped for reading or writing, just return failure(==0)
+	 * directly and don't tear down the grant access. Otherwise, stop grant
+	 * access for this entry and return success(==1).
+	 */
+	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
+	/*
+	 * Stop granting a grant entry to domain for transfer. Ref parameter is
+	 * reference of a grant entry whose grant transfer will be stopped. If
+	 * tranfer has not started, just reclaim the grant entry and return
+	 * failure(==0). Otherwise, wait for the transfer to complete and then
+	 * return the frame.
+	 */
+	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
+	/*
+	 * Query the status of a grant entry. Ref parameter is reference of
+	 * queried grant entry, return value is the status of queried entry.
+	 * Detailed status(writing/reading) can be gotten from the return value
+	 * by bit operations.
+	 */
+	int (*query_foreign_access)(grant_ref_t ref);
+	/*
+	 * Grant a domain to access a range of bytes within the page referred by
+	 * an available grant entry. Ref parameter is reference of a grant entry
+	 * which will be sub-page accessed, domid is id of grantee domain, frame
+	 * is frame address of subpage grant, flags is grant type and flag
+	 * information, page_off is offset of the range of bytes, and length is
+	 * length of bytes to be accessed.
+	 */
+	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
+				     unsigned long frame, int flags,
+				     unsigned page_off, unsigned length);
+	/*
+	 * Redirect an available grant entry on domain A to another grant
+	 * reference of domain B, then allow domain C to use grant reference
+	 * of domain B transitively. Ref parameter is an available grant entry
+	 * reference on domain A, domid is id of domain C which accesses grant
+	 * entry transitively, flags is grant type and flag information,
+	 * trans_domid is id of domain B whose grant entry is finally accessed
+	 * transitively, trans_gref is grant entry transitive reference of
+	 * domain B.
+	 */
+	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
+				   domid_t trans_domid, grant_ref_t trans_gref);
+};
+
+static struct gnttab_ops *gnttab_interface;
+
+/*This reflects status of grant entries, so act as a global value*/
+static grant_status_t *grstatus;
+
+static int grant_table_version;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
 
 static int gnttab_expand(unsigned int req_entries);
 
 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
 
 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
 {
@@ -142,23 +229,33 @@ static void put_free_entry(grant_ref_t ref)
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
-static void update_grant_entry(grant_ref_t ref, domid_t domid,
-			       unsigned long frame, unsigned flags)
+/*
+ * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, inc. valid type.
+ */
+static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
+				   unsigned long frame, unsigned flags)
+{
+	gnttab_shared.v1[ref].domid = domid;
+	gnttab_shared.v1[ref].frame = frame;
+	wmb();
+	gnttab_shared.v1[ref].flags = flags;
+}
+
+static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
+				   unsigned long frame, unsigned flags)
 {
-	/*
-	 * Introducing a valid entry into the grant table:
-	 *  1. Write ent->domid.
-	 *  2. Write ent->frame:
-	 *      GTF_permit_access:   Frame to which access is permitted.
-	 *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
-	 *                           frame, or zero if none.
-	 *  3. Write memory barrier (WMB).
-	 *  4. Write ent->flags, inc. valid type.
-	 */
-	shared[ref].frame = frame;
-	shared[ref].domid = domid;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	gnttab_shared.v2[ref].full_page.frame = frame;
 	wmb();
-	shared[ref].flags = flags;
+	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
 }
 
 /*
@@ -167,7 +264,7 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid,
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 				     unsigned long frame, int readonly)
 {
-	update_grant_entry(ref, domid, frame,
+	gnttab_interface->update_entry(ref, domid, frame,
 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
@@ -187,31 +284,184 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
-int gnttab_query_foreign_access(grant_ref_t ref)
+void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+				    unsigned long frame, int flags,
+				    unsigned page_off,
+				    unsigned length)
+{
+	gnttab_shared.v2[ref].sub_page.frame = frame;
+	gnttab_shared.v2[ref].sub_page.page_off = page_off;
+	gnttab_shared.v2[ref].sub_page.length = length;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	wmb();
+	gnttab_shared.v2[ref].hdr.flags =
+				GTF_permit_access | GTF_sub_page | flags;
+}
+
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+					    unsigned long frame, int flags,
+					    unsigned page_off,
+					    unsigned length)
 {
-	u16 nflags;
+	if (flags & (GTF_accept_transfer | GTF_reading |
+		     GTF_writing | GTF_transitive))
+		return -EPERM;
 
-	nflags = shared[ref].flags;
+	if (gnttab_interface->update_subpage_entry == NULL)
+		return -ENOSYS;
 
-	return (nflags & (GTF_reading|GTF_writing));
+	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
+					       page_off, length);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
+
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+					int flags, unsigned page_off,
+					unsigned length)
+{
+	int ref, rc;
+
+	ref = get_free_entries(1);
+	if (unlikely(ref < 0))
+		return -ENOSPC;
+
+	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
+						     page_off, length);
+	if (rc < 0) {
+		put_free_entry(ref);
+		return rc;
+	}
+
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
+
+bool gnttab_subpage_grants_available(void)
+{
+	return gnttab_interface->update_subpage_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
+
+void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+				  int flags, domid_t trans_domid,
+				  grant_ref_t trans_gref)
+{
+	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
+	gnttab_shared.v2[ref].transitive.gref = trans_gref;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	wmb();
+	gnttab_shared.v2[ref].hdr.flags =
+				GTF_permit_access | GTF_transitive | flags;
+}
+
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+					  int flags, domid_t trans_domid,
+					  grant_ref_t trans_gref)
+{
+	if (flags & (GTF_accept_transfer | GTF_reading |
+		     GTF_writing | GTF_sub_page))
+		return -EPERM;
+
+	if (gnttab_interface->update_trans_entry == NULL)
+		return -ENOSYS;
+
+	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
+					     trans_gref);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
+
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+				      domid_t trans_domid,
+				      grant_ref_t trans_gref)
+{
+	int ref, rc;
+
+	ref = get_free_entries(1);
+	if (unlikely(ref < 0))
+		return -ENOSPC;
+
+	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
+						   trans_domid, trans_gref);
+	if (rc < 0) {
+		put_free_entry(ref);
+		return rc;
+	}
+
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
+
+bool gnttab_trans_grants_available(void)
+{
+	return gnttab_interface->update_trans_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
+
+static int gnttab_query_foreign_access_v1(grant_ref_t ref)
+{
+	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
+}
+
+static int gnttab_query_foreign_access_v2(grant_ref_t ref)
+{
+	return grstatus[ref] & (GTF_reading|GTF_writing);
+}
+
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+	return gnttab_interface->query_foreign_access(ref);
 }
 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
 
-int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
 {
 	u16 flags, nflags;
+	u16 *pflags;
 
-	nflags = shared[ref].flags;
+	pflags = &gnttab_shared.v1[ref].flags;
+	nflags = *pflags;
 	do {
 		flags = nflags;
 		if (flags & (GTF_reading|GTF_writing)) {
 			printk(KERN_ALERT "WARNING: g.e. still in use!\n");
 			return 0;
 		}
-	} while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+
+	return 1;
+}
+
+static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
+{
+	gnttab_shared.v2[ref].hdr.flags = 0;
+	mb();
+	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
+		return 0;
+	} else {
+		/* The read of grstatus needs to have acquire
+		   semantics.  On x86, reads already have
+		   that, and we just need to protect against
+		   compiler reorderings.  On other
+		   architectures we may need a full
+		   barrier. */
+#ifdef CONFIG_X86
+		barrier();
+#else
+		mb();
+#endif
+	}
 
 	return 1;
 }
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+	return gnttab_interface->end_foreign_access_ref(ref, readonly);
+}
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
 
 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
@@ -246,37 +496,76 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
 				       unsigned long pfn)
 {
-	update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
+	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
 
-unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
 {
 	unsigned long frame;
 	u16           flags;
+	u16          *pflags;
+
+	pflags = &gnttab_shared.v1[ref].flags;
 
 	/*
 	 * If a transfer is not even yet started, try to reclaim the grant
 	 * reference and return failure (== 0).
 	 */
-	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
-		if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+	while (!((flags = *pflags) & GTF_transfer_committed)) {
+		if (sync_cmpxchg(pflags, flags, 0) == flags)
 			return 0;
 		cpu_relax();
 	}
 
 	/* If a transfer is in progress then wait until it is completed. */
 	while (!(flags & GTF_transfer_completed)) {
-		flags = shared[ref].flags;
+		flags = *pflags;
 		cpu_relax();
 	}
 
 	rmb();	/* Read the frame number /after/ reading completion status. */
-	frame = shared[ref].frame;
+	frame = gnttab_shared.v1[ref].frame;
+	BUG_ON(frame == 0);
+
+	return frame;
+}
+
+static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
+{
+	unsigned long frame;
+	u16           flags;
+	u16          *pflags;
+
+	pflags = &gnttab_shared.v2[ref].hdr.flags;
+
+	/*
+	 * If a transfer is not even yet started, try to reclaim the grant
+	 * reference and return failure (== 0).
+	 */
+	while (!((flags = *pflags) & GTF_transfer_committed)) {
+		if (sync_cmpxchg(pflags, flags, 0) == flags)
+			return 0;
+		cpu_relax();
+	}
 
+	/* If a transfer is in progress then wait until it is completed. */
+	while (!(flags & GTF_transfer_completed)) {
+		flags = *pflags;
+		cpu_relax();
+	}
+
+	rmb();	/* Read the frame number /after/ reading completion status. */
+	frame = gnttab_shared.v2[ref].full_page.frame;
 	BUG_ON(frame == 0);
 
 	return frame;
 }
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+	return gnttab_interface->end_foreign_transfer_ref(ref);
+}
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
 
 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
@@ -448,6 +737,7 @@ unsigned int gnttab_max_grant_frames(void)
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count)
 {
 	int i, ret;
@@ -471,25 +761,10 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 				(map_ops[i].host_addr & ~PAGE_MASK));
 			mfn = pte_mfn(*pte);
 		} else {
-			/* If you really wanted to do this:
-			 * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
-			 *
-			 * The reason we do not implement it is b/c on the
-			 * unmap path (gnttab_unmap_refs) we have no means of
-			 * checking whether the page is !GNTMAP_contains_pte.
-			 *
-			 * That is without some extra data-structure to carry
-			 * the struct page, bool clear_pte, and list_head next
-			 * tuples and deal with allocation/delallocation, etc.
-			 *
-			 * The users of this API set the GNTMAP_contains_pte
-			 * flag so lets just return not supported until it
-			 * becomes neccessary to implement.
-			 */
-			return -EOPNOTSUPP;
+			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
 		}
-		ret = m2p_add_override(mfn, pages[i],
-				       map_ops[i].flags & GNTMAP_contains_pte);
+		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+				       &kmap_ops[i] : NULL);
 		if (ret)
 			return ret;
 	}
@@ -499,7 +774,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count)
+		      struct page **pages, unsigned int count, bool clear_pte)
 {
 	int i, ret;
 
@@ -511,7 +786,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
+		ret = m2p_remove_override(pages[i], clear_pte);
 		if (ret)
 			return ret;
 	}
@@ -520,6 +795,77 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+	return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
+}
+
+static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes)
+{
+	int rc;
+
+	rc = arch_gnttab_map_shared(frames, nr_gframes,
+				    gnttab_max_grant_frames(),
+				    &gnttab_shared.addr);
+	BUG_ON(rc);
+
+	return 0;
+}
+
+static void gnttab_unmap_frames_v1(void)
+{
+	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+}
+
+static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes)
+{
+	uint64_t *sframes;
+	unsigned int nr_sframes;
+	struct gnttab_get_status_frames getframes;
+	int rc;
+
+	nr_sframes = nr_status_frames(nr_gframes);
+
+	/* No need for kzalloc as it is initialized in following hypercall
+	 * GNTTABOP_get_status_frames.
+	 */
+	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
+	if (!sframes)
+		return -ENOMEM;
+
+	getframes.dom        = DOMID_SELF;
+	getframes.nr_frames  = nr_sframes;
+	set_xen_guest_handle(getframes.frame_list, sframes);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+				       &getframes, 1);
+	if (rc == -ENOSYS) {
+		kfree(sframes);
+		return -ENOSYS;
+	}
+
+	BUG_ON(rc || getframes.status);
+
+	rc = arch_gnttab_map_status(sframes, nr_sframes,
+				    nr_status_frames(gnttab_max_grant_frames()),
+				    &grstatus);
+	BUG_ON(rc);
+	kfree(sframes);
+
+	rc = arch_gnttab_map_shared(frames, nr_gframes,
+				    gnttab_max_grant_frames(),
+				    &gnttab_shared.addr);
+	BUG_ON(rc);
+
+	return 0;
+}
+
+static void gnttab_unmap_frames_v2(void)
+{
+	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
+}
+
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
 	struct gnttab_setup_table setup;
@@ -551,6 +897,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 		return rc;
 	}
 
+	/* No need for kzalloc as it is initialized in following hypercall
+	 * GNTTABOP_setup_table.
+	 */
 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
 	if (!frames)
 		return -ENOMEM;
@@ -567,19 +916,65 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 
 	BUG_ON(rc || setup.status);
 
-	rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
-				    &shared);
-	BUG_ON(rc);
+	rc = gnttab_interface->map_frames(frames, nr_gframes);
 
 	kfree(frames);
 
-	return 0;
+	return rc;
+}
+
+static struct gnttab_ops gnttab_v1_ops = {
+	.map_frames			= gnttab_map_frames_v1,
+	.unmap_frames			= gnttab_unmap_frames_v1,
+	.update_entry			= gnttab_update_entry_v1,
+	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
+	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
+	.query_foreign_access		= gnttab_query_foreign_access_v1,
+};
+
+static struct gnttab_ops gnttab_v2_ops = {
+	.map_frames			= gnttab_map_frames_v2,
+	.unmap_frames			= gnttab_unmap_frames_v2,
+	.update_entry			= gnttab_update_entry_v2,
+	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
+	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
+	.query_foreign_access		= gnttab_query_foreign_access_v2,
+	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
+	.update_trans_entry		= gnttab_update_trans_entry_v2,
+};
+
+static void gnttab_request_version(void)
+{
+	int rc;
+	struct gnttab_set_version gsv;
+
+	gsv.version = 2;
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+	if (rc == 0) {
+		grant_table_version = 2;
+		gnttab_interface = &gnttab_v2_ops;
+	} else if (grant_table_version == 2) {
+		/*
+		 * If we've already used version 2 features,
+		 * but then suddenly discover that they're not
+		 * available (e.g. migrating to an older
+		 * version of Xen), almost unbounded badness
+		 * can happen.
+		 */
+		panic("we need grant tables version 2, but only version 1 is available");
+	} else {
+		grant_table_version = 1;
+		gnttab_interface = &gnttab_v1_ops;
+	}
+	printk(KERN_INFO "Grant tables using version %d layout.\n",
+		grant_table_version);
 }
 
 int gnttab_resume(void)
 {
 	unsigned int max_nr_gframes;
 
+	gnttab_request_version();
 	max_nr_gframes = gnttab_max_grant_frames();
 	if (max_nr_gframes < nr_grant_frames)
 		return -ENOSYS;
@@ -587,9 +982,10 @@ int gnttab_resume(void)
 	if (xen_pv_domain())
 		return gnttab_map(0, nr_grant_frames - 1);
 
-	if (!shared) {
-		shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
-		if (shared == NULL) {
+	if (gnttab_shared.addr == NULL) {
+		gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
+						PAGE_SIZE * max_nr_gframes);
+		if (gnttab_shared.addr == NULL) {
 			printk(KERN_WARNING
 					"Failed to ioremap gnttab share frames!");
 			return -ENOMEM;
@@ -603,7 +999,7 @@ int gnttab_resume(void)
 
 int gnttab_suspend(void)
 {
-	arch_gnttab_unmap_shared(shared, nr_grant_frames);
+	gnttab_interface->unmap_frames();
 
 	return 0;
 }
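[Illustrative sketch — not part of the patch. The v2-only grant types above are reachable only when the negotiated layout provides the corresponding gnttab_ops slots; under a v1 hypervisor those slots are NULL and the *_ref helpers return -ENOSYS. A hypothetical caller (domB, domC and grefB are placeholders) should therefore gate on the availability helpers:]

int ref = -ENOSYS;

if (gnttab_trans_grants_available())
	/* let domC use domB's grant grefB transitively */
	ref = gnttab_grant_foreign_access_trans(domC, 0, domB, grefB);

if (ref < 0) {
	/* v1 hypervisor (or no free entry): fall back, e.g. grant a
	 * full local copy of the page with gnttab_grant_foreign_access(). */
}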
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 0b5366b5be20..ce4fa0831860 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -9,6 +9,7 @@
 #include <linux/stop_machine.h>
 #include <linux/freezer.h>
 #include <linux/syscore_ops.h>
+#include <linux/export.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc07dc..b84bf0b6cc34 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
 
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
 			.is_virtfn	= 1,
-			.physfn.bus	= pci_dev->physfn->bus->number,
-			.physfn.devfn	= pci_dev->physfn->devfn,
+			.physfn.bus	= physfn->bus->number,
+			.physfn.devfn	= physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 			&manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
@@ -56,7 +116,7 @@ static int xen_add_device(struct device *dev)
 			&manage_pci_ext);
 	} else {
 		struct physdev_manage_pci manage_pci = {
-			.bus 	= pci_dev->bus->number,
+			.bus	= pci_dev->bus->number,
 			.devfn	= pci_dev->devfn,
 		};
 
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
 
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-		&manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
 		r = xen_remove_device(dev);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-
-	return r;
+	if (r)
+		dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+			action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+			(action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+	return NOTIFY_OK;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/privcmd.c
index dbd3b16fd131..ccee0f16bcf8 100644
--- a/drivers/xen/xenfs/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -18,6 +19,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
+#include <linux/miscdevice.h>
 
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -32,6 +34,10 @@
 #include <xen/page.h>
 #include <xen/xen-ops.h>
 
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
 #ifndef HAVE_ARCH_PRIVCMD_MMAP
 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
 #endif
@@ -359,7 +365,6 @@ static long privcmd_ioctl(struct file *file,
 	return ret;
 }
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
 static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -392,9 +397,39 @@ static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
 {
 	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
 }
-#endif
 
-const struct file_operations privcmd_file_ops = {
+const struct file_operations xen_privcmd_fops = {
+	.owner = THIS_MODULE,
 	.unlocked_ioctl = privcmd_ioctl,
 	.mmap = privcmd_mmap,
 };
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
+
+static struct miscdevice privcmd_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/privcmd",
+	.fops = &xen_privcmd_fops,
+};
+
+static int __init privcmd_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&privcmd_dev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register Xen privcmd device\n");
+		return err;
+	}
+	return 0;
+}
+
+static void __exit privcmd_exit(void)
+{
+	misc_deregister(&privcmd_dev);
+}
+
+module_init(privcmd_init);
+module_exit(privcmd_exit);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
new file mode 100644
index 000000000000..14facaeed36f
--- /dev/null
+++ b/drivers/xen/privcmd.h
@@ -0,0 +1,3 @@
+#include <linux/fs.h>
+
+extern const struct file_operations xen_privcmd_fops;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a23201..19e6a2041371 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -35,9 +35,11 @@
 
 #include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
+#include <linux/export.h>
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,26 +148,29 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 void __init xen_swiotlb_init(int verbose)
 {
 	unsigned long bytes;
-	int rc;
+	int rc = -ENOMEM;
 	unsigned long nr_tbl;
+	char *m = NULL;
+	unsigned int repeat = 3;
 
-	nr_tbl = swioltb_nr_tbl();
+	nr_tbl = swiotlb_nr_tbl();
 	if (nr_tbl)
 		xen_io_tlb_nslabs = nr_tbl;
 	else {
 		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
 		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-
+retry:
 	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
-	xen_io_tlb_start = alloc_bootmem(bytes);
-	if (!xen_io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-
+	xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+	if (!xen_io_tlb_start) {
+		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		goto error;
+	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
 	 * And replace that memory with pages under 4GB.
@@ -173,17 +178,28 @@ void __init xen_swiotlb_init(int verbose)
 	rc = xen_swiotlb_fixup(xen_io_tlb_start,
 			       bytes,
 			       xen_io_tlb_nslabs);
-	if (rc)
+	if (rc) {
+		free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+		m = "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    "is too fragmented!";
 		goto error;
-
+	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
 
 	return;
 error:
-	panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-	      "We either don't have the permission or you do not have enough"\
-	      "free memory under 4GB!\n", rc);
+	if (repeat--) {
+		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs >> 1));
+		printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+		goto retry;
+	}
+	xen_raw_printk("%s (rc:%d)", m, rc);
+	panic("%s (rc:%d)", m, rc);
 }
 
 void *
@@ -194,6 +210,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
 	unsigned long vstart;
+	phys_addr_t phys;
+	dma_addr_t dev_addr;
 
 	/*
 	* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +227,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	vstart = __get_free_pages(flags, order);
 	ret = (void *)vstart;
 
+	if (!ret)
+		return ret;
+
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+		dma_mask = hwdev->coherent_dma_mask;
 
-	if (ret) {
+	phys = virt_to_phys(ret);
+	dev_addr = xen_phys_to_bus(phys);
+	if (((dev_addr + size - 1 <= dma_mask)) &&
+	    !range_straddles_page_boundary(phys, size))
+		*dma_handle = dev_addr;
+	else {
 		if (xen_create_contiguous_region(vstart, order,
 						 fls64(dma_mask)) != 0) {
 			free_pages(vstart, order);
 			return NULL;
 		}
-		memset(ret, 0, size);
 		*dma_handle = virt_to_machine(ret).maddr;
 	}
+	memset(ret, 0, size);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +256,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 			  dma_addr_t dev_addr)
 {
 	int order = get_order(size);
+	phys_addr_t phys;
+	u64 dma_mask = DMA_BIT_MASK(32);
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
 
-	xen_destroy_contiguous_region((unsigned long)vaddr, order);
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(vaddr);
+
+	if (((dev_addr + size - 1 > dma_mask)) ||
+	    range_straddles_page_boundary(phys, size))
+		xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +314,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
-
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = 0;
+	}
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
notifier_block *notifier, unsigned long event, @@ -87,7 +86,9 @@ static int balloon_init_watcher(struct notifier_block *notifier, return NOTIFY_DONE; } -static struct notifier_block xenstore_notifier; +static struct notifier_block xenstore_notifier = { + .notifier_call = balloon_init_watcher, +}; static int __init balloon_init(void) { @@ -96,12 +97,9 @@ static int __init balloon_init(void) pr_info("xen-balloon: Initialising balloon driver.\n"); - register_balloon(&balloon_sysdev); - - register_xen_selfballooning(&balloon_sysdev); + register_balloon(&balloon_dev); - target_watch.callback = watch_target; - xenstore_notifier.notifier_call = balloon_init_watcher; + register_xen_selfballooning(&balloon_dev); register_xenstore_notifier(&xenstore_notifier); @@ -118,31 +116,31 @@ static void balloon_exit(void) module_exit(balloon_exit); #define BALLOON_SHOW(name, format, args...) \ - static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ + static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ char *buf) \ { \ return sprintf(buf, format, ##args); \ } \ - static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); -static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); -static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); -static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); -static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); +static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); +static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); +static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); +static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); -static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, +static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); } -static ssize_t store_target_kb(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_target_kb(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -159,11 +157,11 @@ static ssize_t store_target_kb(struct sys_device *dev, return count; } -static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR, show_target_kb, store_target_kb); -static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, +static ssize_t show_target(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%llu\n", @@ -171,8 +169,8 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr << PAGE_SHIFT); } -static ssize_t store_target(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_target(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -189,23 +187,23 @@ static ssize_t store_target(struct sys_device *dev, return count; } -static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(target, S_IRUGO | S_IWUSR, show_target, store_target); -static struct sysdev_attribute 
*balloon_attrs[] = { - &attr_target_kb, - &attr_target, - &attr_schedule_delay.attr, - &attr_max_schedule_delay.attr, - &attr_retry_count.attr, - &attr_max_retry_count.attr +static struct device_attribute *balloon_attrs[] = { + &dev_attr_target_kb, + &dev_attr_target, + &dev_attr_schedule_delay.attr, + &dev_attr_max_schedule_delay.attr, + &dev_attr_retry_count.attr, + &dev_attr_max_retry_count.attr }; static struct attribute *balloon_info_attrs[] = { - &attr_current_kb.attr, - &attr_low_kb.attr, - &attr_high_kb.attr, + &dev_attr_current_kb.attr, + &dev_attr_low_kb.attr, + &dev_attr_high_kb.attr, NULL }; @@ -214,34 +212,35 @@ static struct attribute_group balloon_info_group = { .attrs = balloon_info_attrs }; -static struct sysdev_class balloon_sysdev_class = { - .name = BALLOON_CLASS_NAME +static struct bus_type balloon_subsys = { + .name = BALLOON_CLASS_NAME, + .dev_name = BALLOON_CLASS_NAME, }; -static int register_balloon(struct sys_device *sysdev) +static int register_balloon(struct device *dev) { int i, error; - error = sysdev_class_register(&balloon_sysdev_class); + error = subsys_system_register(&balloon_subsys, NULL); if (error) return error; - sysdev->id = 0; - sysdev->cls = &balloon_sysdev_class; + dev->id = 0; + dev->bus = &balloon_subsys; - error = sysdev_register(sysdev); + error = device_register(dev); if (error) { - sysdev_class_unregister(&balloon_sysdev_class); + bus_unregister(&balloon_subsys); return error; } for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { - error = sysdev_create_file(sysdev, balloon_attrs[i]); + error = device_create_file(dev, balloon_attrs[i]); if (error) goto fail; } - error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); + error = sysfs_create_group(&dev->kobj, &balloon_info_group); if (error) goto fail; @@ -249,9 +248,9 @@ static int register_balloon(struct sys_device *sysdev) fail: while (--i >= 0) - sysdev_remove_file(sysdev, balloon_attrs[i]); - sysdev_unregister(sysdev); - sysdev_class_unregister(&balloon_sysdev_class); + device_remove_file(dev, balloon_attrs[i]); + device_unregister(dev); + bus_unregister(&balloon_subsys); return error; } diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index a8031445d94e..30d7be026c18 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -10,13 +10,13 @@ */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/pci.h> #include "pciback.h" #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" -static int permissive; +static bool permissive; module_param(permissive, bool, 0644); /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index da3cbdfcb5dc..3daf862d739d 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -15,7 +15,6 @@ struct pci_bar_info { int which; }; -#define DRV_NAME "xen-pciback" #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) @@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) int ret; ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!atomic_read(&dev->enable_cnt)) + if (!pci_is_enabled(dev)) return ret; for (i = 0; i < PCI_ROM_RESOURCE; i++) { @@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev, bar_info->val = 
res[pos].start | (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = res[pos].end - res[pos].start + 1; + bar_info->len_val = resource_size(&res[pos]); } static void *bar_init(struct pci_dev *dev, int offset) diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c index 921a889e65eb..7476791cab40 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.c +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -12,7 +12,6 @@ #include "conf_space_quirks.h" LIST_HEAD(xen_pcibk_quirks); -#define DRV_NAME "xen-pciback" static inline const struct pci_device_id * match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { @@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) goto out; tmp_quirk = NULL; printk(KERN_DEBUG DRV_NAME - ":quirk didn't match any device xen_pciback knows about\n"); + ": quirk didn't match any device known\n"); out: return tmp_quirk; } diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index 1d32a9a42c01..828dddc360df 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -7,13 +7,13 @@ #include <linux/list.h> #include <linux/pci.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include "pciback.h" struct passthrough_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list; - spinlock_t lock; + struct mutex lock; }; static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, @@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; struct pci_dev *dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry(dev_entry, &dev_data->dev_list, list) { if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) @@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); return dev; } @@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; - unsigned long flags; unsigned int domain, bus, devfn; int err; @@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, return -ENOMEM; dev_entry->dev = dev; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_add_tail(&dev_entry->list, &dev_data->dev_list); - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); /* Publish this device. 
*/ domain = (unsigned int)pci_domain_nr(dev->bus); @@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { if (dev_entry->dev == dev) { @@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!dev_data) return -ENOMEM; - spin_lock_init(&dev_data->lock); + mutex_init(&dev_data->lock); INIT_LIST_HEAD(&dev_data->dev_list); @@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *e, *tmp; + struct pci_dev_entry *dev_entry, *e; struct pci_dev *dev; int found; unsigned int domain, bus; - spin_lock(&dev_data->lock); + mutex_lock(&dev_data->lock); - list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { /* Only publish this device as a root if none of its * parent bridges are exported */ @@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, bus = (unsigned int)dev_entry->dev->bus->number; if (!found) { - spin_unlock(&dev_data->lock); err = publish_root_cb(pdev, domain, bus); if (err) break; - spin_lock(&dev_data->lock); } } - if (!err) - spin_unlock(&dev_data->lock); + mutex_unlock(&dev_data->lock); return err; } @@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return 1; } -struct xen_pcibk_backend xen_pcibk_passthrough_backend = { +const struct xen_pcibk_backend xen_pcibk_passthrough_backend = { .name = "passthrough", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index aec214ac0a14..7944a17f5cbf 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,8 +21,6 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" - static char *pci_devs_to_hide; wait_queue_head_t xen_pcibk_aer_wait_queue; /* Add sem for sync AER handling and xen_pcibk remove/reconfigure ops, @@ -101,6 +99,7 @@ static void pcistub_device_release(struct kref *kref) kfree(pci_get_drvdata(psdev->dev)); pci_set_drvdata(psdev->dev, NULL); + psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; pci_dev_put(psdev->dev); kfree(psdev); @@ -222,6 +221,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) } spin_unlock_irqrestore(&pcistub_devices_lock, flags); + if (WARN_ON(!found_psdev)) + return; /* hold this lock to avoid breaking the link between * pcistub and xen_pcibk when AER is in processing @@ -234,6 +235,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) xen_pcibk_config_free_dyn_fields(found_psdev->dev); xen_pcibk_config_reset_dev(found_psdev->dev); + xen_unregister_device_domain_owner(found_psdev->dev); + spin_lock_irqsave(&found_psdev->lock, flags); found_psdev->pdev = NULL; spin_unlock_irqrestore(&found_psdev->lock, flags); @@ -331,6 +334,7 @@ static int __devinit 
pcistub_init_device(struct pci_dev *dev) dev_dbg(&dev->dev, "reset device\n"); xen_pcibk_reset_device(dev); + dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; return 0; config_release: @@ -514,12 +518,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev) int err; char nodename[PCI_NODENAME_MAX]; - if (!psdev) - dev_err(&psdev->dev->dev, - "device is NULL when do AER recovery/kill_domain\n"); + BUG_ON(!psdev); snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", psdev->pdev->xdev->otherend_id); - nodename[strlen(nodename)] = '\0'; again: err = xenbus_transaction_start(&xbt); @@ -605,7 +606,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, if (test_bit(_XEN_PCIF_active, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in xen_pcibk\n"); + "schedule pci_conf service in " DRV_NAME "\n"); xen_pcibk_test_and_schedule_op(psdev->pdev); } @@ -995,8 +996,7 @@ out: err = count; return err; } - -DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); +static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, size_t count) @@ -1015,8 +1015,7 @@ out: err = count; return err; } - -DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); +static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) { @@ -1039,8 +1038,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) return count; } - -DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); +static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) { @@ -1069,8 +1067,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); +static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, const char *buf, @@ -1106,7 +1103,8 @@ out: err = count; return err; } -DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); +static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, + pcistub_irq_handler_switch); static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, size_t count) @@ -1170,8 +1168,8 @@ out: return count; } - -DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); +static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, + pcistub_quirk_add); static ssize_t permissive_add(struct device_driver *drv, const char *buf, size_t count) @@ -1236,8 +1234,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); +static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, + permissive_add); static void pcistub_exit(void) { @@ -1374,3 +1372,4 @@ module_init(xen_pcibk_init); module_exit(xen_pcibk_cleanup); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("xen-backend:pci"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index a0e131a81503..e9b4011c5f9a 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -15,6 +15,8 @@ #include <linux/atomic.h> 
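The pci_stub.c hunks above bracket a device's lifetime with PCI_DEV_FLAGS_ASSIGNED: the flag is set once pcistub_init_device() has seized the device, and cleared again in pcistub_device_release(), so generic host-side code can tell the device is currently owned by a guest. Stripped to its core, the pattern is just a set/clear pair around the ownership window; a minimal sketch (the demo_* names are hypothetical, only the flag itself comes from the patch):

	#include <linux/pci.h>

	/* Called once the stub has taken the device away from host drivers. */
	static void demo_mark_assigned(struct pci_dev *dev)
	{
		dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;	/* guest owns it now */
	}

	/* Called from the release path, after config space has been reset. */
	static void demo_mark_unassigned(struct pci_dev *dev)
	{
		dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;	/* back to the host */
	}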
#include <xen/interface/io/pciif.h> +#define DRV_NAME "xen-pciback" + struct pci_dev_entry { struct list_head list; struct pci_dev *dev; @@ -27,7 +29,7 @@ struct pci_dev_entry { struct xen_pcibk_device { void *pci_dev_data; - spinlock_t dev_lock; + struct mutex dev_lock; struct xenbus_device *xdev; struct xenbus_watch be_watch; u8 be_watching; @@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, * passthrough - BDFs are exactly like in the host. */ struct xen_pcibk_backend { - char *name; + const char *name; int (*init)(struct xen_pcibk_device *pdev); void (*free)(struct xen_pcibk_device *pdev); int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, @@ -104,9 +106,9 @@ struct xen_pcibk_backend { unsigned int devfn); }; -extern struct xen_pcibk_backend xen_pcibk_vpci_backend; -extern struct xen_pcibk_backend xen_pcibk_passthrough_backend; -extern struct xen_pcibk_backend *xen_pcibk_backend; +extern const struct xen_pcibk_backend xen_pcibk_vpci_backend; +extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend; +extern const struct xen_pcibk_backend *xen_pcibk_backend; static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, @@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, if (xen_pcibk_backend && xen_pcibk_backend->add) return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); return -1; -}; +} + static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev) { if (xen_pcibk_backend && xen_pcibk_backend->release) return xen_pcibk_backend->release(pdev, dev); -}; +} static inline struct pci_dev * xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, @@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, if (xen_pcibk_backend && xen_pcibk_backend->get) return xen_pcibk_backend->get(pdev, domain, bus, devfn); return NULL; -}; +} + /** * Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in xen_pcibk * before sending aer request to pcifront, so that guest could identify @@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return xen_pcibk_backend->find(pcidev, pdev, domain, bus, devfn); return -1; -}; +} + static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->init) return xen_pcibk_backend->init(pdev); return -1; -}; +} + static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, publish_pci_root_cb cb) { if (xen_pcibk_backend && xen_pcibk_backend->publish) return xen_pcibk_backend->publish(pdev, cb); return -1; -}; +} + static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->free) return xen_pcibk_backend->free(pdev); -}; +} + /* Handles events from front-end */ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 8c95c3415b75..63616d7453e6 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -10,7 +10,6 @@ #include <linux/sched.h> #include "pciback.h" -#define DRV_NAME "xen-pciback" int verbose_request; module_param(verbose_request, int, 0644); diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 4a42cfb0959d..46d140baebd8 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -8,16 +8,15 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/pci.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include "pciback.h" #define PCI_SLOT_MAX 32 -#define DRV_NAME "xen-pciback" struct vpci_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list[PCI_SLOT_MAX]; - spinlock_t lock; + struct mutex lock; }; static inline struct list_head *list_first(struct list_head *head) @@ -33,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if (domain != 0 || bus != 0) return NULL; if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); list_for_each_entry(entry, &vpci_dev->dev_list[PCI_SLOT(devfn)], @@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); } return dev; } @@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, int err = 0, slot, func = -1; struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { err = -EFAULT; @@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, dev_entry->dev = dev; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); /* Keep multi-function devices together on the virtual PCI bus */ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { @@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, "No more space on root virtual PCI bus"); unlock: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); /* Publish this device. 
*/ if (!err) @@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - struct pci_dev_entry *e, *tmp; - list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], - list) { + struct pci_dev_entry *e; + + list_for_each_entry(e, &vpci_dev->dev_list[slot], list) { if (e->dev == dev) { list_del(&e->list); found_dev = e->dev; @@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } out: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!vpci_dev) return -ENOMEM; - spin_lock_init(&vpci_dev->lock); + mutex_init(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); @@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; int found = 0, slot; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { list_for_each_entry(entry, &vpci_dev->dev_list[slot], @@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, } } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); return found; } -struct xen_pcibk_backend xen_pcibk_vpci_backend = { +const struct xen_pcibk_backend xen_pcibk_vpci_backend = { .name = "vpci", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 978d2c6f5dca..d5dcf8d5d3d9 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -13,11 +13,10 @@ #include <asm/xen/pci.h> #include "pciback.h" -#define DRV_NAME "xen-pciback" #define INVALID_EVTCHN_IRQ (-1) struct workqueue_struct *xen_pcibk_wq; -static int __read_mostly passthrough; +static bool __read_mostly passthrough; module_param(passthrough, bool, S_IRUGO); MODULE_PARM_DESC(passthrough, "Option to specify how to export PCI topology to guest:\n"\ @@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) pdev->xdev = xdev; dev_set_drvdata(&xdev->dev, pdev); - spin_lock_init(&pdev->dev_lock); + mutex_init(&pdev->dev_lock); pdev->sh_info = NULL; pdev->evtchn_irq = INVALID_EVTCHN_IRQ; @@ -62,14 +61,12 @@ out: static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) { - spin_lock(&pdev->dev_lock); - + mutex_lock(&pdev->dev_lock); /* Ensure the guest can't trigger our handler before removing devices */ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { unbind_from_irqhandler(pdev->evtchn_irq, pdev); pdev->evtchn_irq = INVALID_EVTCHN_IRQ; } - spin_unlock(&pdev->dev_lock); /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ @@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) /* Note, the workqueue does not use spinlocks at all.*/ flush_workqueue(xen_pcibk_wq); - spin_lock(&pdev->dev_lock); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); pdev->sh_info = 
NULL; } - spin_unlock(&pdev->dev_lock); - + mutex_unlock(&pdev->dev_lock); } static void free_pdev(struct xen_pcibk_device *pdev) @@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, goto out; } - spin_lock(&pdev->dev_lock); pdev->sh_info = vaddr; - spin_unlock(&pdev->dev_lock); err = bind_interdomain_evtchn_to_irqhandler( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, @@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Error binding event channel to IRQ"); goto out; } - - spin_lock(&pdev->dev_lock); pdev->evtchn_irq = err; - spin_unlock(&pdev->dev_lock); err = 0; dev_dbg(&pdev->xdev->dev, "Attached!\n"); @@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) char *magic = NULL; + mutex_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateInitialised) @@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { xenbus_dev_fatal(pdev->xdev, -EFAULT, "version mismatch (%s/%s) with pcifront - " - "halting xen_pcibk", + "halting " DRV_NAME, magic, XEN_PCI_MAGIC); goto out; } @@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); out: + mutex_unlock(&pdev->dev_lock); kfree(magic); @@ -251,8 +243,8 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); if (xen_register_device_domain_owner(dev, pdev->xdev->otherend_id) != 0) { - dev_err(&dev->dev, "device has been assigned to another " \ - "domain! Over-writting the ownership, but beware.\n"); + dev_err(&dev->dev, "Stealing ownership from dom%d.\n", + xen_find_device_domain_owner(dev)); xen_unregister_device_domain_owner(dev); xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); } @@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + mutex_lock(&pdev->dev_lock); /* Make sure we only reconfigure once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateReconfiguring) @@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) } out: + mutex_unlock(&pdev->dev_lock); return 0; } @@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) char dev_str[64]; char state_str[64]; + mutex_lock(&pdev->dev_lock); /* It's possible we could get the call to setup twice, so make sure * we're not already connected. 
*/ @@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) "Error switching to initialised state!"); out: + mutex_unlock(&pdev->dev_lock); if (!err) /* see if pcifront is already configured (if not, we'll wait) */ xen_pcibk_attach(pdev); - return err; } @@ -710,21 +705,18 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) return 0; } -static const struct xenbus_device_id xenpci_ids[] = { +static const struct xenbus_device_id xen_pcibk_ids[] = { {"pci"}, {""}, }; -static struct xenbus_driver xenbus_xen_pcibk_driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - .ids = xenpci_ids, +static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME, .probe = xen_pcibk_xenbus_probe, .remove = xen_pcibk_xenbus_remove, .otherend_changed = xen_pcibk_frontend_changed, -}; +); -struct xen_pcibk_backend *xen_pcibk_backend; +const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; int __init xen_pcibk_xenbus_register(void) { @@ -738,11 +730,11 @@ int __init xen_pcibk_xenbus_register(void) if (passthrough) xen_pcibk_backend = &xen_pcibk_passthrough_backend; pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); - return xenbus_register_backend(&xenbus_xen_pcibk_driver); + return xenbus_register_backend(&xen_pcibk_driver); } void __exit xen_pcibk_xenbus_unregister(void) { destroy_workqueue(xen_pcibk_wq); - xenbus_unregister_driver(&xenbus_xen_pcibk_driver); + xenbus_unregister_driver(&xen_pcibk_driver); } diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 6ea852e25162..767ff656d5a7 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -68,10 +68,13 @@ */ #include <linux/kernel.h> +#include <linux/bootmem.h> +#include <linux/swap.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/module.h> #include <linux/workqueue.h> +#include <linux/device.h> #include <xen/balloon.h> #include <xen/tmem.h> #include <xen/xen.h> @@ -93,6 +96,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1; /* In HZ, controls frequency of worker invocation. */ static unsigned int selfballoon_interval __read_mostly = 5; +/* + * Minimum usable RAM in MB, used as a floor for the self-ballooning target. + * If non-zero, it is added to totalreserve_pages and self-ballooning + * will not balloon below the sum. If zero, a piecewise linear function + * is calculated as a minimum and added to totalreserve_pages. Note that + * setting this value indiscriminately may cause OOMs and crashes. 
+ */ +static unsigned int selfballoon_min_usable_mb; + static void selfballoon_process(struct work_struct *work); static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); @@ -189,20 +201,23 @@ static int __init xen_selfballooning_setup(char *s) __setup("selfballooning", xen_selfballooning_setup); #endif /* CONFIG_FRONTSWAP */ +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + /* * Use current balloon size, the goal (vm_committed_as), and hysteresis * parameters to set a new target balloon size */ static void selfballoon_process(struct work_struct *work) { - unsigned long cur_pages, goal_pages, tgt_pages; + unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; + unsigned long useful_pages; bool reset_timer = false; if (xen_selfballooning_enabled) { - cur_pages = balloon_stats.current_pages; + cur_pages = totalram_pages; tgt_pages = cur_pages; /* default is no change */ goal_pages = percpu_counter_read_positive(&vm_committed_as) + - balloon_stats.current_pages - totalram_pages; + totalreserve_pages; #ifdef CONFIG_FRONTSWAP /* allow space for frontswap pages to be repatriated */ if (frontswap_selfshrinking && frontswap_enabled) @@ -217,7 +232,26 @@ static void selfballoon_process(struct work_struct *work) ((goal_pages - cur_pages) / selfballoon_uphysteresis); /* else if cur_pages == goal_pages, no change */ - balloon_set_new_target(tgt_pages); + useful_pages = max_pfn - totalreserve_pages; + if (selfballoon_min_usable_mb != 0) + floor_pages = totalreserve_pages + + MB2PAGES(selfballoon_min_usable_mb); + /* piecewise linear function ending in ~3% slope */ + else if (useful_pages < MB2PAGES(16)) + floor_pages = max_pfn; /* not worth ballooning */ + else if (useful_pages < MB2PAGES(64)) + floor_pages = totalreserve_pages + MB2PAGES(16) + + ((useful_pages - MB2PAGES(16)) >> 1); + else if (useful_pages < MB2PAGES(512)) + floor_pages = totalreserve_pages + MB2PAGES(40) + + ((useful_pages - MB2PAGES(40)) >> 3); + else /* useful_pages >= MB2PAGES(512) */ + floor_pages = totalreserve_pages + MB2PAGES(99) + + ((useful_pages - MB2PAGES(99)) >> 5); + if (tgt_pages < floor_pages) + tgt_pages = floor_pages; + balloon_set_new_target(tgt_pages + + balloon_stats.current_pages - totalram_pages); reset_timer = true; } #ifdef CONFIG_FRONTSWAP @@ -233,21 +267,20 @@ static void selfballoon_process(struct work_struct *work) #ifdef CONFIG_SYSFS -#include <linux/sysdev.h> #include <linux/capability.h> #define SELFBALLOON_SHOW(name, format, args...) 
\ - static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ + static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ { \ return sprintf(buf, format, ##args); \ } SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); -static ssize_t store_selfballooning(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_selfballooning(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -270,13 +303,13 @@ static ssize_t store_selfballooning(struct sys_device *dev, return count; } -static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, show_selfballooning, store_selfballooning); SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); -static ssize_t store_selfballoon_interval(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_selfballoon_interval(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -292,13 +325,13 @@ static ssize_t store_selfballoon_interval(struct sys_device *dev, return count; } -static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, show_selfballoon_interval, store_selfballoon_interval); SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); -static ssize_t store_selfballoon_downhys(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_selfballoon_downhys(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -314,14 +347,14 @@ static ssize_t store_selfballoon_downhys(struct sys_device *dev, return count; } -static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, show_selfballoon_downhys, store_selfballoon_downhys); SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); -static ssize_t store_selfballoon_uphys(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_selfballoon_uphys(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -337,14 +370,39 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev, return count; } -static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, show_selfballoon_uphys, store_selfballoon_uphys); +SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", + selfballoon_min_usable_mb); + +static ssize_t store_selfballoon_min_usable_mb(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_min_usable_mb = val; + return count; +} + +static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, + show_selfballoon_min_usable_mb, + store_selfballoon_min_usable_mb); + + #ifdef CONFIG_FRONTSWAP SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); -static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_frontswap_selfshrinking(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -366,13 +424,13 @@ static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, 
return count; } -static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, show_frontswap_selfshrinking, store_frontswap_selfshrinking); SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); -static ssize_t store_frontswap_inertia(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_frontswap_inertia(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -389,13 +447,13 @@ static ssize_t store_frontswap_inertia(struct sys_device *dev, return count; } -static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, show_frontswap_inertia, store_frontswap_inertia); SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); -static ssize_t store_frontswap_hysteresis(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t store_frontswap_hysteresis(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -411,20 +469,21 @@ static ssize_t store_frontswap_hysteresis(struct sys_device *dev, return count; } -static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, show_frontswap_hysteresis, store_frontswap_hysteresis); #endif /* CONFIG_FRONTSWAP */ static struct attribute *selfballoon_attrs[] = { - &attr_selfballooning.attr, - &attr_selfballoon_interval.attr, - &attr_selfballoon_downhysteresis.attr, - &attr_selfballoon_uphysteresis.attr, + &dev_attr_selfballooning.attr, + &dev_attr_selfballoon_interval.attr, + &dev_attr_selfballoon_downhysteresis.attr, + &dev_attr_selfballoon_uphysteresis.attr, + &dev_attr_selfballoon_min_usable_mb.attr, #ifdef CONFIG_FRONTSWAP - &attr_frontswap_selfshrinking.attr, - &attr_frontswap_hysteresis.attr, - &attr_frontswap_inertia.attr, + &dev_attr_frontswap_selfshrinking.attr, + &dev_attr_frontswap_hysteresis.attr, + &dev_attr_frontswap_inertia.attr, #endif NULL }; @@ -435,12 +494,12 @@ static struct attribute_group selfballoon_group = { }; #endif -int register_xen_selfballooning(struct sys_device *sysdev) +int register_xen_selfballooning(struct device *dev) { int error = -1; #ifdef CONFIG_SYSFS - error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); + error = sysfs_create_group(&dev->kobj, &selfballoon_group); #endif return error; } diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 8dca685358b4..31e2e9050c7a 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -1,4 +1,5 @@ obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o xenbus-objs = xenbus-objs += xenbus_client.o @@ -9,4 +10,5 @@ xenbus-objs += xenbus_probe.o xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o xenbus-objs += $(xenbus-be-objs-y) +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index cdacf923e073..566d2adbd6ea 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -32,13 +32,39 @@ #include <linux/slab.h> #include <linux/types.h> +#include <linux/spinlock.h> #include <linux/vmalloc.h> +#include <linux/export.h> #include <asm/xen/hypervisor.h> +#include <asm/xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> +#include <xen/balloon.h> #include <xen/events.h> #include <xen/grant_table.h> #include <xen/xenbus.h> 
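Both balloon files above follow the same sysdev-to-device conversion recipe: the show/store callbacks gain struct device and struct device_attribute parameters, SYSDEV_ATTR becomes DEVICE_ATTR (which emits a dev_attr_<name> object), and the attributes are registered either one by one with device_create_file() or wholesale through an attribute_group. A minimal sketch of the resulting shape, using a hypothetical "demo" attribute rather than any name from the patch:

	#include <linux/kernel.h>
	#include <linux/device.h>
	#include <linux/sysfs.h>

	static unsigned long demo_value;

	static ssize_t show_demo(struct device *dev,
				 struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "%lu\n", demo_value);
	}

	static ssize_t store_demo(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
	{
		if (sscanf(buf, "%lu", &demo_value) != 1)
			return -EINVAL;
		return count;
	}

	/* Emits "struct device_attribute dev_attr_demo". */
	static DEVICE_ATTR(demo, S_IRUGO | S_IWUSR, show_demo, store_demo);

	static struct attribute *demo_attrs[] = {
		&dev_attr_demo.attr,
		NULL
	};

	static struct attribute_group demo_group = {
		.attrs = demo_attrs
	};

	/* Registration against some struct device *dev:
	 *	error = sysfs_create_group(&dev->kobj, &demo_group);
	 */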
+#include <xen/xen.h> + +#include "xenbus_probe.h" + +struct xenbus_map_node { + struct list_head next; + union { + struct vm_struct *area; /* PV */ + struct page *page; /* HVM */ + }; + grant_handle_t handle; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; const char *xenbus_strstate(enum xenbus_state state) { @@ -434,39 +460,94 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { + return ring_ops->map(dev, gnt_ref, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, + .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; + struct xenbus_map_node *node; struct vm_struct *area; + pte_t *pte; *vaddr = NULL; - area = xen_alloc_vm_area(PAGE_SIZE); - if (!area) + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; + area = alloc_vm_area(PAGE_SIZE, &pte); + if (!area) { + kfree(node); + return -ENOMEM; + } + + op.host_addr = arbitrary_virt_to_machine(pte).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); if (op.status != GNTST_okay) { - xen_free_vm_area(area); + free_vm_area(area); + kfree(node); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); return op.status; } - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; + node->handle = op.handle; + node->area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); *vaddr = area->addr; return 0; } -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ + struct xenbus_map_node *node; + int err; + void *addr; + + *vaddr = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + if (err) + goto out_err; + + addr = pfn_to_kaddr(page_to_pfn(node->page)); + + err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + if (err) + goto out_err; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + return 0; + + out_err: + free_xenballooned_pages(1, &node->page); + kfree(node); + return err; +} /** @@ -486,12 +567,10 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, + dev->otherend_id); if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); @@ -522,46 +601,87 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring); */ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { - struct vm_struct *area; + return ring_ops->unmap(dev, vaddr); +} 
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; - - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. - */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; + unsigned int level; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->area->addr == vaddr) { + list_del(&node->next); + goto found; + } } - read_unlock(&vmlist_lock); + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); - if (!area) { + if (!node) { xenbus_dev_error(dev, -ENOENT, "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + op.handle = node->handle; + op.host_addr = arbitrary_virt_to_machine( + lookup_address((unsigned long)vaddr, &level)).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); if (op.status == GNTST_okay) - xen_free_vm_area(area); + free_vm_area(node->area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); + node->handle, op.status); + kfree(node); return op.status; } -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = pfn_to_kaddr(page_to_pfn(node->page)); + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + rv = xenbus_unmap_ring(dev, node->handle, addr); + + if (!rv) + free_xenballooned_pages(1, &node->page); + else + WARN(1, "Leaking %p\n", vaddr); + + kfree(node); + return rv; +} /** * xenbus_unmap_ring @@ -576,10 +696,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); @@ -611,3 +730,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path) return result; } EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_valloc_hvm, + .unmap = xenbus_unmap_ring_vfree_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ + if (xen_pv_domain()) + ring_ops = &ring_ops_pv; + else + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61ee8fd0..2eff7a6aaa20 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ 
b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h index c21db7513736..6e42800fa499 100644 --- a/drivers/xen/xenbus/xenbus_comms.h +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -31,6 +31,8 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H +#include <linux/fs.h> + int xs_init(void); int xb_init_comms(void); @@ -43,4 +45,6 @@ int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +extern const struct file_operations xen_xenbus_fops; + #endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 000000000000..3d3be78c1093 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,90 @@ +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/capability.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/xenbus_dev.h> + +#include "xenbus_comms.h" + +MODULE_LICENSE("GPL"); + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + printk(KERN_ERR "Could not register xenbus backend device\n"); + return err; +} + +static void __exit xenbus_backend_exit(void) +{ + misc_deregister(&xenbus_backend_dev); +} + +module_init(xenbus_backend_init); +module_exit(xenbus_backend_exit); diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index bbd000f88af7..527dc2a3b89f 100644 --- a/drivers/xen/xenfs/xenbus.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -52,13 +52,17 @@ #include <linux/namei.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/module.h> -#include "xenfs.h" -#include "../xenbus/xenbus_comms.h" +#include "xenbus_comms.h" #include <xen/xenbus.h> +#include <xen/xen.h> #include <asm/xen/hypervisor.h> 
+MODULE_LICENSE("GPL"); + /* * An element of a list of outstanding transactions, for which we're * still waiting a reply. @@ -101,7 +105,7 @@ struct xenbus_file_priv { unsigned int len; union { struct xsd_sockmsg msg; - char buffer[PAGE_SIZE]; + char buffer[XENSTORE_PAYLOAD_MAX]; } u; /* Response queue. */ @@ -583,7 +587,7 @@ static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) return 0; } -const struct file_operations xenbus_file_ops = { +const struct file_operations xen_xenbus_fops = { .read = xenbus_file_read, .write = xenbus_file_write, .open = xenbus_file_open, @@ -591,3 +595,31 @@ const struct file_operations xenbus_file_ops = { .poll = xenbus_file_poll, .llseek = no_llseek, }; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + printk(KERN_ERR "Could not register xenbus frontend device\n"); + return err; +} + +static void __exit xenbus_exit(void) +{ + misc_deregister(&xenbus_dev); +} + +module_init(xenbus_init); +module_exit(xenbus_exit); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index bd2f90c9ac8b..3864967202b5 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -46,6 +46,7 @@ #include <linux/mutex.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/module.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -290,14 +291,9 @@ void xenbus_dev_shutdown(struct device *_dev) EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name) + struct xen_bus_type *bus) { - drv->driver.name = drv->name; drv->driver.bus = &bus->bus; - drv->driver.owner = owner; - drv->driver.mod_name = mod_name; return driver_register(&drv->driver); } @@ -309,8 +305,7 @@ void xenbus_unregister_driver(struct xenbus_driver *drv) } EXPORT_SYMBOL_GPL(xenbus_unregister_driver); -struct xb_find_info -{ +struct xb_find_info { struct xenbus_device *dev; const char *nodename; }; @@ -639,7 +634,7 @@ int xenbus_dev_cancel(struct device *dev) EXPORT_SYMBOL_GPL(xenbus_dev_cancel); /* A flag to determine if xenstored is 'ready' (i.e. has started) */ -int xenstored_ready = 0; +int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb) @@ -684,64 +679,76 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - err = -ENODEV; - if (!xen_domain()) - return err; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. 
- */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - goto out_error; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; + return 0; - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) - goto out_error; + out_err: + if (page != 0) + free_page(page); + return err; +} - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; +static int __init xenbus_init(void) +{ + int err = 0; - xen_store_interface = mfn_to_virt(xen_store_mfn); + if (!xen_domain()) + return -ENODEV; + + xenbus_ring_ops_init(); + + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) + xenstored_ready = 1; + else { + err = xenstored_local_init(); if (err) goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); - xenstored_ready = 1; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. 
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index b814935378c7..bb4f92ed8730 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -36,8 +36,7 @@ #define XEN_BUS_ID_SIZE 20
-struct xen_bus_type
-{
+struct xen_bus_type {
char *root; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
@@ -54,9 +53,7 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv,
-					 struct xen_bus_type *bus,
-					 struct module *owner,
-					 const char *mod_name);
+					 struct xen_bus_type *bus);
extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename);
@@ -77,4 +74,6 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch, extern int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node);
+void xenbus_ring_ops_init(void);
+
#endif
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78d..257be37d9091 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -42,6 +42,7 @@ #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/page.h> #include <asm/pgtable.h>
@@ -104,8 +105,6 @@ static int xenbus_uevent_backend(struct device *dev, xdev = to_xenbus_device(dev); bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-	if (xdev == NULL)
-		return -ENODEV;
if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) return -ENOMEM;
@@ -233,15 +232,13 @@ int xenbus_dev_is_online(struct xenbus_device *dev) } EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
-int __xenbus_register_backend(struct xenbus_driver *drv,
-			      struct module *owner, const char *mod_name)
+int xenbus_register_backend(struct xenbus_driver *drv)
{ drv->read_otherend_details = read_frontend_details;
-	return xenbus_register_driver_common(drv, &xenbus_backend,
-					     owner, mod_name);
+	return xenbus_register_driver_common(drv, &xenbus_backend);
}
-EXPORT_SYMBOL_GPL(__xenbus_register_backend);
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
static int backend_probe_and_watch(struct notifier_block *notifier, unsigned long event,
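With the owner/mod_name plumbing gone from xenbus_register_driver_common(), backends now register through plain xenbus_register_backend() (the frontend side follows below). A hypothetical skeleton of a caller after this change; the "demoback" device type and the empty callbacks are invented, and filling in driver.name/.owner by hand mimics what a definition helper elsewhere in this series is assumed to do:

#include <linux/module.h>
#include <xen/xenbus.h>

static int demoback_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	return 0;	/* a real backend would set up rings here */
}

static int demoback_remove(struct xenbus_device *dev)
{
	return 0;
}

static const struct xenbus_device_id demoback_ids[] = {
	{ "demoback" },	/* matches backend/demoback/... nodes */
	{ "" }
};

static struct xenbus_driver demoback_driver = {
	.ids = demoback_ids,
	.probe = demoback_probe,
	.remove = demoback_remove,
	/* what the series' definition helper is assumed to fill in: */
	.driver = {
		.name = "demoback",
		.owner = THIS_MODULE,
	},
};

static int __init demoback_init(void)
{
	return xenbus_register_backend(&demoback_driver);
}
module_init(demoback_init);

static void __exit demoback_exit(void)
{
	xenbus_unregister_driver(&demoback_driver);
}
module_exit(demoback_exit);

MODULE_LICENSE("GPL");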
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a560..9c57819df51a 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -13,6 +13,7 @@ #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/io.h>
+#include <linux/module.h>
#include <asm/page.h> #include <asm/pgtable.h>
@@ -229,15 +230,13 @@ static void wait_for_devices(struct xenbus_driver *xendrv) print_device_status); }
-int __xenbus_register_frontend(struct xenbus_driver *drv,
-			       struct module *owner, const char *mod_name)
+int xenbus_register_frontend(struct xenbus_driver *drv)
{ int ret; drv->read_otherend_details = read_backend_details;
-	ret = xenbus_register_driver_common(drv, &xenbus_frontend,
-					    owner, mod_name);
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
if (ret) return ret;
@@ -246,12 +245,133 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, return 0; }
-EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);
+
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data)
{
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
/* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch);
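The reset path above synchronizes with the backend purely through xenstore: write the frontend's state node, then sleep on a wait queue until a watch on the backend's state node reports the expected value. The same pattern in isolation, as a hypothetical helper (the demo_* names are invented; note that a xenstore watch also fires once at registration time, so the current value is picked up without a separate initial read):

#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/wait.h>
#include <xen/xenbus.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_state_wq);
static int demo_state;

static void demo_state_changed(struct xenbus_watch *w,
			       const char **v, unsigned int l)
{
	/* v[XS_WATCH_PATH] is the node that fired, e.g. ".../state" */
	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &demo_state);
	wake_up(&demo_state_wq);
}

static int demo_wait_for_state(const char *state_path, int expected)
{
	struct xenbus_watch w = {
		.node = state_path,
		.callback = demo_state_changed,
	};
	int err;

	demo_state = XenbusStateUnknown;
	err = register_xenbus_watch(&w);
	if (err)
		return err;
	/* give the other end a few seconds to walk its state machine */
	wait_event_interruptible_timeout(demo_state_wq,
					 demo_state == expected, 5 * HZ);
	unregister_xenbus_watch(&w);
	return demo_state == expected ? 0 : -ETIMEDOUT;
}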
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690075af..d1c217b23a42 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@ #include <linux/module.h> #include <linux/mutex.h> #include <xen/xenbus.h>
+#include <xen/xen.h>
#include "xenbus_comms.h" struct xs_stored_msg {
@@ -531,21 +532,18 @@ int xenbus_printf(struct xenbus_transaction t, { va_list ap; int ret;
-#define PRINTF_BUFFER_SIZE 4096
-	char *printf_buffer;
-
-	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
-	if (printf_buffer == NULL)
-		return -ENOMEM;
+	char *buf;
va_start(ap, fmt);
-	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+	buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
va_end(ap);
-	BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
-	ret = xenbus_write(t, dir, node, printf_buffer);
+	if (!buf)
+		return -ENOMEM;
-	kfree(printf_buffer);
+	ret = xenbus_write(t, dir, node, buf);
+
+	kfree(buf);
return ret; }
@@ -638,8 +636,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token);
-	/* Ignore errors due to multiple registration. */
-	if ((err != 0) && (err != -EEXIST)) {
+	if (err) {
spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock);
@@ -801,6 +798,12 @@ static int process_msg(void) goto out; }
+	if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
+		kfree(msg);
+		err = -EINVAL;
+		goto out;
+	}
+
body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); if (body == NULL) { kfree(msg);
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 4fde9440fe1f..b019865fcc56 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o
-xenfs-y = super.o xenbus.o privcmd.o
+xenfs-y = super.o
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 1aa389719846..a84b53c01436 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,6 +16,8 @@ #include <xen/xen.h> #include "xenfs.h"
+#include "../privcmd.h"
+#include "../xenbus/xenbus_comms.h"
#include <asm/xen/hypervisor.h>
@@ -82,9 +84,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr xenfs_files[] = { [1] = {},
-		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+		{ "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
-		{ "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
+		{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{""}, }; int rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index b68aa6200003..6b80c7779c02 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -1,8 +1,6 @@ #ifndef _XENFS_XENBUS_H #define _XENFS_XENBUS_H
-extern const struct file_operations xenbus_file_ops;
-extern const struct file_operations privcmd_file_ops;
extern const struct file_operations xsd_kva_file_ops;
extern const struct file_operations xsd_port_file_ops;
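Two defensive changes in the xenbus_xs.c hunks deserve a note: process_msg() now rejects any reply whose header claims more than XENSTORE_PAYLOAD_MAX bytes before allocating for it, and xenbus_printf() swaps its fixed 4 KiB buffer (guarded by a BUG_ON on overflow) for kvasprintf(), which sizes the allocation to the formatted string. The kvasprintf idiom in isolation, with invented demo_* names standing in for the real producer and consumer:

#include <stdarg.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static int demo_send(const char *s)
{
	pr_info("%s\n", s);	/* stand-in for the real consumer */
	return 0;
}

static int demo_emit(const char *fmt, ...)
{
	va_list ap;
	char *buf;
	int ret;

	va_start(ap, fmt);
	/* allocates exactly the needed size: no fixed bound, no BUG_ON */
	buf = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);

	if (!buf)
		return -ENOMEM;

	ret = demo_send(buf);
	kfree(buf);
	return ret;
}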