summaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig17
-rw-r--r--drivers/xen/Makefile6
-rw-r--r--drivers/xen/balloon.c76
-rw-r--r--drivers/xen/events.c135
-rw-r--r--drivers/xen/evtchn.c2
-rw-r--r--drivers/xen/gntalloc.c125
-rw-r--r--drivers/xen/gntdev.c78
-rw-r--r--drivers/xen/grant-table.c516
-rw-r--r--drivers/xen/manage.c1
-rw-r--r--drivers/xen/pci.c107
-rw-r--r--drivers/xen/privcmd.c (renamed from drivers/xen/xenfs/privcmd.c)41
-rw-r--r--drivers/xen/privcmd.h3
-rw-r--r--drivers/xen/swiotlb-xen.c75
-rw-r--r--drivers/xen/xen-balloon.c103
-rw-r--r--drivers/xen/xen-pciback/conf_space.c4
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c5
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.c3
-rw-r--r--drivers/xen/xen-pciback/passthrough.c34
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c39
-rw-r--r--drivers/xen/xen-pciback/pciback.h32
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c1
-rw-r--r--drivers/xen/xen-pciback/vpci.c35
-rw-r--r--drivers/xen/xen-pciback/xenbus.c46
-rw-r--r--drivers/xen/xen-selfballoon.c135
-rw-r--r--drivers/xen/xenbus/Makefile2
-rw-r--r--drivers/xen/xenbus/xenbus_client.c209
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c4
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h4
-rw-r--r--drivers/xen/xenbus/xenbus_dev_backend.c90
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c (renamed from drivers/xen/xenfs/xenbus.c)40
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c116
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h9
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c11
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c130
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c27
-rw-r--r--drivers/xen/xenfs/Makefile2
-rw-r--r--drivers/xen/xenfs/super.c6
-rw-r--r--drivers/xen/xenfs/xenfs.h2
38 files changed, 1739 insertions, 532 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5f7ff8e2fc14..a1ced521cf74 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -86,6 +86,7 @@ config XEN_BACKEND
config XENFS
tristate "Xen filesystem"
+ select XEN_PRIVCMD
default y
help
The xen filesystem provides a way for domains to share
@@ -137,16 +138,6 @@ config XEN_GRANT_DEV_ALLOC
to other domains. This can be used to implement frontend drivers
or as part of an inter-domain shared memory channel.
-config XEN_PLATFORM_PCI
- tristate "xen platform pci device driver"
- depends on XEN_PVHVM && PCI
- default m
- help
- Driver for the Xen PCI Platform device: it is responsible for
- initializing xenbus and grant_table when running in a Xen HVM
- domain. As a consequence this driver is required to run any Xen PV
- frontend on Xen HVM.
-
config SWIOTLB_XEN
def_bool y
depends on PCI
@@ -181,4 +172,10 @@ config XEN_PCIDEV_BACKEND
xen-pciback.hide=(03:00.0)(04:00.0)
If in doubt, say m.
+
+config XEN_PRIVCMD
+ tristate
+ depends on XEN
+ default m
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 72bbb27d7a68..aa31337192cc 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -14,14 +14,14 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
-
-xen-platform-pci-y := platform-pci.o
+xen-privcmd-y := privcmd.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8ff07f..31ab82fda38a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -39,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
@@ -93,8 +94,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
-#define inc_totalhigh_pages() do {} while(0)
-#define dec_totalhigh_pages() do {} while(0)
+#define inc_totalhigh_pages() do {} while (0)
+#define dec_totalhigh_pages() do {} while (0)
#endif
/* List of ballooned pages, threaded through the mem_map array. */
@@ -154,8 +155,7 @@ static struct page *balloon_retrieve(bool prefer_highmem)
if (PageHighMem(page)) {
balloon_stats.balloon_high--;
inc_totalhigh_pages();
- }
- else
+ } else
balloon_stats.balloon_low--;
totalram_pages++;
@@ -422,7 +422,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
(unsigned long)__va(pfn << PAGE_SHIFT),
__pte_ma(0), 0);
BUG_ON(ret);
- }
+ }
}
@@ -501,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
+ * @highmem: allow highmem pages
* @return 0 on success, error otherwise
*/
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
{
int pgno = 0;
- struct page* page;
+ struct page *page;
mutex_lock(&balloon_mutex);
while (pgno < nr_pages) {
- page = balloon_retrieve(true);
- if (page) {
+ page = balloon_retrieve(highmem);
+ if (page && (highmem || !PageHighMem(page))) {
pages[pgno++] = page;
} else {
enum bp_state st;
- st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+ if (page)
+ balloon_append(page);
+ st = decrease_reservation(nr_pages - pgno,
+ highmem ? GFP_HIGHUSER : GFP_USER);
if (st != BP_DONE)
goto out_undo;
}
@@ -536,7 +540,7 @@ EXPORT_SYMBOL(alloc_xenballooned_pages);
* @nr_pages: Number of pages
* @pages: pages to return
*/
-void free_xenballooned_pages(int nr_pages, struct page** pages)
+void free_xenballooned_pages(int nr_pages, struct page **pages)
{
int i;
@@ -555,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages)
}
EXPORT_SYMBOL(free_xenballooned_pages);
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+ unsigned long pages)
{
unsigned long pfn, extra_pfn_end;
struct page *page;
+ /*
+ * If the amount of usable memory has been limited (e.g., with
+ * the 'mem' command line parameter), don't add pages beyond
+ * this limit.
+ */
+ extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+ for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+ page = pfn_to_page(pfn);
+ /* totalram_pages and totalhigh_pages do not
+ include the boot-time balloon extension, so
+ don't subtract from it. */
+ __balloon_append(page);
+ }
+}
+
+static int __init balloon_init(void)
+{
+ int i;
+
if (!xen_domain())
return -ENODEV;
pr_info("xen/balloon: Initialising balloon driver.\n");
- balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+ balloon_stats.current_pages = xen_pv_domain()
+ ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+ : max_pfn;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -584,24 +611,13 @@ static int __init balloon_init(void)
#endif
/*
- * Initialise the balloon with excess memory space. We need
- * to make sure we don't add memory which doesn't exist or
- * logically exist. The E820 map can be trimmed to be smaller
- * than the amount of physical memory due to the mem= command
- * line parameter. And if this is a 32-bit non-HIGHMEM kernel
- * on a system with memory which requires highmem to access,
- * don't try to use it.
+ * Initialize the balloon with pages from the extra memory
+ * regions (see arch/x86/xen/setup.c).
*/
- extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
- (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
- for (pfn = PFN_UP(xen_extra_mem_start);
- pfn < extra_pfn_end;
- pfn++) {
- page = pfn_to_page(pfn);
- /* totalram_pages and totalhigh_pages do not include the boot-time
- balloon extension, so don't subtract from it. */
- __balloon_append(page);
- }
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+ if (xen_extra_mem[i].size)
+ balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+ PFN_DOWN(xen_extra_mem[i].size));
return 0;
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7523719bf8a4..e5e5812a1014 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -85,9 +85,9 @@ enum xen_irq_type {
* IPI - IPI vector
* EVTCHN -
*/
-struct irq_info
-{
+struct irq_info {
struct list_head list;
+ int refcnt;
enum xen_irq_type type; /* type */
unsigned irq;
unsigned short evtchn; /* event channel */
@@ -282,9 +282,9 @@ static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
{
- return (sh->evtchn_pending[idx] &
+ return sh->evtchn_pending[idx] &
per_cpu(cpu_evtchn_mask, cpu)[idx] &
- ~sh->evtchn_mask[idx]);
+ ~sh->evtchn_mask[idx];
}
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
@@ -407,6 +407,7 @@ static void xen_irq_init(unsigned irq)
panic("Unable to allocate metadata for IRQ%d\n", irq);
info->type = IRQT_UNBOUND;
+ info->refcnt = -1;
irq_set_handler_data(irq, info);
@@ -432,7 +433,8 @@ static int __must_check xen_allocate_irq_dynamic(void)
irq = irq_alloc_desc_from(first, -1);
- xen_irq_init(irq);
+ if (irq >= 0)
+ xen_irq_init(irq);
return irq;
}
@@ -469,6 +471,8 @@ static void xen_free_irq(unsigned irq)
irq_set_handler_data(irq, NULL);
+ WARN_ON(info->refcnt > 0);
+
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@@ -637,7 +641,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
if (irq != -1) {
printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
irq, gsi);
- goto out; /* XXX need refcount? */
+ goto out;
}
irq = xen_allocate_irq_gsi(gsi);
@@ -713,7 +717,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
mutex_lock(&irq_mapping_update_lock);
irq = xen_allocate_irq_dynamic();
- if (irq == -1)
+ if (irq < 0)
goto out;
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,7 +733,7 @@ out:
error_irq:
mutex_unlock(&irq_mapping_update_lock);
xen_free_irq(irq);
- return -1;
+ return ret;
}
#endif
@@ -779,7 +783,7 @@ int xen_irq_from_pirq(unsigned pirq)
mutex_lock(&irq_mapping_update_lock);
list_for_each_entry(info, &xen_irq_list_head, list) {
- if (info == NULL || info->type != IRQT_PIRQ)
+ if (info->type != IRQT_PIRQ)
continue;
irq = info->irq;
if (info->u.pirq.pirq == pirq)
@@ -872,11 +876,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+ struct evtchn_status status;
+ int port, rc = -ENOENT;
+
+ memset(&status, 0, sizeof(status));
+ for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+ status.dom = DOMID_SELF;
+ status.port = port;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+ if (rc < 0)
+ continue;
+ if (status.status != EVTCHNSTAT_virq)
+ continue;
+ if (status.u.virq == virq && status.vcpu == cpu) {
+ rc = port;
+ break;
+ }
+ }
+ return rc;
+}
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
+ int evtchn, irq, ret;
mutex_lock(&irq_mapping_update_lock);
@@ -892,10 +917,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq);
+ if (ret == 0)
+ evtchn = bind_virq.port;
+ else {
+ if (ret == -EEXIST)
+ ret = find_virq(virq, cpu);
+ BUG_ON(ret < 0);
+ evtchn = ret;
+ }
xen_irq_info_virq_init(cpu, irq, evtchn, virq);
@@ -912,9 +943,16 @@ static void unbind_from_irq(unsigned int irq)
{
struct evtchn_close close;
int evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = irq_get_handler_data(irq);
mutex_lock(&irq_mapping_update_lock);
+ if (info->refcnt > 0) {
+ info->refcnt--;
+ if (info->refcnt != 0)
+ goto done;
+ }
+
if (VALID_EVTCHN(evtchn)) {
close.port = evtchn;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
@@ -943,6 +981,7 @@ static void unbind_from_irq(unsigned int irq)
xen_free_irq(irq);
+ done:
mutex_unlock(&irq_mapping_update_lock);
}
@@ -1021,7 +1060,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
if (irq < 0)
return irq;
- irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
+ irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
@@ -1038,6 +1077,69 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+int evtchn_make_refcounted(unsigned int evtchn)
+{
+ int irq = evtchn_to_irq[evtchn];
+ struct irq_info *info;
+
+ if (irq == -1)
+ return -ENOENT;
+
+ info = irq_get_handler_data(irq);
+
+ if (!info)
+ return -ENOENT;
+
+ WARN_ON(info->refcnt != -1);
+
+ info->refcnt = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
+
+int evtchn_get(unsigned int evtchn)
+{
+ int irq;
+ struct irq_info *info;
+ int err = -ENOENT;
+
+ if (evtchn >= NR_EVENT_CHANNELS)
+ return -EINVAL;
+
+ mutex_lock(&irq_mapping_update_lock);
+
+ irq = evtchn_to_irq[evtchn];
+ if (irq == -1)
+ goto done;
+
+ info = irq_get_handler_data(irq);
+
+ if (!info)
+ goto done;
+
+ err = -EINVAL;
+ if (info->refcnt <= 0)
+ goto done;
+
+ info->refcnt++;
+ err = 0;
+ done:
+ mutex_unlock(&irq_mapping_update_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(evtchn_get);
+
+void evtchn_put(unsigned int evtchn)
+{
+ int irq = evtchn_to_irq[evtchn];
+ if (WARN_ON(irq == -1))
+ return;
+ unbind_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(evtchn_put);
+
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
int irq = per_cpu(ipi_to_irq, cpu)[vector];
@@ -1152,7 +1254,7 @@ static void __xen_evtchn_do_upcall(void)
int cpu = get_cpu();
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
- unsigned count;
+ unsigned count;
do {
unsigned long pending_words;
@@ -1670,6 +1772,7 @@ void __init xen_init_IRQ(void)
evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
for (i = 0; i < NR_EVENT_CHANNELS; i++)
evtchn_to_irq[i] = -1;
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index dbc13e94b612..b1f60a0c0bea 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
u->name, (void *)(unsigned long)port);
if (rc >= 0)
- rc = 0;
+ rc = evtchn_make_refcounted(port);
return rc;
}
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index f6832f46aea4..934985d14c24 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
"the gntalloc device");
static LIST_HEAD(gref_list);
-static DEFINE_SPINLOCK(gref_lock);
+static DEFINE_MUTEX(gref_mutex);
static int gref_size;
struct notify_info {
@@ -99,6 +99,12 @@ struct gntalloc_file_private_data {
uint64_t index;
};
+struct gntalloc_vma_private_data {
+ struct gntalloc_gref *gref;
+ int users;
+ int count;
+};
+
static void __del_gref(struct gntalloc_gref *gref);
static void do_cleanup(void)
@@ -135,7 +141,7 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
/* Grant foreign access to the page. */
gref->gref_id = gnttab_grant_foreign_access(op->domid,
pfn_to_mfn(page_to_pfn(gref->page)), readonly);
- if (gref->gref_id < 0) {
+ if ((int)gref->gref_id < 0) {
rc = gref->gref_id;
goto undo;
}
@@ -143,15 +149,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
}
/* Add to gref lists. */
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
list_splice_tail(&queue_gref, &gref_list);
list_splice_tail(&queue_file, &priv->list);
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
return 0;
undo:
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
gref_size -= (op->count - i);
list_for_each_entry(gref, &queue_file, next_file) {
@@ -167,7 +173,7 @@ undo:
*/
if (unlikely(!list_empty(&queue_gref)))
list_splice_tail(&queue_gref, &gref_list);
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
return rc;
}
@@ -178,8 +184,10 @@ static void __del_gref(struct gntalloc_gref *gref)
tmp[gref->notify.pgoff] = 0;
kunmap(gref->page);
}
- if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+ if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(gref->notify.event);
+ evtchn_put(gref->notify.event);
+ }
gref->notify.flags = 0;
@@ -189,6 +197,8 @@ static void __del_gref(struct gntalloc_gref *gref)
if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
return;
+
+ gnttab_free_grant_reference(gref->gref_id);
}
gref_size--;
@@ -251,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
pr_debug("%s: priv %p\n", __func__, priv);
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
while (!list_empty(&priv->list)) {
gref = list_entry(priv->list.next,
struct gntalloc_gref, next_file);
@@ -261,7 +271,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
__del_gref(gref);
}
kfree(priv);
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
return 0;
}
@@ -280,27 +290,27 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
goto out;
}
- gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+ gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_TEMPORARY);
if (!gref_ids) {
rc = -ENOMEM;
goto out;
}
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
/* Clean up pages that were at zero (local) users but were still mapped
* by remote domains. Since those pages count towards the limit that we
* are about to enforce, removing them here is a good idea.
*/
do_cleanup();
if (gref_size + op.count > limit) {
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
rc = -ENOSPC;
goto out_free;
}
gref_size += op.count;
op.index = priv->index;
priv->index += op.count * PAGE_SIZE;
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
rc = add_grefs(&op, gref_ids, priv);
if (rc < 0)
@@ -343,7 +353,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
goto dealloc_grant_out;
}
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
gref = find_grefs(priv, op.index, op.count);
if (gref) {
/* Remove from the file list only, and decrease reference count.
@@ -363,7 +373,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
do_cleanup();
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
dealloc_grant_out:
return rc;
}
@@ -383,7 +393,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
index = op.index & ~(PAGE_SIZE - 1);
pgoff = op.index & (PAGE_SIZE - 1);
- spin_lock(&gref_lock);
+ mutex_lock(&gref_mutex);
gref = find_grefs(priv, index, 1);
if (!gref) {
@@ -396,12 +406,30 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
goto unlock_out;
}
+ /* We need to grab a reference to the event channel we are going to use
+ * to send the notify before releasing the reference we may already have
+ * (if someone has called this ioctl twice). This is required so that
+ * it is possible to change the clear_byte part of the notification
+ * without disturbing the event channel part, which may now be the last
+ * reference to that event channel.
+ */
+ if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+ if (evtchn_get(op.event_channel_port)) {
+ rc = -EINVAL;
+ goto unlock_out;
+ }
+ }
+
+ if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+ evtchn_put(gref->notify.event);
+
gref->notify.flags = op.action;
gref->notify.pgoff = pgoff;
gref->notify.event = op.event_channel_port;
rc = 0;
+
unlock_out:
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
return rc;
}
@@ -429,26 +457,40 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
static void gntalloc_vma_open(struct vm_area_struct *vma)
{
- struct gntalloc_gref *gref = vma->vm_private_data;
- if (!gref)
+ struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+
+ if (!priv)
return;
- spin_lock(&gref_lock);
- gref->users++;
- spin_unlock(&gref_lock);
+ mutex_lock(&gref_mutex);
+ priv->users++;
+ mutex_unlock(&gref_mutex);
}
static void gntalloc_vma_close(struct vm_area_struct *vma)
{
- struct gntalloc_gref *gref = vma->vm_private_data;
- if (!gref)
+ struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+ struct gntalloc_gref *gref, *next;
+ int i;
+
+ if (!priv)
return;
- spin_lock(&gref_lock);
- gref->users--;
- if (gref->users == 0)
- __del_gref(gref);
- spin_unlock(&gref_lock);
+ mutex_lock(&gref_mutex);
+ priv->users--;
+ if (priv->users == 0) {
+ gref = priv->gref;
+ for (i = 0; i < priv->count; i++) {
+ gref->users--;
+ next = list_entry(gref->next_gref.next,
+ struct gntalloc_gref, next_gref);
+ if (gref->users == 0)
+ __del_gref(gref);
+ gref = next;
+ }
+ kfree(priv);
+ }
+ mutex_unlock(&gref_mutex);
}
static struct vm_operations_struct gntalloc_vmops = {
@@ -459,30 +501,41 @@ static struct vm_operations_struct gntalloc_vmops = {
static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct gntalloc_file_private_data *priv = filp->private_data;
+ struct gntalloc_vma_private_data *vm_priv;
struct gntalloc_gref *gref;
int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
int rv, i;
- pr_debug("%s: priv %p, page %lu+%d\n", __func__,
- priv, vma->vm_pgoff, count);
-
if (!(vma->vm_flags & VM_SHARED)) {
printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
return -EINVAL;
}
- spin_lock(&gref_lock);
+ vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL);
+ if (!vm_priv)
+ return -ENOMEM;
+
+ mutex_lock(&gref_mutex);
+
+ pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
+ priv, vm_priv, vma->vm_pgoff, count);
+
gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
if (gref == NULL) {
rv = -ENOENT;
pr_debug("%s: Could not find grant reference",
__func__);
+ kfree(vm_priv);
goto out_unlock;
}
- vma->vm_private_data = gref;
+ vm_priv->gref = gref;
+ vm_priv->users = 1;
+ vm_priv->count = count;
+
+ vma->vm_private_data = vm_priv;
- vma->vm_flags |= VM_RESERVED;
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
vma->vm_ops = &gntalloc_vmops;
@@ -499,7 +552,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
rv = 0;
out_unlock:
- spin_unlock(&gref_lock);
+ mutex_unlock(&gref_mutex);
return rv;
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26cf0c2..99d8151c824a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@ struct grant_map {
struct ioctl_gntdev_grant_ref *grants;
struct gnttab_map_grant_ref *map_ops;
struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
struct page **pages;
};
@@ -113,22 +114,25 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
if (NULL == add)
return NULL;
- add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
- add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
- add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
- add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
+ add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+ add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+ add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+ add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
+ NULL == add->kmap_ops ||
NULL == add->pages)
goto err;
- if (alloc_xenballooned_pages(count, add->pages))
+ if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
goto err;
for (i = 0; i < count; i++) {
add->map_ops[i].handle = -1;
add->unmap_ops[i].handle = -1;
+ add->kmap_ops[i].handle = -1;
}
add->index = 0;
@@ -142,6 +146,7 @@ err:
kfree(add->grants);
kfree(add->map_ops);
kfree(add->unmap_ops);
+ kfree(add->kmap_ops);
kfree(add);
return NULL;
}
@@ -190,6 +195,7 @@ static void gntdev_put_map(struct grant_map *map)
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
+ evtchn_put(map->notify.event);
}
if (map->pages) {
@@ -243,10 +249,35 @@ static int map_grant_pages(struct grant_map *map)
gnttab_set_unmap_op(&map->unmap_ops[i], addr,
map->flags, -1 /* handle */);
}
+ } else {
+ /*
+ * Setup the map_ops corresponding to the pte entries pointing
+ * to the kernel linear addresses of the struct pages.
+ * These ptes are completely different from the user ptes dealt
+ * with find_grant_ptes.
+ */
+ for (i = 0; i < map->count; i++) {
+ unsigned level;
+ unsigned long address = (unsigned long)
+ pfn_to_kaddr(page_to_pfn(map->pages[i]));
+ pte_t *ptep;
+ u64 pte_maddr = 0;
+ BUG_ON(PageHighMem(map->pages[i]));
+
+ ptep = lookup_address(address, &level);
+ pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+ gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+ map->flags |
+ GNTMAP_host_map |
+ GNTMAP_contains_pte,
+ map->grants[i].ref,
+ map->grants[i].domid);
+ }
}
pr_debug("map %d+%d\n", map->index, map->count);
- err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+ err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+ map->pages, map->count);
if (err)
return err;
@@ -283,7 +314,8 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
}
}
- err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
+ err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
+ pages, true);
if (err)
return err;
@@ -462,13 +494,11 @@ static int gntdev_release(struct inode *inode, struct file *flip)
pr_debug("priv %p\n", priv);
- spin_lock(&priv->lock);
while (!list_empty(&priv->maps)) {
map = list_entry(priv->maps.next, struct grant_map, next);
list_del(&map->next);
gntdev_put_map(map);
}
- spin_unlock(&priv->lock);
if (use_ptemod)
mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +562,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- gntdev_put_map(map);
err = 0;
}
spin_unlock(&priv->lock);
+ if (map)
+ gntdev_put_map(map);
return err;
}
@@ -571,6 +602,8 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
struct ioctl_gntdev_unmap_notify op;
struct grant_map *map;
int rc;
+ int out_flags;
+ unsigned int out_event;
if (copy_from_user(&op, u, sizeof(op)))
return -EFAULT;
@@ -578,6 +611,21 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
return -EINVAL;
+ /* We need to grab a reference to the event channel we are going to use
+ * to send the notify before releasing the reference we may already have
+ * (if someone has called this ioctl twice). This is required so that
+ * it is possible to change the clear_byte part of the notification
+ * without disturbing the event channel part, which may now be the last
+ * reference to that event channel.
+ */
+ if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+ if (evtchn_get(op.event_channel_port))
+ return -EINVAL;
+ }
+
+ out_flags = op.action;
+ out_event = op.event_channel_port;
+
spin_lock(&priv->lock);
list_for_each_entry(map, &priv->maps, next) {
@@ -596,12 +644,22 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
goto unlock_out;
}
+ out_flags = map->notify.flags;
+ out_event = map->notify.event;
+
map->notify.flags = op.action;
map->notify.addr = op.index - (map->index << PAGE_SHIFT);
map->notify.event = op.event_channel_port;
+
rc = 0;
+
unlock_out:
spin_unlock(&priv->lock);
+
+ /* Drop the reference to the event channel we did not save in the map */
+ if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
+ evtchn_put(out_event);
+
return rc;
}
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b347b24a..1cd94daa71db 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -44,16 +44,19 @@
#include <xen/page.h>
#include <xen/grant_table.h>
#include <xen/interface/memory.h>
+#include <xen/hvc-console.h>
#include <asm/xen/hypercall.h>
#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
-
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ? \
+(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \
+(PAGE_SIZE / sizeof(union grant_entry_v2)))
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
@@ -64,13 +67,97 @@ static DEFINE_SPINLOCK(gnttab_list_lock);
unsigned long xen_hvm_resume_frames;
EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
-static struct grant_entry *shared;
+static union {
+ struct grant_entry_v1 *v1;
+ union grant_entry_v2 *v2;
+ void *addr;
+} gnttab_shared;
+
+/*This is a structure of function pointers for grant table*/
+struct gnttab_ops {
+ /*
+ * Mapping a list of frames for storing grant entries. Frames parameter
+ * is used to store grant table address when grant table being setup,
+ * nr_gframes is the number of frames to map grant table. Returning
+ * GNTST_okay means success and negative value means failure.
+ */
+ int (*map_frames)(unsigned long *frames, unsigned int nr_gframes);
+ /*
+ * Release a list of frames which are mapped in map_frames for grant
+ * entry status.
+ */
+ void (*unmap_frames)(void);
+ /*
+ * Introducing a valid entry into the grant table, granting the frame of
+ * this grant entry to domain for accessing or transfering. Ref
+ * parameter is reference of this introduced grant entry, domid is id of
+ * granted domain, frame is the page frame to be granted, and flags is
+ * status of the grant entry to be updated.
+ */
+ void (*update_entry)(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned flags);
+ /*
+ * Stop granting a grant entry to domain for accessing. Ref parameter is
+ * reference of a grant entry whose grant access will be stopped,
+ * readonly is not in use in this function. If the grant entry is
+ * currently mapped for reading or writing, just return failure(==0)
+ * directly and don't tear down the grant access. Otherwise, stop grant
+ * access for this entry and return success(==1).
+ */
+ int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
+ /*
+ * Stop granting a grant entry to domain for transfer. Ref parameter is
+ * reference of a grant entry whose grant transfer will be stopped. If
+ * tranfer has not started, just reclaim the grant entry and return
+ * failure(==0). Otherwise, wait for the transfer to complete and then
+ * return the frame.
+ */
+ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
+ /*
+ * Query the status of a grant entry. Ref parameter is reference of
+ * queried grant entry, return value is the status of queried entry.
+ * Detailed status(writing/reading) can be gotten from the return value
+ * by bit operations.
+ */
+ int (*query_foreign_access)(grant_ref_t ref);
+ /*
+ * Grant a domain to access a range of bytes within the page referred by
+ * an available grant entry. Ref parameter is reference of a grant entry
+ * which will be sub-page accessed, domid is id of grantee domain, frame
+ * is frame address of subpage grant, flags is grant type and flag
+ * information, page_off is offset of the range of bytes, and length is
+ * length of bytes to be accessed.
+ */
+ void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off, unsigned length);
+ /*
+ * Redirect an available grant entry on domain A to another grant
+ * reference of domain B, then allow domain C to use grant reference
+ * of domain B transitively. Ref parameter is an available grant entry
+ * reference on domain A, domid is id of domain C which accesses grant
+ * entry transitively, flags is grant type and flag information,
+ * trans_domid is id of domain B whose grant entry is finally accessed
+ * transitively, trans_gref is grant entry transitive reference of
+ * domain B.
+ */
+ void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
+ domid_t trans_domid, grant_ref_t trans_gref);
+};
+
+static struct gnttab_ops *gnttab_interface;
+
+/*This reflects status of grant entries, so act as a global value*/
+static grant_status_t *grstatus;
+
+static int grant_table_version;
static struct gnttab_free_callback *gnttab_free_callback_list;
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
@@ -142,23 +229,33 @@ static void put_free_entry(grant_ref_t ref)
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
-static void update_grant_entry(grant_ref_t ref, domid_t domid,
- unsigned long frame, unsigned flags)
+/*
+ * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ */
+static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned flags)
+{
+ gnttab_shared.v1[ref].domid = domid;
+ gnttab_shared.v1[ref].frame = frame;
+ wmb();
+ gnttab_shared.v1[ref].flags = flags;
+}
+
+static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned flags)
{
- /*
- * Introducing a valid entry into the grant table:
- * 1. Write ent->domid.
- * 2. Write ent->frame:
- * GTF_permit_access: Frame to which access is permitted.
- * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
- * frame, or zero if none.
- * 3. Write memory barrier (WMB).
- * 4. Write ent->flags, inc. valid type.
- */
- shared[ref].frame = frame;
- shared[ref].domid = domid;
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ gnttab_shared.v2[ref].full_page.frame = frame;
wmb();
- shared[ref].flags = flags;
+ gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
}
/*
@@ -167,7 +264,7 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid,
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly)
{
- update_grant_entry(ref, domid, frame,
+ gnttab_interface->update_entry(ref, domid, frame,
GTF_permit_access | (readonly ? GTF_readonly : 0));
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
@@ -187,31 +284,184 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
-int gnttab_query_foreign_access(grant_ref_t ref)
+void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off,
+ unsigned length)
+{
+ gnttab_shared.v2[ref].sub_page.frame = frame;
+ gnttab_shared.v2[ref].sub_page.page_off = page_off;
+ gnttab_shared.v2[ref].sub_page.length = length;
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ wmb();
+ gnttab_shared.v2[ref].hdr.flags =
+ GTF_permit_access | GTF_sub_page | flags;
+}
+
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int flags,
+ unsigned page_off,
+ unsigned length)
{
- u16 nflags;
+ if (flags & (GTF_accept_transfer | GTF_reading |
+ GTF_writing | GTF_transitive))
+ return -EPERM;
- nflags = shared[ref].flags;
+ if (gnttab_interface->update_subpage_entry == NULL)
+ return -ENOSYS;
- return (nflags & (GTF_reading|GTF_writing));
+ gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
+ page_off, length);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
+
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+ int flags, unsigned page_off,
+ unsigned length)
+{
+ int ref, rc;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
+ page_off, length);
+ if (rc < 0) {
+ put_free_entry(ref);
+ return rc;
+ }
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
+
+bool gnttab_subpage_grants_available(void)
+{
+ return gnttab_interface->update_subpage_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
+
+void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+ int flags, domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
+ gnttab_shared.v2[ref].transitive.gref = trans_gref;
+ gnttab_shared.v2[ref].hdr.domid = domid;
+ wmb();
+ gnttab_shared.v2[ref].hdr.flags =
+ GTF_permit_access | GTF_transitive | flags;
+}
+
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+ int flags, domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ if (flags & (GTF_accept_transfer | GTF_reading |
+ GTF_writing | GTF_sub_page))
+ return -EPERM;
+
+ if (gnttab_interface->update_trans_entry == NULL)
+ return -ENOSYS;
+
+ gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
+ trans_gref);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
+
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+ domid_t trans_domid,
+ grant_ref_t trans_gref)
+{
+ int ref, rc;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
+ trans_domid, trans_gref);
+ if (rc < 0) {
+ put_free_entry(ref);
+ return rc;
+ }
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
+
+bool gnttab_trans_grants_available(void)
+{
+ return gnttab_interface->update_trans_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
+
+static int gnttab_query_foreign_access_v1(grant_ref_t ref)
+{
+ return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
+}
+
+static int gnttab_query_foreign_access_v2(grant_ref_t ref)
+{
+ return grstatus[ref] & (GTF_reading|GTF_writing);
+}
+
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+ return gnttab_interface->query_foreign_access(ref);
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
-int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
+ u16 *pflags;
- nflags = shared[ref].flags;
+ pflags = &gnttab_shared.v1[ref].flags;
+ nflags = *pflags;
do {
flags = nflags;
if (flags & (GTF_reading|GTF_writing)) {
printk(KERN_ALERT "WARNING: g.e. still in use!\n");
return 0;
}
- } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+ } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+
+ return 1;
+}
+
+static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
+{
+ gnttab_shared.v2[ref].hdr.flags = 0;
+ mb();
+ if (grstatus[ref] & (GTF_reading|GTF_writing)) {
+ return 0;
+ } else {
+ /* The read of grstatus needs to have acquire
+ semantics. On x86, reads already have
+ that, and we just need to protect against
+ compiler reorderings. On other
+ architectures we may need a full
+ barrier. */
+#ifdef CONFIG_X86
+ barrier();
+#else
+ mb();
+#endif
+ }
return 1;
}
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+ return gnttab_interface->end_foreign_access_ref(ref, readonly);
+}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
@@ -246,37 +496,76 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
unsigned long pfn)
{
- update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
+ gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
-unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
{
unsigned long frame;
u16 flags;
+ u16 *pflags;
+
+ pflags = &gnttab_shared.v1[ref].flags;
/*
* If a transfer is not even yet started, try to reclaim the grant
* reference and return failure (== 0).
*/
- while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
- if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
return 0;
cpu_relax();
}
/* If a transfer is in progress then wait until it is completed. */
while (!(flags & GTF_transfer_completed)) {
- flags = shared[ref].flags;
+ flags = *pflags;
cpu_relax();
}
rmb(); /* Read the frame number /after/ reading completion status. */
- frame = shared[ref].frame;
+ frame = gnttab_shared.v1[ref].frame;
+ BUG_ON(frame == 0);
+
+ return frame;
+}
+
+static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
+{
+ unsigned long frame;
+ u16 flags;
+ u16 *pflags;
+
+ pflags = &gnttab_shared.v2[ref].hdr.flags;
+
+ /*
+ * If a transfer is not even yet started, try to reclaim the grant
+ * reference and return failure (== 0).
+ */
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
+ return 0;
+ cpu_relax();
+ }
+
+ /* If a transfer is in progress then wait until it is completed. */
+ while (!(flags & GTF_transfer_completed)) {
+ flags = *pflags;
+ cpu_relax();
+ }
+
+ rmb(); /* Read the frame number /after/ reading completion status. */
+ frame = gnttab_shared.v2[ref].full_page.frame;
BUG_ON(frame == 0);
return frame;
}
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+ return gnttab_interface->end_foreign_transfer_ref(ref);
+}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
@@ -448,6 +737,7 @@ unsigned int gnttab_max_grant_frames(void)
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{
int i, ret;
@@ -471,25 +761,10 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
(map_ops[i].host_addr & ~PAGE_MASK));
mfn = pte_mfn(*pte);
} else {
- /* If you really wanted to do this:
- * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
- *
- * The reason we do not implement it is b/c on the
- * unmap path (gnttab_unmap_refs) we have no means of
- * checking whether the page is !GNTMAP_contains_pte.
- *
- * That is without some extra data-structure to carry
- * the struct page, bool clear_pte, and list_head next
- * tuples and deal with allocation/delallocation, etc.
- *
- * The users of this API set the GNTMAP_contains_pte
- * flag so lets just return not supported until it
- * becomes neccessary to implement.
- */
- return -EOPNOTSUPP;
+ mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
}
- ret = m2p_add_override(mfn, pages[i],
- map_ops[i].flags & GNTMAP_contains_pte);
+ ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+ &kmap_ops[i] : NULL);
if (ret)
return ret;
}
@@ -499,7 +774,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
EXPORT_SYMBOL_GPL(gnttab_map_refs);
int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
- struct page **pages, unsigned int count)
+ struct page **pages, unsigned int count, bool clear_pte)
{
int i, ret;
@@ -511,7 +786,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
return ret;
for (i = 0; i < count; i++) {
- ret = m2p_remove_override(pages[i], true /* clear the PTE */);
+ ret = m2p_remove_override(pages[i], clear_pte);
if (ret)
return ret;
}
@@ -520,6 +795,77 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+ return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
+}
+
+static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes)
+{
+ int rc;
+
+ rc = arch_gnttab_map_shared(frames, nr_gframes,
+ gnttab_max_grant_frames(),
+ &gnttab_shared.addr);
+ BUG_ON(rc);
+
+ return 0;
+}
+
+static void gnttab_unmap_frames_v1(void)
+{
+ arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+}
+
+static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes)
+{
+ uint64_t *sframes;
+ unsigned int nr_sframes;
+ struct gnttab_get_status_frames getframes;
+ int rc;
+
+ nr_sframes = nr_status_frames(nr_gframes);
+
+ /* No need for kzalloc as it is initialized in following hypercall
+ * GNTTABOP_get_status_frames.
+ */
+ sframes = kmalloc(nr_sframes * sizeof(uint64_t), GFP_ATOMIC);
+ if (!sframes)
+ return -ENOMEM;
+
+ getframes.dom = DOMID_SELF;
+ getframes.nr_frames = nr_sframes;
+ set_xen_guest_handle(getframes.frame_list, sframes);
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+ &getframes, 1);
+ if (rc == -ENOSYS) {
+ kfree(sframes);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || getframes.status);
+
+ rc = arch_gnttab_map_status(sframes, nr_sframes,
+ nr_status_frames(gnttab_max_grant_frames()),
+ &grstatus);
+ BUG_ON(rc);
+ kfree(sframes);
+
+ rc = arch_gnttab_map_shared(frames, nr_gframes,
+ gnttab_max_grant_frames(),
+ &gnttab_shared.addr);
+ BUG_ON(rc);
+
+ return 0;
+}
+
+static void gnttab_unmap_frames_v2(void)
+{
+ arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+ arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
+}
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
@@ -551,6 +897,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return rc;
}
+ /* No need for kzalloc as it is initialized in following hypercall
+ * GNTTABOP_setup_table.
+ */
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
return -ENOMEM;
@@ -567,19 +916,65 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
BUG_ON(rc || setup.status);
- rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
- &shared);
- BUG_ON(rc);
+ rc = gnttab_interface->map_frames(frames, nr_gframes);
kfree(frames);
- return 0;
+ return rc;
+}
+
+static struct gnttab_ops gnttab_v1_ops = {
+ .map_frames = gnttab_map_frames_v1,
+ .unmap_frames = gnttab_unmap_frames_v1,
+ .update_entry = gnttab_update_entry_v1,
+ .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
+ .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1,
+ .query_foreign_access = gnttab_query_foreign_access_v1,
+};
+
+static struct gnttab_ops gnttab_v2_ops = {
+ .map_frames = gnttab_map_frames_v2,
+ .unmap_frames = gnttab_unmap_frames_v2,
+ .update_entry = gnttab_update_entry_v2,
+ .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
+ .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2,
+ .query_foreign_access = gnttab_query_foreign_access_v2,
+ .update_subpage_entry = gnttab_update_subpage_entry_v2,
+ .update_trans_entry = gnttab_update_trans_entry_v2,
+};
+
+static void gnttab_request_version(void)
+{
+ int rc;
+ struct gnttab_set_version gsv;
+
+ gsv.version = 2;
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+ if (rc == 0) {
+ grant_table_version = 2;
+ gnttab_interface = &gnttab_v2_ops;
+ } else if (grant_table_version == 2) {
+ /*
+ * If we've already used version 2 features,
+ * but then suddenly discover that they're not
+ * available (e.g. migrating to an older
+ * version of Xen), almost unbounded badness
+ * can happen.
+ */
+ panic("we need grant tables version 2, but only version 1 is available");
+ } else {
+ grant_table_version = 1;
+ gnttab_interface = &gnttab_v1_ops;
+ }
+ printk(KERN_INFO "Grant tables using version %d layout.\n",
+ grant_table_version);
}
int gnttab_resume(void)
{
unsigned int max_nr_gframes;
+ gnttab_request_version();
max_nr_gframes = gnttab_max_grant_frames();
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
@@ -587,9 +982,10 @@ int gnttab_resume(void)
if (xen_pv_domain())
return gnttab_map(0, nr_grant_frames - 1);
- if (!shared) {
- shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
- if (shared == NULL) {
+ if (gnttab_shared.addr == NULL) {
+ gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
+ PAGE_SIZE * max_nr_gframes);
+ if (gnttab_shared.addr == NULL) {
printk(KERN_WARNING
"Failed to ioremap gnttab share frames!");
return -ENOMEM;
@@ -603,7 +999,7 @@ int gnttab_resume(void)
int gnttab_suspend(void)
{
- arch_gnttab_unmap_shared(shared, nr_grant_frames);
+ gnttab_interface->unmap_frames();
return 0;
}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 0b5366b5be20..ce4fa0831860 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -9,6 +9,7 @@
#include <linux/stop_machine.h>
#include <linux/freezer.h>
#include <linux/syscore_ops.h>
+#include <linux/export.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc07dc..b84bf0b6cc34 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
*/
#include <linux/pci.h>
+#include <linux/acpi.h>
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+static bool __read_mostly pci_seg_supported = true;
+
static int xen_add_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+ if (pci_seg_supported) {
+ struct physdev_pci_device_add add = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+#ifdef CONFIG_ACPI
+ acpi_handle handle;
+#endif
+
+#ifdef CONFIG_PCI_IOV
+ if (pci_dev->is_virtfn) {
+ add.flags = XEN_PCI_DEV_VIRTFN;
+ add.physfn.bus = physfn->bus->number;
+ add.physfn.devfn = physfn->devfn;
+ } else
+#endif
+ if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+ add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+ handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+ if (!handle)
+ handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+ if (!handle && pci_dev->is_virtfn)
+ handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+ if (handle) {
+ acpi_status status;
+
+ do {
+ unsigned long long pxm;
+
+ status = acpi_evaluate_integer(handle, "_PXM",
+ NULL, &pxm);
+ if (ACPI_SUCCESS(status)) {
+ add.optarr[0] = pxm;
+ add.flags |= XEN_PCI_DEV_PXM;
+ break;
+ }
+ status = acpi_get_parent(handle, &handle);
+ } while (ACPI_SUCCESS(status));
+ }
+#endif /* CONFIG_ACPI */
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+ if (r != -ENOSYS)
+ return r;
+ pci_seg_supported = false;
+ }
+ if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
#ifdef CONFIG_PCI_IOV
- if (pci_dev->is_virtfn) {
+ else if (pci_dev->is_virtfn) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
.is_virtfn = 1,
- .physfn.bus = pci_dev->physfn->bus->number,
- .physfn.devfn = pci_dev->physfn->devfn,
+ .physfn.bus = physfn->bus->number,
+ .physfn.devfn = physfn->devfn,
};
r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
&manage_pci_ext);
- } else
+ }
#endif
- if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+ else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
struct physdev_manage_pci_ext manage_pci_ext = {
.bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
@@ -56,7 +116,7 @@ static int xen_add_device(struct device *dev)
&manage_pci_ext);
} else {
struct physdev_manage_pci manage_pci = {
- .bus = pci_dev->bus->number,
+ .bus = pci_dev->bus->number,
.devfn = pci_dev->devfn,
};
@@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev)
{
int r;
struct pci_dev *pci_dev = to_pci_dev(dev);
- struct physdev_manage_pci manage_pci;
- manage_pci.bus = pci_dev->bus->number;
- manage_pci.devfn = pci_dev->devfn;
+ if (pci_seg_supported) {
+ struct physdev_pci_device device = {
+ .seg = pci_domain_nr(pci_dev->bus),
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
- r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
- &manage_pci);
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+ &device);
+ } else if (pci_domain_nr(pci_dev->bus))
+ r = -ENOSYS;
+ else {
+ struct physdev_manage_pci manage_pci = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+ &manage_pci);
+ }
return r;
}
@@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb,
r = xen_remove_device(dev);
break;
default:
- break;
+ return NOTIFY_DONE;
}
-
- return r;
+ if (r)
+ dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+ action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+ (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+ return NOTIFY_OK;
}
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
.notifier_call = xen_pci_notifier,
};
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/privcmd.c
index dbd3b16fd131..ccee0f16bcf8 100644
--- a/drivers/xen/xenfs/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -7,6 +7,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -18,6 +19,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
@@ -32,6 +34,10 @@
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
@@ -359,7 +365,6 @@ static long privcmd_ioctl(struct file *file,
return ret;
}
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -392,9 +397,39 @@ static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}
-#endif
-const struct file_operations privcmd_file_ops = {
+const struct file_operations xen_privcmd_fops = {
+ .owner = THIS_MODULE,
.unlocked_ioctl = privcmd_ioctl,
.mmap = privcmd_mmap,
};
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
+
+static struct miscdevice privcmd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/privcmd",
+ .fops = &xen_privcmd_fops,
+};
+
+static int __init privcmd_init(void)
+{
+ int err;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ err = misc_register(&privcmd_dev);
+ if (err != 0) {
+ printk(KERN_ERR "Could not register Xen privcmd device\n");
+ return err;
+ }
+ return 0;
+}
+
+static void __exit privcmd_exit(void)
+{
+ misc_deregister(&privcmd_dev);
+}
+
+module_init(privcmd_init);
+module_exit(privcmd_exit);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
new file mode 100644
index 000000000000..14facaeed36f
--- /dev/null
+++ b/drivers/xen/privcmd.h
@@ -0,0 +1,3 @@
+#include <linux/fs.h>
+
+extern const struct file_operations xen_privcmd_fops;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a23201..19e6a2041371 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -35,9 +35,11 @@
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
+#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,26 +148,29 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
void __init xen_swiotlb_init(int verbose)
{
unsigned long bytes;
- int rc;
+ int rc = -ENOMEM;
unsigned long nr_tbl;
+ char *m = NULL;
+ unsigned int repeat = 3;
- nr_tbl = swioltb_nr_tbl();
+ nr_tbl = swiotlb_nr_tbl();
if (nr_tbl)
xen_io_tlb_nslabs = nr_tbl;
else {
xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
}
-
+retry:
bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
/*
* Get IO TLB memory from any location.
*/
- xen_io_tlb_start = alloc_bootmem(bytes);
- if (!xen_io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer");
-
+ xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+ if (!xen_io_tlb_start) {
+ m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+ goto error;
+ }
xen_io_tlb_end = xen_io_tlb_start + bytes;
/*
* And replace that memory with pages under 4GB.
@@ -173,17 +178,28 @@ void __init xen_swiotlb_init(int verbose)
rc = xen_swiotlb_fixup(xen_io_tlb_start,
bytes,
xen_io_tlb_nslabs);
- if (rc)
+ if (rc) {
+ free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+ m = "Failed to get contiguous memory for DMA from Xen!\n"\
+ "You either: don't have the permissions, do not have"\
+ " enough free memory under 4GB, or the hypervisor memory"\
+ "is too fragmented!";
goto error;
-
+ }
start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
return;
error:
- panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
- "We either don't have the permission or you do not have enough"\
- "free memory under 4GB!\n", rc);
+ if (repeat--) {
+ xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+ (xen_io_tlb_nslabs >> 1));
+ printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+ (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ goto retry;
+ }
+ xen_raw_printk("%s (rc:%d)", m, rc);
+ panic("%s (rc:%d)", m, rc);
}
void *
@@ -194,6 +210,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
int order = get_order(size);
u64 dma_mask = DMA_BIT_MASK(32);
unsigned long vstart;
+ phys_addr_t phys;
+ dma_addr_t dev_addr;
/*
* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +227,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
vstart = __get_free_pages(flags, order);
ret = (void *)vstart;
+ if (!ret)
+ return ret;
+
if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+ dma_mask = hwdev->coherent_dma_mask;
- if (ret) {
+ phys = virt_to_phys(ret);
+ dev_addr = xen_phys_to_bus(phys);
+ if (((dev_addr + size - 1 <= dma_mask)) &&
+ !range_straddles_page_boundary(phys, size))
+ *dma_handle = dev_addr;
+ else {
if (xen_create_contiguous_region(vstart, order,
fls64(dma_mask)) != 0) {
free_pages(vstart, order);
return NULL;
}
- memset(ret, 0, size);
*dma_handle = virt_to_machine(ret).maddr;
}
+ memset(ret, 0, size);
return ret;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +256,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
dma_addr_t dev_addr)
{
int order = get_order(size);
+ phys_addr_t phys;
+ u64 dma_mask = DMA_BIT_MASK(32);
if (dma_release_from_coherent(hwdev, order, vaddr))
return;
- xen_destroy_contiguous_region((unsigned long)vaddr, order);
+ if (hwdev && hwdev->coherent_dma_mask)
+ dma_mask = hwdev->coherent_dma_mask;
+
+ phys = virt_to_phys(vaddr);
+
+ if (((dev_addr + size - 1 > dma_mask)) ||
+ range_straddles_page_boundary(phys, size))
+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +314,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
/*
* Ensure that the address returned is DMA'ble
*/
- if (!dma_capable(dev, dev_addr, size))
- panic("map_single: bounce buffer is not DMA'ble");
-
+ if (!dma_capable(dev, dev_addr, size)) {
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
+ dev_addr = 0;
+ }
return dev_addr;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 5c9dc43c1e94..596e6a7b17d6 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -32,7 +32,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/sysdev.h>
#include <linux/capability.h>
#include <xen/xen.h>
@@ -46,14 +45,9 @@
#define BALLOON_CLASS_NAME "xen_memory"
-static struct sys_device balloon_sysdev;
+static struct device balloon_dev;
-static int register_balloon(struct sys_device *sysdev);
-
-static struct xenbus_watch target_watch =
-{
- .node = "memory/target"
-};
+static int register_balloon(struct device *dev);
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
@@ -73,6 +67,11 @@ static void watch_target(struct xenbus_watch *watch,
*/
balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
+static struct xenbus_watch target_watch = {
+ .node = "memory/target",
+ .callback = watch_target,
+};
+
static int balloon_init_watcher(struct notifier_block *notifier,
unsigned long event,
@@ -87,7 +86,9 @@ static int balloon_init_watcher(struct notifier_block *notifier,
return NOTIFY_DONE;
}
-static struct notifier_block xenstore_notifier;
+static struct notifier_block xenstore_notifier = {
+ .notifier_call = balloon_init_watcher,
+};
static int __init balloon_init(void)
{
@@ -96,12 +97,9 @@ static int __init balloon_init(void)
pr_info("xen-balloon: Initialising balloon driver.\n");
- register_balloon(&balloon_sysdev);
-
- register_xen_selfballooning(&balloon_sysdev);
+ register_balloon(&balloon_dev);
- target_watch.callback = watch_target;
- xenstore_notifier.notifier_call = balloon_init_watcher;
+ register_xen_selfballooning(&balloon_dev);
register_xenstore_notifier(&xenstore_notifier);
@@ -118,31 +116,31 @@ static void balloon_exit(void)
module_exit(balloon_exit);
#define BALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
+ static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
- static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
-static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
-static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
-static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
-static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
+static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
+static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
+static DEVICE_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
+static DEVICE_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
-static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
+static ssize_t show_target_kb(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
}
-static ssize_t store_target_kb(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_target_kb(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -159,11 +157,11 @@ static ssize_t store_target_kb(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target_kb, S_IRUGO | S_IWUSR,
show_target_kb, store_target_kb);
-static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
+static ssize_t show_target(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%llu\n",
@@ -171,8 +169,8 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
<< PAGE_SHIFT);
}
-static ssize_t store_target(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_target(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -189,23 +187,23 @@ static ssize_t store_target(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(target, S_IRUGO | S_IWUSR,
show_target, store_target);
-static struct sysdev_attribute *balloon_attrs[] = {
- &attr_target_kb,
- &attr_target,
- &attr_schedule_delay.attr,
- &attr_max_schedule_delay.attr,
- &attr_retry_count.attr,
- &attr_max_retry_count.attr
+static struct device_attribute *balloon_attrs[] = {
+ &dev_attr_target_kb,
+ &dev_attr_target,
+ &dev_attr_schedule_delay.attr,
+ &dev_attr_max_schedule_delay.attr,
+ &dev_attr_retry_count.attr,
+ &dev_attr_max_retry_count.attr
};
static struct attribute *balloon_info_attrs[] = {
- &attr_current_kb.attr,
- &attr_low_kb.attr,
- &attr_high_kb.attr,
+ &dev_attr_current_kb.attr,
+ &dev_attr_low_kb.attr,
+ &dev_attr_high_kb.attr,
NULL
};
@@ -214,34 +212,35 @@ static struct attribute_group balloon_info_group = {
.attrs = balloon_info_attrs
};
-static struct sysdev_class balloon_sysdev_class = {
- .name = BALLOON_CLASS_NAME
+static struct bus_type balloon_subsys = {
+ .name = BALLOON_CLASS_NAME,
+ .dev_name = BALLOON_CLASS_NAME,
};
-static int register_balloon(struct sys_device *sysdev)
+static int register_balloon(struct device *dev)
{
int i, error;
- error = sysdev_class_register(&balloon_sysdev_class);
+ error = subsys_system_register(&balloon_subsys, NULL);
if (error)
return error;
- sysdev->id = 0;
- sysdev->cls = &balloon_sysdev_class;
+ dev->id = 0;
+ dev->bus = &balloon_subsys;
- error = sysdev_register(sysdev);
+ error = device_register(dev);
if (error) {
- sysdev_class_unregister(&balloon_sysdev_class);
+ bus_unregister(&balloon_subsys);
return error;
}
for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
- error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ error = device_create_file(dev, balloon_attrs[i]);
if (error)
goto fail;
}
- error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ error = sysfs_create_group(&dev->kobj, &balloon_info_group);
if (error)
goto fail;
@@ -249,9 +248,9 @@ static int register_balloon(struct sys_device *sysdev)
fail:
while (--i >= 0)
- sysdev_remove_file(sysdev, balloon_attrs[i]);
- sysdev_unregister(sysdev);
- sysdev_class_unregister(&balloon_sysdev_class);
+ device_remove_file(dev, balloon_attrs[i]);
+ device_unregister(dev);
+ bus_unregister(&balloon_subsys);
return error;
}
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index a8031445d94e..30d7be026c18 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,13 +10,13 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
-#define DRV_NAME "xen-pciback"
-static int permissive;
+static bool permissive;
module_param(permissive, bool, 0644);
/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index da3cbdfcb5dc..3daf862d739d 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -15,7 +15,6 @@ struct pci_bar_info {
int which;
};
-#define DRV_NAME "xen-pciback"
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
@@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
int ret;
ret = xen_pcibk_read_config_word(dev, offset, value, data);
- if (!atomic_read(&dev->enable_cnt))
+ if (!pci_is_enabled(dev))
return ret;
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev,
bar_info->val = res[pos].start |
(res[pos].flags & PCI_REGION_FLAG_MASK);
- bar_info->len_val = res[pos].end - res[pos].start + 1;
+ bar_info->len_val = resource_size(&res[pos]);
}
static void *bar_init(struct pci_dev *dev, int offset)
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index 921a889e65eb..7476791cab40 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -12,7 +12,6 @@
#include "conf_space_quirks.h"
LIST_HEAD(xen_pcibk_quirks);
-#define DRV_NAME "xen-pciback"
static inline const struct pci_device_id *
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
@@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
goto out;
tmp_quirk = NULL;
printk(KERN_DEBUG DRV_NAME
- ":quirk didn't match any device xen_pciback knows about\n");
+ ": quirk didn't match any device known\n");
out:
return tmp_quirk;
}
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 1d32a9a42c01..828dddc360df 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -7,13 +7,13 @@
#include <linux/list.h>
#include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "pciback.h"
struct passthrough_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list;
- spinlock_t lock;
+ struct mutex lock;
};
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
struct pci_dev *dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
return dev;
}
@@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
- unsigned long flags;
unsigned int domain, bus, devfn;
int err;
@@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
return -ENOMEM;
dev_entry->dev = dev;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_add_tail(&dev_entry->list, &dev_data->dev_list);
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
/* Publish this device. */
domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
struct pci_dev *found_dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&dev_data->lock, flags);
+ mutex_lock(&dev_data->lock);
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
if (dev_entry->dev == dev) {
@@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&dev_data->lock, flags);
+ mutex_unlock(&dev_data->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
@@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
if (!dev_data)
return -ENOMEM;
- spin_lock_init(&dev_data->lock);
+ mutex_init(&dev_data->lock);
INIT_LIST_HEAD(&dev_data->dev_list);
@@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
{
int err = 0;
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
- struct pci_dev_entry *dev_entry, *e, *tmp;
+ struct pci_dev_entry *dev_entry, *e;
struct pci_dev *dev;
int found;
unsigned int domain, bus;
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->lock);
- list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
/* Only publish this device as a root if none of its
* parent bridges are exported
*/
@@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
bus = (unsigned int)dev_entry->dev->bus->number;
if (!found) {
- spin_unlock(&dev_data->lock);
err = publish_root_cb(pdev, domain, bus);
if (err)
break;
- spin_lock(&dev_data->lock);
}
}
- if (!err)
- spin_unlock(&dev_data->lock);
+ mutex_unlock(&dev_data->lock);
return err;
}
@@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
return 1;
}
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
.name = "passthrough",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index aec214ac0a14..7944a17f5cbf 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,8 +21,6 @@
#include "conf_space.h"
#include "conf_space_quirks.h"
-#define DRV_NAME "xen-pciback"
-
static char *pci_devs_to_hide;
wait_queue_head_t xen_pcibk_aer_wait_queue;
/*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
@@ -101,6 +99,7 @@ static void pcistub_device_release(struct kref *kref)
kfree(pci_get_drvdata(psdev->dev));
pci_set_drvdata(psdev->dev, NULL);
+ psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
pci_dev_put(psdev->dev);
kfree(psdev);
@@ -222,6 +221,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
}
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ if (WARN_ON(!found_psdev))
+ return;
/*hold this lock for avoiding breaking link between
* pcistub and xen_pcibk when AER is in processing
@@ -234,6 +235,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
xen_pcibk_config_free_dyn_fields(found_psdev->dev);
xen_pcibk_config_reset_dev(found_psdev->dev);
+ xen_unregister_device_domain_owner(found_psdev->dev);
+
spin_lock_irqsave(&found_psdev->lock, flags);
found_psdev->pdev = NULL;
spin_unlock_irqrestore(&found_psdev->lock, flags);
@@ -331,6 +334,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
dev_dbg(&dev->dev, "reset device\n");
xen_pcibk_reset_device(dev);
+ dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
return 0;
config_release:
@@ -514,12 +518,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
int err;
char nodename[PCI_NODENAME_MAX];
- if (!psdev)
- dev_err(&psdev->dev->dev,
- "device is NULL when do AER recovery/kill_domain\n");
+ BUG_ON(!psdev);
snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
psdev->pdev->xdev->otherend_id);
- nodename[strlen(nodename)] = '\0';
again:
err = xenbus_transaction_start(&xbt);
@@ -605,7 +606,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in xen_pcibk\n");
+ "schedule pci_conf service in " DRV_NAME "\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
}
@@ -995,8 +996,7 @@ out:
err = count;
return err;
}
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
size_t count)
@@ -1015,8 +1015,7 @@ out:
err = count;
return err;
}
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
{
@@ -1039,8 +1038,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
return count;
}
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
{
@@ -1069,8 +1067,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return count;
}
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
const char *buf,
@@ -1106,7 +1103,8 @@ out:
err = count;
return err;
}
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL,
+ pcistub_irq_handler_switch);
static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
size_t count)
@@ -1170,8 +1168,8 @@ out:
return count;
}
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show,
+ pcistub_quirk_add);
static ssize_t permissive_add(struct device_driver *drv, const char *buf,
size_t count)
@@ -1236,8 +1234,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return count;
}
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show,
+ permissive_add);
static void pcistub_exit(void)
{
@@ -1374,3 +1372,4 @@ module_init(xen_pcibk_init);
module_exit(xen_pcibk_cleanup);
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index a0e131a81503..e9b4011c5f9a 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -15,6 +15,8 @@
#include <linux/atomic.h>
#include <xen/interface/io/pciif.h>
+#define DRV_NAME "xen-pciback"
+
struct pci_dev_entry {
struct list_head list;
struct pci_dev *dev;
@@ -27,7 +29,7 @@ struct pci_dev_entry {
struct xen_pcibk_device {
void *pci_dev_data;
- spinlock_t dev_lock;
+ struct mutex dev_lock;
struct xenbus_device *xdev;
struct xenbus_watch be_watch;
u8 be_watching;
@@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
* passthrough - BDFs are exactly like in the host.
*/
struct xen_pcibk_backend {
- char *name;
+ const char *name;
int (*init)(struct xen_pcibk_device *pdev);
void (*free)(struct xen_pcibk_device *pdev);
int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -104,9 +106,9 @@ struct xen_pcibk_backend {
unsigned int devfn);
};
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
@@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
if (xen_pcibk_backend && xen_pcibk_backend->add)
return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
return -1;
-};
+}
+
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->release(pdev, dev);
-};
+}
static inline struct pci_dev *
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
if (xen_pcibk_backend && xen_pcibk_backend->get)
return xen_pcibk_backend->get(pdev, domain, bus, devfn);
return NULL;
-};
+}
+
/**
* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
* before sending aer request to pcifront, so that guest could identify
@@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
devfn);
return -1;
-};
+}
+
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->init)
return xen_pcibk_backend->init(pdev);
return -1;
-};
+}
+
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->publish)
return xen_pcibk_backend->publish(pdev, cb);
return -1;
-};
+}
+
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->free(pdev);
-};
+}
+
/* Handles events from front-end */
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 8c95c3415b75..63616d7453e6 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -10,7 +10,6 @@
#include <linux/sched.h>
#include "pciback.h"
-#define DRV_NAME "xen-pciback"
int verbose_request;
module_param(verbose_request, int, 0644);
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4a42cfb0959d..46d140baebd8 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -8,16 +8,15 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include "pciback.h"
#define PCI_SLOT_MAX 32
-#define DRV_NAME "xen-pciback"
struct vpci_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list[PCI_SLOT_MAX];
- spinlock_t lock;
+ struct mutex lock;
};
static inline struct list_head *list_first(struct list_head *head)
@@ -33,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
if (domain != 0 || bus != 0)
return NULL;
if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
list_for_each_entry(entry,
&vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
}
}
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
}
return dev;
}
@@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
int err = 0, slot, func = -1;
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
err = -EFAULT;
@@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
dev_entry->dev = dev;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
/* Keep multi-function devices together on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
@@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
"No more space on root virtual PCI bus");
unlock:
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
/* Publish this device. */
if (!err)
@@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
struct pci_dev *found_dev = NULL;
- unsigned long flags;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
- struct pci_dev_entry *e, *tmp;
- list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
- list) {
+ struct pci_dev_entry *e;
+
+ list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
if (e->dev == dev) {
list_del(&e->list);
found_dev = e->dev;
@@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
}
out:
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
if (found_dev)
pcistub_put_pci_dev(found_dev);
@@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
if (!vpci_dev)
return -ENOMEM;
- spin_lock_init(&vpci_dev->lock);
+ mutex_init(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++)
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
- unsigned long flags;
int found = 0, slot;
- spin_lock_irqsave(&vpci_dev->lock, flags);
+ mutex_lock(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
list_for_each_entry(entry,
&vpci_dev->dev_list[slot],
@@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
}
}
}
- spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ mutex_unlock(&vpci_dev->lock);
return found;
}
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
.name = "vpci",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 978d2c6f5dca..d5dcf8d5d3d9 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -13,11 +13,10 @@
#include <asm/xen/pci.h>
#include "pciback.h"
-#define DRV_NAME "xen-pciback"
#define INVALID_EVTCHN_IRQ (-1)
struct workqueue_struct *xen_pcibk_wq;
-static int __read_mostly passthrough;
+static bool __read_mostly passthrough;
module_param(passthrough, bool, S_IRUGO);
MODULE_PARM_DESC(passthrough,
"Option to specify how to export PCI topology to guest:\n"\
@@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
pdev->xdev = xdev;
dev_set_drvdata(&xdev->dev, pdev);
- spin_lock_init(&pdev->dev_lock);
+ mutex_init(&pdev->dev_lock);
pdev->sh_info = NULL;
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -62,14 +61,12 @@ out:
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
- spin_lock(&pdev->dev_lock);
-
+ mutex_lock(&pdev->dev_lock);
/* Ensure the guest can't trigger our handler before removing devices */
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
}
- spin_unlock(&pdev->dev_lock);
/* If the driver domain started an op, make sure we complete it
* before releasing the shared memory */
@@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
/* Note, the workqueue does not use spinlocks at all.*/
flush_workqueue(xen_pcibk_wq);
- spin_lock(&pdev->dev_lock);
if (pdev->sh_info != NULL) {
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
- spin_unlock(&pdev->dev_lock);
-
+ mutex_unlock(&pdev->dev_lock);
}
static void free_pdev(struct xen_pcibk_device *pdev)
@@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
goto out;
}
- spin_lock(&pdev->dev_lock);
pdev->sh_info = vaddr;
- spin_unlock(&pdev->dev_lock);
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
"Error binding event channel to IRQ");
goto out;
}
-
- spin_lock(&pdev->dev_lock);
pdev->evtchn_irq = err;
- spin_unlock(&pdev->dev_lock);
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
char *magic = NULL;
+ mutex_lock(&pdev->dev_lock);
/* Make sure we only do this setup once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitialised)
@@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
xenbus_dev_fatal(pdev->xdev, -EFAULT,
"version mismatch (%s/%s) with pcifront - "
- "halting xen_pcibk",
+ "halting " DRV_NAME,
magic, XEN_PCI_MAGIC);
goto out;
}
@@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
+ mutex_unlock(&pdev->dev_lock);
kfree(magic);
@@ -251,8 +243,8 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
if (xen_register_device_domain_owner(dev,
pdev->xdev->otherend_id) != 0) {
- dev_err(&dev->dev, "device has been assigned to another " \
- "domain! Over-writting the ownership, but beware.\n");
+ dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
+ xen_find_device_domain_owner(dev));
xen_unregister_device_domain_owner(dev);
xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
}
@@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+ mutex_lock(&pdev->dev_lock);
/* Make sure we only reconfigure once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateReconfiguring)
@@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
}
out:
+ mutex_unlock(&pdev->dev_lock);
return 0;
}
@@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
char dev_str[64];
char state_str[64];
+ mutex_lock(&pdev->dev_lock);
/* It's possible we could get the call to setup twice, so make sure
* we're not already connected.
*/
@@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
"Error switching to initialised state!");
out:
+ mutex_unlock(&pdev->dev_lock);
if (!err)
/* see if pcifront is already configured (if not, we'll wait) */
xen_pcibk_attach(pdev);
-
return err;
}
@@ -710,21 +705,18 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
return 0;
}
-static const struct xenbus_device_id xenpci_ids[] = {
+static const struct xenbus_device_id xen_pcibk_ids[] = {
{"pci"},
{""},
};
-static struct xenbus_driver xenbus_xen_pcibk_driver = {
- .name = DRV_NAME,
- .owner = THIS_MODULE,
- .ids = xenpci_ids,
+static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
.probe = xen_pcibk_xenbus_probe,
.remove = xen_pcibk_xenbus_remove,
.otherend_changed = xen_pcibk_frontend_changed,
-};
+);
-struct xen_pcibk_backend *xen_pcibk_backend;
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
int __init xen_pcibk_xenbus_register(void)
{
@@ -738,11 +730,11 @@ int __init xen_pcibk_xenbus_register(void)
if (passthrough)
xen_pcibk_backend = &xen_pcibk_passthrough_backend;
pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
- return xenbus_register_backend(&xenbus_xen_pcibk_driver);
+ return xenbus_register_backend(&xen_pcibk_driver);
}
void __exit xen_pcibk_xenbus_unregister(void)
{
destroy_workqueue(xen_pcibk_wq);
- xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
+ xenbus_unregister_driver(&xen_pcibk_driver);
}
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 6ea852e25162..767ff656d5a7 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,10 +68,13 @@
*/
#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/workqueue.h>
+#include <linux/device.h>
#include <xen/balloon.h>
#include <xen/tmem.h>
#include <xen/xen.h>
@@ -93,6 +96,15 @@ static unsigned int selfballoon_uphysteresis __read_mostly = 1;
/* In HZ, controls frequency of worker invocation. */
static unsigned int selfballoon_interval __read_mostly = 5;
+/*
+ * Minimum usable RAM in MB for selfballooning target for balloon.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum. If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages. Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
static void selfballoon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
@@ -189,20 +201,23 @@ static int __init xen_selfballooning_setup(char *s)
__setup("selfballooning", xen_selfballooning_setup);
#endif /* CONFIG_FRONTSWAP */
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+
/*
* Use current balloon size, the goal (vm_committed_as), and hysteresis
* parameters to set a new target balloon size
*/
static void selfballoon_process(struct work_struct *work)
{
- unsigned long cur_pages, goal_pages, tgt_pages;
+ unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+ unsigned long useful_pages;
bool reset_timer = false;
if (xen_selfballooning_enabled) {
- cur_pages = balloon_stats.current_pages;
+ cur_pages = totalram_pages;
tgt_pages = cur_pages; /* default is no change */
goal_pages = percpu_counter_read_positive(&vm_committed_as) +
- balloon_stats.current_pages - totalram_pages;
+ totalreserve_pages;
#ifdef CONFIG_FRONTSWAP
/* allow space for frontswap pages to be repatriated */
if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +232,26 @@ static void selfballoon_process(struct work_struct *work)
((goal_pages - cur_pages) /
selfballoon_uphysteresis);
/* else if cur_pages == goal_pages, no change */
- balloon_set_new_target(tgt_pages);
+ useful_pages = max_pfn - totalreserve_pages;
+ if (selfballoon_min_usable_mb != 0)
+ floor_pages = totalreserve_pages +
+ MB2PAGES(selfballoon_min_usable_mb);
+ /* piecewise linear function ending in ~3% slope */
+ else if (useful_pages < MB2PAGES(16))
+ floor_pages = max_pfn; /* not worth ballooning */
+ else if (useful_pages < MB2PAGES(64))
+ floor_pages = totalreserve_pages + MB2PAGES(16) +
+ ((useful_pages - MB2PAGES(16)) >> 1);
+ else if (useful_pages < MB2PAGES(512))
+ floor_pages = totalreserve_pages + MB2PAGES(40) +
+ ((useful_pages - MB2PAGES(40)) >> 3);
+ else /* useful_pages >= MB2PAGES(512) */
+ floor_pages = totalreserve_pages + MB2PAGES(99) +
+ ((useful_pages - MB2PAGES(99)) >> 5);
+ if (tgt_pages < floor_pages)
+ tgt_pages = floor_pages;
+ balloon_set_new_target(tgt_pages +
+ balloon_stats.current_pages - totalram_pages);
reset_timer = true;
}
#ifdef CONFIG_FRONTSWAP
@@ -233,21 +267,20 @@ static void selfballoon_process(struct work_struct *work)
#ifdef CONFIG_SYSFS
-#include <linux/sysdev.h>
#include <linux/capability.h>
#define SELFBALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
- char *buf) \
+ static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
{ \
return sprintf(buf, format, ##args); \
}
SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);
-static ssize_t store_selfballooning(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_selfballooning(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -270,13 +303,13 @@ static ssize_t store_selfballooning(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR,
show_selfballooning, store_selfballooning);
SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
-static ssize_t store_selfballoon_interval(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_selfballoon_interval(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -292,13 +325,13 @@ static ssize_t store_selfballoon_interval(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
show_selfballoon_interval, store_selfballoon_interval);
SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
-static ssize_t store_selfballoon_downhys(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_selfballoon_downhys(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -314,14 +347,14 @@ static ssize_t store_selfballoon_downhys(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
show_selfballoon_downhys, store_selfballoon_downhys);
SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
-static ssize_t store_selfballoon_uphys(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_selfballoon_uphys(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -337,14 +370,39 @@ static ssize_t store_selfballoon_uphys(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
show_selfballoon_uphys, store_selfballoon_uphys);
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+ selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ err = strict_strtoul(buf, 10, &val);
+ if (err || val == 0)
+ return -EINVAL;
+ selfballoon_min_usable_mb = val;
+ return count;
+}
+
+static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+ show_selfballoon_min_usable_mb,
+ store_selfballoon_min_usable_mb);
+
+
#ifdef CONFIG_FRONTSWAP
SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
-static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_frontswap_selfshrinking(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -366,13 +424,13 @@ static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
show_frontswap_selfshrinking, store_frontswap_selfshrinking);
SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
-static ssize_t store_frontswap_inertia(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_frontswap_inertia(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -389,13 +447,13 @@ static ssize_t store_frontswap_inertia(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
show_frontswap_inertia, store_frontswap_inertia);
SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
-static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_frontswap_hysteresis(struct device *dev,
+ struct device_attribute *attr,
const char *buf,
size_t count)
{
@@ -411,20 +469,21 @@ static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
show_frontswap_hysteresis, store_frontswap_hysteresis);
#endif /* CONFIG_FRONTSWAP */
static struct attribute *selfballoon_attrs[] = {
- &attr_selfballooning.attr,
- &attr_selfballoon_interval.attr,
- &attr_selfballoon_downhysteresis.attr,
- &attr_selfballoon_uphysteresis.attr,
+ &dev_attr_selfballooning.attr,
+ &dev_attr_selfballoon_interval.attr,
+ &dev_attr_selfballoon_downhysteresis.attr,
+ &dev_attr_selfballoon_uphysteresis.attr,
+ &dev_attr_selfballoon_min_usable_mb.attr,
#ifdef CONFIG_FRONTSWAP
- &attr_frontswap_selfshrinking.attr,
- &attr_frontswap_hysteresis.attr,
- &attr_frontswap_inertia.attr,
+ &dev_attr_frontswap_selfshrinking.attr,
+ &dev_attr_frontswap_hysteresis.attr,
+ &dev_attr_frontswap_inertia.attr,
#endif
NULL
};
@@ -435,12 +494,12 @@ static struct attribute_group selfballoon_group = {
};
#endif
-int register_xen_selfballooning(struct sys_device *sysdev)
+int register_xen_selfballooning(struct device *dev)
{
int error = -1;
#ifdef CONFIG_SYSFS
- error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
+ error = sysfs_create_group(&dev->kobj, &selfballoon_group);
#endif
return error;
}
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
index 8dca685358b4..31e2e9050c7a 100644
--- a/drivers/xen/xenbus/Makefile
+++ b/drivers/xen/xenbus/Makefile
@@ -1,4 +1,5 @@
obj-y += xenbus.o
+obj-y += xenbus_dev_frontend.o
xenbus-objs =
xenbus-objs += xenbus_client.o
@@ -9,4 +10,5 @@ xenbus-objs += xenbus_probe.o
xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
xenbus-objs += $(xenbus-be-objs-y)
+obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o
obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index cdacf923e073..566d2adbd6ea 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -32,13 +32,39 @@
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/spinlock.h>
#include <linux/vmalloc.h>
+#include <linux/export.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <xen/balloon.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
+#include <xen/xen.h>
+
+#include "xenbus_probe.h"
+
+struct xenbus_map_node {
+ struct list_head next;
+ union {
+ struct vm_struct *area; /* PV */
+ struct page *page; /* HVM */
+ };
+ grant_handle_t handle;
+};
+
+static DEFINE_SPINLOCK(xenbus_valloc_lock);
+static LIST_HEAD(xenbus_valloc_pages);
+
+struct xenbus_ring_ops {
+ int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+ int (*unmap)(struct xenbus_device *dev, void *vaddr);
+};
+
+static const struct xenbus_ring_ops *ring_ops __read_mostly;
const char *xenbus_strstate(enum xenbus_state state)
{
@@ -434,39 +460,94 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
*/
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
+ return ring_ops->map(dev, gnt_ref, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+ int gnt_ref, void **vaddr)
+{
struct gnttab_map_grant_ref op = {
- .flags = GNTMAP_host_map,
+ .flags = GNTMAP_host_map | GNTMAP_contains_pte,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
+ struct xenbus_map_node *node;
struct vm_struct *area;
+ pte_t *pte;
*vaddr = NULL;
- area = xen_alloc_vm_area(PAGE_SIZE);
- if (!area)
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
return -ENOMEM;
- op.host_addr = (unsigned long)area->addr;
+ area = alloc_vm_area(PAGE_SIZE, &pte);
+ if (!area) {
+ kfree(node);
+ return -ENOMEM;
+ }
+
+ op.host_addr = arbitrary_virt_to_machine(pte).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
if (op.status != GNTST_okay) {
- xen_free_vm_area(area);
+ free_vm_area(area);
+ kfree(node);
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
return op.status;
}
- /* Stuff the handle in an unused field */
- area->phys_addr = (unsigned long)op.handle;
+ node->handle = op.handle;
+ node->area = area;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_add(&node->next, &xenbus_valloc_pages);
+ spin_unlock(&xenbus_valloc_lock);
*vaddr = area->addr;
return 0;
}
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
+ int gnt_ref, void **vaddr)
+{
+ struct xenbus_map_node *node;
+ int err;
+ void *addr;
+
+ *vaddr = NULL;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+ if (err)
+ goto out_err;
+
+ addr = pfn_to_kaddr(page_to_pfn(node->page));
+
+ err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+ if (err)
+ goto out_err;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_add(&node->next, &xenbus_valloc_pages);
+ spin_unlock(&xenbus_valloc_lock);
+
+ *vaddr = addr;
+ return 0;
+
+ out_err:
+ free_xenballooned_pages(1, &node->page);
+ kfree(node);
+ return err;
+}
/**
@@ -486,12 +567,10 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
grant_handle_t *handle, void *vaddr)
{
- struct gnttab_map_grant_ref op = {
- .host_addr = (unsigned long)vaddr,
- .flags = GNTMAP_host_map,
- .ref = gnt_ref,
- .dom = dev->otherend_id,
- };
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
+ dev->otherend_id);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
@@ -522,46 +601,87 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
*/
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
- struct vm_struct *area;
+ return ring_ops->unmap(dev, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+{
+ struct xenbus_map_node *node;
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
};
-
- /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
- * method so that we don't have to muck with vmalloc internals here.
- * We could force the user to hang on to their struct vm_struct from
- * xenbus_map_ring_valloc, but these 6 lines considerably simplify
- * this API.
- */
- read_lock(&vmlist_lock);
- for (area = vmlist; area != NULL; area = area->next) {
- if (area->addr == vaddr)
- break;
+ unsigned int level;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_for_each_entry(node, &xenbus_valloc_pages, next) {
+ if (node->area->addr == vaddr) {
+ list_del(&node->next);
+ goto found;
+ }
}
- read_unlock(&vmlist_lock);
+ node = NULL;
+ found:
+ spin_unlock(&xenbus_valloc_lock);
- if (!area) {
+ if (!node) {
xenbus_dev_error(dev, -ENOENT,
"can't find mapped virtual address %p", vaddr);
return GNTST_bad_virt_addr;
}
- op.handle = (grant_handle_t)area->phys_addr;
+ op.handle = node->handle;
+ op.host_addr = arbitrary_virt_to_machine(
+ lookup_address((unsigned long)vaddr, &level)).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
if (op.status == GNTST_okay)
- xen_free_vm_area(area);
+ free_vm_area(node->area);
else
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
- (int16_t)area->phys_addr, op.status);
+ node->handle, op.status);
+ kfree(node);
return op.status;
}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+{
+ int rv;
+ struct xenbus_map_node *node;
+ void *addr;
+
+ spin_lock(&xenbus_valloc_lock);
+ list_for_each_entry(node, &xenbus_valloc_pages, next) {
+ addr = pfn_to_kaddr(page_to_pfn(node->page));
+ if (addr == vaddr) {
+ list_del(&node->next);
+ goto found;
+ }
+ }
+ node = NULL;
+ found:
+ spin_unlock(&xenbus_valloc_lock);
+
+ if (!node) {
+ xenbus_dev_error(dev, -ENOENT,
+ "can't find mapped virtual address %p", vaddr);
+ return GNTST_bad_virt_addr;
+ }
+
+ rv = xenbus_unmap_ring(dev, node->handle, addr);
+
+ if (!rv)
+ free_xenballooned_pages(1, &node->page);
+ else
+ WARN(1, "Leaking %p\n", vaddr);
+
+ kfree(node);
+ return rv;
+}
/**
* xenbus_unmap_ring
@@ -576,10 +696,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
int xenbus_unmap_ring(struct xenbus_device *dev,
grant_handle_t handle, void *vaddr)
{
- struct gnttab_unmap_grant_ref op = {
- .host_addr = (unsigned long)vaddr,
- .handle = handle,
- };
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
@@ -611,3 +730,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
return result;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+
+static const struct xenbus_ring_ops ring_ops_pv = {
+ .map = xenbus_map_ring_valloc_pv,
+ .unmap = xenbus_unmap_ring_vfree_pv,
+};
+
+static const struct xenbus_ring_ops ring_ops_hvm = {
+ .map = xenbus_map_ring_valloc_hvm,
+ .unmap = xenbus_unmap_ring_vfree_hvm,
+};
+
+void __init xenbus_ring_ops_init(void)
+{
+ if (xen_pv_domain())
+ ring_ops = &ring_ops_pv;
+ else
+ ring_ops = &ring_ops_hvm;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61ee8fd0..2eff7a6aaa20 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
printk(KERN_WARNING "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
- intf->rsp_cons = intf->rsp_prod;
+ /* breaks kdump */
+ if (!reset_devices)
+ intf->rsp_cons = intf->rsp_prod;
}
if (xenbus_irq) {
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index c21db7513736..6e42800fa499 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -31,6 +31,8 @@
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
+#include <linux/fs.h>
+
int xs_init(void);
int xb_init_comms(void);
@@ -43,4 +45,6 @@ int xs_input_avail(void);
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
+extern const struct file_operations xen_xenbus_fops;
+
#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
new file mode 100644
index 000000000000..3d3be78c1093
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -0,0 +1,90 @@
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/capability.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xenbus_dev.h>
+
+#include "xenbus_comms.h"
+
+MODULE_LICENSE("GPL");
+
+static int xenbus_backend_open(struct inode *inode, struct file *filp)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return nonseekable_open(inode, filp);
+}
+
+static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IOCTL_XENBUS_BACKEND_EVTCHN:
+ if (xen_store_evtchn > 0)
+ return xen_store_evtchn;
+ return -ENODEV;
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ size_t size = vma->vm_end - vma->vm_start;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+ return -EINVAL;
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ virt_to_pfn(xen_store_interface),
+ size, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+const struct file_operations xenbus_backend_fops = {
+ .open = xenbus_backend_open,
+ .mmap = xenbus_backend_mmap,
+ .unlocked_ioctl = xenbus_backend_ioctl,
+};
+
+static struct miscdevice xenbus_backend_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/xenbus_backend",
+ .fops = &xenbus_backend_fops,
+};
+
+static int __init xenbus_backend_init(void)
+{
+ int err;
+
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ err = misc_register(&xenbus_backend_dev);
+ if (err)
+ printk(KERN_ERR "Could not register xenbus backend device\n");
+ return err;
+}
+
+static void __exit xenbus_backend_exit(void)
+{
+ misc_deregister(&xenbus_backend_dev);
+}
+
+module_init(xenbus_backend_init);
+module_exit(xenbus_backend_exit);
diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index bbd000f88af7..527dc2a3b89f 100644
--- a/drivers/xen/xenfs/xenbus.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -52,13 +52,17 @@
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
-#include "xenfs.h"
-#include "../xenbus/xenbus_comms.h"
+#include "xenbus_comms.h"
#include <xen/xenbus.h>
+#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
+MODULE_LICENSE("GPL");
+
/*
* An element of a list of outstanding transactions, for which we're
* still waiting a reply.
@@ -101,7 +105,7 @@ struct xenbus_file_priv {
unsigned int len;
union {
struct xsd_sockmsg msg;
- char buffer[PAGE_SIZE];
+ char buffer[XENSTORE_PAYLOAD_MAX];
} u;
/* Response queue. */
@@ -583,7 +587,7 @@ static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
return 0;
}
-const struct file_operations xenbus_file_ops = {
+const struct file_operations xen_xenbus_fops = {
.read = xenbus_file_read,
.write = xenbus_file_write,
.open = xenbus_file_open,
@@ -591,3 +595,31 @@ const struct file_operations xenbus_file_ops = {
.poll = xenbus_file_poll,
.llseek = no_llseek,
};
+EXPORT_SYMBOL_GPL(xen_xenbus_fops);
+
+static struct miscdevice xenbus_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/xenbus",
+ .fops = &xen_xenbus_fops,
+};
+
+static int __init xenbus_init(void)
+{
+ int err;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ err = misc_register(&xenbus_dev);
+ if (err)
+ printk(KERN_ERR "Could not register xenbus frontend device\n");
+ return err;
+}
+
+static void __exit xenbus_exit(void)
+{
+ misc_deregister(&xenbus_dev);
+}
+
+module_init(xenbus_init);
+module_exit(xenbus_exit);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c9ac8b..3864967202b5 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -46,6 +46,7 @@
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -290,14 +291,9 @@ void xenbus_dev_shutdown(struct device *_dev)
EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus,
- struct module *owner,
- const char *mod_name)
+ struct xen_bus_type *bus)
{
- drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
- drv->driver.owner = owner;
- drv->driver.mod_name = mod_name;
return driver_register(&drv->driver);
}
@@ -309,8 +305,7 @@ void xenbus_unregister_driver(struct xenbus_driver *drv)
}
EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
-struct xb_find_info
-{
+struct xb_find_info {
struct xenbus_device *dev;
const char *nodename;
};
@@ -639,7 +634,7 @@ int xenbus_dev_cancel(struct device *dev)
EXPORT_SYMBOL_GPL(xenbus_dev_cancel);
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
-int xenstored_ready = 0;
+int xenstored_ready;
int register_xenstore_notifier(struct notifier_block *nb)
@@ -684,64 +679,76 @@ static int __init xenbus_probe_initcall(void)
device_initcall(xenbus_probe_initcall);
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
{
int err = 0;
unsigned long page = 0;
+ struct evtchn_alloc_unbound alloc_unbound;
- DPRINTK("");
+ /* Allocate Xenstore page */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ goto out_err;
- err = -ENODEV;
- if (!xen_domain())
- return err;
+ xen_store_mfn = xen_start_info->store_mfn =
+ pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
- /*
- * Domain0 doesn't have a store_evtchn or store_mfn yet.
- */
- if (xen_initial_domain()) {
- struct evtchn_alloc_unbound alloc_unbound;
+ /* Next allocate a local port which xenstored can bind to */
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = DOMID_SELF;
- /* Allocate Xenstore page */
- page = get_zeroed_page(GFP_KERNEL);
- if (!page)
- goto out_error;
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err == -ENOSYS)
+ goto out_err;
- xen_store_mfn = xen_start_info->store_mfn =
- pfn_to_mfn(virt_to_phys((void *)page) >>
- PAGE_SHIFT);
+ BUG_ON(err);
+ xen_store_evtchn = xen_start_info->store_evtchn =
+ alloc_unbound.port;
- /* Next allocate a local port which xenstored can bind to */
- alloc_unbound.dom = DOMID_SELF;
- alloc_unbound.remote_dom = 0;
+ return 0;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
- &alloc_unbound);
- if (err == -ENOSYS)
- goto out_error;
+ out_err:
+ if (page != 0)
+ free_page(page);
+ return err;
+}
- BUG_ON(err);
- xen_store_evtchn = xen_start_info->store_evtchn =
- alloc_unbound.port;
+static int __init xenbus_init(void)
+{
+ int err = 0;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
+ if (!xen_domain())
+ return -ENODEV;
+
+ xenbus_ring_ops_init();
+
+ if (xen_hvm_domain()) {
+ uint64_t v = 0;
+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+ if (err)
+ goto out_error;
+ xen_store_evtchn = (int)v;
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto out_error;
+ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
} else {
- if (xen_hvm_domain()) {
- uint64_t v = 0;
- err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
- if (err)
- goto out_error;
- xen_store_evtchn = (int)v;
- err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_store_evtchn)
+ xenstored_ready = 1;
+ else {
+ err = xenstored_local_init();
if (err)
goto out_error;
- xen_store_mfn = (unsigned long)v;
- xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
- } else {
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
- xenstored_ready = 1;
}
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
}
/* Initialize the interface to xenstore. */
@@ -760,12 +767,7 @@ static int __init xenbus_init(void)
proc_mkdir("xen", NULL);
#endif
- return 0;
-
- out_error:
- if (page != 0)
- free_page(page);
-
+out_error:
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index b814935378c7..bb4f92ed8730 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -36,8 +36,7 @@
#define XEN_BUS_ID_SIZE 20
-struct xen_bus_type
-{
+struct xen_bus_type {
char *root;
unsigned int levels;
int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
@@ -54,9 +53,7 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev);
extern int xenbus_dev_remove(struct device *_dev);
extern int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus,
- struct module *owner,
- const char *mod_name);
+ struct xen_bus_type *bus);
extern int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename);
@@ -77,4 +74,6 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch,
extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node);
+void xenbus_ring_ops_init(void);
+
#endif
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf919d78d..257be37d9091 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -42,6 +42,7 @@
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -104,8 +105,6 @@ static int xenbus_uevent_backend(struct device *dev,
xdev = to_xenbus_device(dev);
bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
- if (xdev == NULL)
- return -ENODEV;
if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
return -ENOMEM;
@@ -233,15 +232,13 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
}
EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
-int __xenbus_register_backend(struct xenbus_driver *drv,
- struct module *owner, const char *mod_name)
+int xenbus_register_backend(struct xenbus_driver *drv)
{
drv->read_otherend_details = read_frontend_details;
- return xenbus_register_driver_common(drv, &xenbus_backend,
- owner, mod_name);
+ return xenbus_register_driver_common(drv, &xenbus_backend);
}
-EXPORT_SYMBOL_GPL(__xenbus_register_backend);
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
static int backend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba474a560..9c57819df51a 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/io.h>
+#include <linux/module.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -229,15 +230,13 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
print_device_status);
}
-int __xenbus_register_frontend(struct xenbus_driver *drv,
- struct module *owner, const char *mod_name)
+int xenbus_register_frontend(struct xenbus_driver *drv)
{
int ret;
drv->read_otherend_details = read_backend_details;
- ret = xenbus_register_driver_common(drv, &xenbus_frontend,
- owner, mod_name);
+ ret = xenbus_register_driver_common(drv, &xenbus_frontend);
if (ret)
return ret;
@@ -246,12 +245,133 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
return 0;
}
-EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);
+
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+ const char **v, unsigned int l)
+{
+ xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+ wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+ long timeout;
+ timeout = wait_event_interruptible_timeout(backend_state_wq,
+ backend_state == expected, 5 * HZ);
+ if (timeout <= 0)
+ printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+ struct xenbus_watch be_watch;
+
+ printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+ be, xenbus_strstate(be_state));
+
+ memset(&be_watch, 0, sizeof(be_watch));
+ be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+ if (!be_watch.node)
+ return;
+
+ be_watch.callback = xenbus_reset_backend_state_changed;
+ backend_state = XenbusStateUnknown;
+
+ printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+ register_xenbus_watch(&be_watch);
+
+ /* fall through to forward backend to state XenbusStateInitialising */
+ switch (be_state) {
+ case XenbusStateConnected:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+ case XenbusStateClosing:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+ xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+ case XenbusStateClosed:
+ xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+ xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+ }
+
+ unregister_xenbus_watch(&be_watch);
+ printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+ kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+ int be_state, fe_state, err;
+ char *backend, *frontend;
+
+ frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+ if (!frontend)
+ return;
+
+ err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+ if (err != 1)
+ goto out;
+
+ switch (fe_state) {
+ case XenbusStateConnected:
+ case XenbusStateClosed:
+ printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+ frontend, xenbus_strstate(fe_state));
+ backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+ if (!backend || IS_ERR(backend))
+ goto out;
+ err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+ if (err == 1)
+ xenbus_reset_frontend(frontend, backend, be_state);
+ kfree(backend);
+ break;
+ default:
+ break;
+ }
+out:
+ kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+ char **devclass, **dev;
+ int devclass_n, dev_n;
+ int i, j;
+
+ devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+ if (IS_ERR(devclass))
+ return;
+
+ for (i = 0; i < devclass_n; i++) {
+ dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+ if (IS_ERR(dev))
+ continue;
+ for (j = 0; j < dev_n; j++)
+ xenbus_check_frontend(devclass[i], dev[j]);
+ kfree(dev);
+ }
+ kfree(devclass);
+}
static int frontend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
void *data)
{
+ /* reset devices in Connected or Closed state */
+ if (xen_hvm_domain())
+ xenbus_reset_state();
/* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
register_xenbus_watch(&fe_watch);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690075af..d1c217b23a42 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <xen/xenbus.h>
+#include <xen/xen.h>
#include "xenbus_comms.h"
struct xs_stored_msg {
@@ -531,21 +532,18 @@ int xenbus_printf(struct xenbus_transaction t,
{
va_list ap;
int ret;
-#define PRINTF_BUFFER_SIZE 4096
- char *printf_buffer;
-
- printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
- if (printf_buffer == NULL)
- return -ENOMEM;
+ char *buf;
va_start(ap, fmt);
- ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+ buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
va_end(ap);
- BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
- ret = xenbus_write(t, dir, node, printf_buffer);
+ if (!buf)
+ return -ENOMEM;
- kfree(printf_buffer);
+ ret = xenbus_write(t, dir, node, buf);
+
+ kfree(buf);
return ret;
}
@@ -638,8 +636,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
err = xs_watch(watch->node, token);
- /* Ignore errors due to multiple registration. */
- if ((err != 0) && (err != -EEXIST)) {
+ if (err) {
spin_lock(&watches_lock);
list_del(&watch->list);
spin_unlock(&watches_lock);
@@ -801,6 +798,12 @@ static int process_msg(void)
goto out;
}
+ if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
+ kfree(msg);
+ err = -EINVAL;
+ goto out;
+ }
+
body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
if (body == NULL) {
kfree(msg);
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 4fde9440fe1f..b019865fcc56 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_XENFS) += xenfs.o
-xenfs-y = super.o xenbus.o privcmd.o
+xenfs-y = super.o
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 1aa389719846..a84b53c01436 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,6 +16,8 @@
#include <xen/xen.h>
#include "xenfs.h"
+#include "../privcmd.h"
+#include "../xenbus/xenbus_comms.h"
#include <asm/xen/hypervisor.h>
@@ -82,9 +84,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
{
static struct tree_descr xenfs_files[] = {
[1] = {},
- { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+ { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
- { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
+ { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{""},
};
int rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index b68aa6200003..6b80c7779c02 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -1,8 +1,6 @@
#ifndef _XENFS_XENBUS_H
#define _XENFS_XENBUS_H
-extern const struct file_operations xenbus_file_ops;
-extern const struct file_operations privcmd_file_ops;
extern const struct file_operations xsd_kva_file_ops;
extern const struct file_operations xsd_port_file_ops;