summaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/cpu_hotplug.c7
-rw-r--r--drivers/xen/events/events_base.c1
-rw-r--r--drivers/xen/grant-table.c8
-rw-r--r--drivers/xen/manage.c8
-rw-r--r--drivers/xen/platform-pci.c71
-rw-r--r--drivers/xen/privcmd.c226
-rw-r--r--drivers/xen/swiotlb-xen.c13
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xen-pciback/xenbus.c2
-rw-r--r--drivers/xen/xenbus/xenbus.h135
-rw-r--r--drivers/xen/xenbus/xenbus_client.c45
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c309
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h51
-rw-r--r--drivers/xen/xenbus/xenbus_dev_backend.c2
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c213
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c14
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h88
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c11
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c17
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c526
-rw-r--r--drivers/xen/xenfs/super.c2
-rw-r--r--drivers/xen/xenfs/xenstored.c2
22 files changed, 1121 insertions, 632 deletions
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 5676aefdf2bc..0003912a8111 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -68,13 +68,12 @@ static void vcpu_hotplug(unsigned int cpu)
}
static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
unsigned int cpu;
char *cpustr;
- const char *node = vec[XS_WATCH_PATH];
- cpustr = strstr(node, "cpu/");
+ cpustr = strstr(path, "cpu/");
if (cpustr != NULL) {
sscanf(cpustr, "cpu/%u", &cpu);
vcpu_hotplug(cpu);
@@ -107,7 +106,7 @@ static int __init setup_vcpu_hotplug_event(void)
.notifier_call = setup_cpu_watcher };
#ifdef CONFIG_X86
- if (!xen_pv_domain())
+ if (!xen_pv_domain() && !xen_pvh_domain())
#else
if (!xen_domain())
#endif
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index fd8e872d2943..6a53577772c9 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1704,7 +1704,6 @@ void __init xen_init_IRQ(void)
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
- /* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bb36b1e1dbcc..d6786b87e13b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1146,13 +1146,13 @@ EXPORT_SYMBOL_GPL(gnttab_init);
static int __gnttab_init(void)
{
+ if (!xen_domain())
+ return -ENODEV;
+
/* Delay grant-table initialization in the PV on HVM case */
- if (xen_hvm_domain())
+ if (xen_hvm_domain() && !xen_pvh_domain())
return 0;
- if (!xen_pv_domain())
- return -ENODEV;
-
return gnttab_init();
}
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 26e5e8507f03..c1ec8ee80924 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -218,7 +218,7 @@ static struct shutdown_handler shutdown_handlers[] = {
};
static void shutdown_handler(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
char *str;
struct xenbus_transaction xbt;
@@ -266,8 +266,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
}
#ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
- unsigned int len)
+static void sysrq_handler(struct xenbus_watch *watch, const char *path,
+ const char *token)
{
char sysrq_key = '\0';
struct xenbus_transaction xbt;
@@ -277,7 +277,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
err = xenbus_transaction_start(&xbt);
if (err)
return;
- if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+ if (xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key) < 0) {
pr_err("Unable to read sysrq code in control/sysrq\n");
xenbus_transaction_end(xbt, 1);
return;
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 112ce422dc22..2a165cc8a43c 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -42,6 +42,7 @@
static unsigned long platform_mmio;
static unsigned long platform_mmio_alloc;
static unsigned long platform_mmiolen;
+static uint64_t callback_via;
static unsigned long alloc_xen_mmio(unsigned long len)
{
@@ -54,6 +55,51 @@ static unsigned long alloc_xen_mmio(unsigned long len)
return addr;
}
+static uint64_t get_callback_via(struct pci_dev *pdev)
+{
+ u8 pin;
+ int irq;
+
+ irq = pdev->irq;
+ if (irq < 16)
+ return irq; /* ISA IRQ */
+
+ pin = pdev->pin;
+
+ /* We don't know the GSI. Specify the PCI INTx line instead. */
+ return ((uint64_t)0x01 << HVM_CALLBACK_VIA_TYPE_SHIFT) | /* PCI INTx identifier */
+ ((uint64_t)pci_domain_nr(pdev->bus) << 32) |
+ ((uint64_t)pdev->bus->number << 16) |
+ ((uint64_t)(pdev->devfn & 0xff) << 8) |
+ ((uint64_t)(pin - 1) & 3);
+}
+
+static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
+{
+ xen_hvm_evtchn_do_upcall();
+ return IRQ_HANDLED;
+}
+
+static int xen_allocate_irq(struct pci_dev *pdev)
+{
+ return request_irq(pdev->irq, do_hvm_evtchn_intr,
+ IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+ "xen-platform-pci", pdev);
+}
+
+static int platform_pci_resume(struct pci_dev *pdev)
+{
+ int err;
+ if (!xen_pv_domain())
+ return 0;
+ err = xen_set_callback_via(callback_via);
+ if (err) {
+ dev_err(&pdev->dev, "platform_pci_resume failure!\n");
+ return err;
+ }
+ return 0;
+}
+
static int platform_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -92,6 +138,28 @@ static int platform_pci_probe(struct pci_dev *pdev,
platform_mmio = mmio_addr;
platform_mmiolen = mmio_len;
+ /*
+ * Xen HVM guests always use the vector callback mechanism.
+ * L1 Dom0 in a nested Xen environment is a PV guest inside in an
+ * HVM environment. It needs the platform-pci driver to get
+ * notifications from L0 Xen, but it cannot use the vector callback
+ * as it is not exported by L1 Xen.
+ */
+ if (xen_pv_domain()) {
+ ret = xen_allocate_irq(pdev);
+ if (ret) {
+ dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
+ goto out;
+ }
+ callback_via = get_callback_via(pdev);
+ ret = xen_set_callback_via(callback_via);
+ if (ret) {
+ dev_warn(&pdev->dev, "Unable to set the evtchn callback "
+ "err=%d\n", ret);
+ goto out;
+ }
+ }
+
max_nr_gframes = gnttab_max_grant_frames();
grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
ret = gnttab_setup_auto_xlat_frames(grant_frames);
@@ -123,6 +191,9 @@ static struct pci_driver platform_driver = {
.name = DRV_NAME,
.probe = platform_pci_probe,
.id_table = platform_pci_tbl,
+#ifdef CONFIG_PM
+ .resume_early = platform_pci_resume,
+#endif
};
builtin_pci_driver(platform_driver);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 6e3306f4a525..2077a3ac7c0c 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -22,6 +22,7 @@
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
+#include <linux/moduleparam.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
@@ -32,6 +33,7 @@
#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
+#include <xen/interface/hvm/dm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
@@ -43,16 +45,36 @@ MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1)
+static unsigned int privcmd_dm_op_max_num = 16;
+module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
+MODULE_PARM_DESC(dm_op_max_nr_bufs,
+ "Maximum number of buffers per dm_op hypercall");
+
+static unsigned int privcmd_dm_op_buf_max_size = 4096;
+module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
+ 0644);
+MODULE_PARM_DESC(dm_op_buf_max_size,
+ "Maximum size of a dm_op hypercall buffer");
+
+struct privcmd_data {
+ domid_t domid;
+};
+
static int privcmd_vma_range_is_mapped(
struct vm_area_struct *vma,
unsigned long addr,
unsigned long nr_pages);
-static long privcmd_ioctl_hypercall(void __user *udata)
+static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
{
+ struct privcmd_data *data = file->private_data;
struct privcmd_hypercall hypercall;
long ret;
+ /* Disallow arbitrary hypercalls if restricted */
+ if (data->domid != DOMID_INVALID)
+ return -EPERM;
+
if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
return -EFAULT;
@@ -229,8 +251,9 @@ static int mmap_gfn_range(void *data, void *state)
return 0;
}
-static long privcmd_ioctl_mmap(void __user *udata)
+static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
{
+ struct privcmd_data *data = file->private_data;
struct privcmd_mmap mmapcmd;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -245,6 +268,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
return -EFAULT;
+ /* If restriction is in place, check the domid matches */
+ if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
+ return -EPERM;
+
rc = gather_array(&pagelist,
mmapcmd.num, sizeof(struct privcmd_mmap_entry),
mmapcmd.entry);
@@ -416,8 +443,10 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
static const struct vm_operations_struct privcmd_vm_ops;
-static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
+static long privcmd_ioctl_mmap_batch(
+ struct file *file, void __user *udata, int version)
{
+ struct privcmd_data *data = file->private_data;
int ret;
struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
@@ -446,6 +475,10 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
return -EINVAL;
}
+ /* If restriction is in place, check the domid matches */
+ if (data->domid != DOMID_INVALID && data->domid != m.dom)
+ return -EPERM;
+
nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;
@@ -548,37 +581,210 @@ out_unlock:
goto out;
}
+static int lock_pages(
+ struct privcmd_dm_op_buf kbufs[], unsigned int num,
+ struct page *pages[], unsigned int nr_pages)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++) {
+ unsigned int requested;
+ int pinned;
+
+ requested = DIV_ROUND_UP(
+ offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+ PAGE_SIZE);
+ if (requested > nr_pages)
+ return -ENOSPC;
+
+ pinned = get_user_pages_fast(
+ (unsigned long) kbufs[i].uptr,
+ requested, FOLL_WRITE, pages);
+ if (pinned < 0)
+ return pinned;
+
+ nr_pages -= pinned;
+ pages += pinned;
+ }
+
+ return 0;
+}
+
+static void unlock_pages(struct page *pages[], unsigned int nr_pages)
+{
+ unsigned int i;
+
+ if (!pages)
+ return;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (pages[i])
+ put_page(pages[i]);
+ }
+}
+
+static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
+{
+ struct privcmd_data *data = file->private_data;
+ struct privcmd_dm_op kdata;
+ struct privcmd_dm_op_buf *kbufs;
+ unsigned int nr_pages = 0;
+ struct page **pages = NULL;
+ struct xen_dm_op_buf *xbufs = NULL;
+ unsigned int i;
+ long rc;
+
+ if (copy_from_user(&kdata, udata, sizeof(kdata)))
+ return -EFAULT;
+
+ /* If restriction is in place, check the domid matches */
+ if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
+ return -EPERM;
+
+ if (kdata.num == 0)
+ return 0;
+
+ if (kdata.num > privcmd_dm_op_max_num)
+ return -E2BIG;
+
+ kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
+ if (!kbufs)
+ return -ENOMEM;
+
+ if (copy_from_user(kbufs, kdata.ubufs,
+ sizeof(*kbufs) * kdata.num)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ for (i = 0; i < kdata.num; i++) {
+ if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
+ rc = -E2BIG;
+ goto out;
+ }
+
+ if (!access_ok(VERIFY_WRITE, kbufs[i].uptr,
+ kbufs[i].size)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ nr_pages += DIV_ROUND_UP(
+ offset_in_page(kbufs[i].uptr) + kbufs[i].size,
+ PAGE_SIZE);
+ }
+
+ pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
+ if (!xbufs) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = lock_pages(kbufs, kdata.num, pages, nr_pages);
+ if (rc)
+ goto out;
+
+ for (i = 0; i < kdata.num; i++) {
+ set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
+ xbufs[i].size = kbufs[i].size;
+ }
+
+ xen_preemptible_hcall_begin();
+ rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
+ xen_preemptible_hcall_end();
+
+out:
+ unlock_pages(pages, nr_pages);
+ kfree(xbufs);
+ kfree(pages);
+ kfree(kbufs);
+
+ return rc;
+}
+
+static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
+{
+ struct privcmd_data *data = file->private_data;
+ domid_t dom;
+
+ if (copy_from_user(&dom, udata, sizeof(dom)))
+ return -EFAULT;
+
+ /* Set restriction to the specified domain, or check it matches */
+ if (data->domid == DOMID_INVALID)
+ data->domid = dom;
+ else if (data->domid != dom)
+ return -EINVAL;
+
+ return 0;
+}
+
static long privcmd_ioctl(struct file *file,
unsigned int cmd, unsigned long data)
{
- int ret = -ENOSYS;
+ int ret = -ENOTTY;
void __user *udata = (void __user *) data;
switch (cmd) {
case IOCTL_PRIVCMD_HYPERCALL:
- ret = privcmd_ioctl_hypercall(udata);
+ ret = privcmd_ioctl_hypercall(file, udata);
break;
case IOCTL_PRIVCMD_MMAP:
- ret = privcmd_ioctl_mmap(udata);
+ ret = privcmd_ioctl_mmap(file, udata);
break;
case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata, 1);
+ ret = privcmd_ioctl_mmap_batch(file, udata, 1);
break;
case IOCTL_PRIVCMD_MMAPBATCH_V2:
- ret = privcmd_ioctl_mmap_batch(udata, 2);
+ ret = privcmd_ioctl_mmap_batch(file, udata, 2);
+ break;
+
+ case IOCTL_PRIVCMD_DM_OP:
+ ret = privcmd_ioctl_dm_op(file, udata);
+ break;
+
+ case IOCTL_PRIVCMD_RESTRICT:
+ ret = privcmd_ioctl_restrict(file, udata);
break;
default:
- ret = -EINVAL;
break;
}
return ret;
}
+static int privcmd_open(struct inode *ino, struct file *file)
+{
+ struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
+
+ if (!data)
+ return -ENOMEM;
+
+ /* DOMID_INVALID implies no restriction */
+ data->domid = DOMID_INVALID;
+
+ file->private_data = data;
+ return 0;
+}
+
+static int privcmd_release(struct inode *ino, struct file *file)
+{
+ struct privcmd_data *data = file->private_data;
+
+ kfree(data);
+ return 0;
+}
+
static void privcmd_close(struct vm_area_struct *vma)
{
struct page **pages = vma->vm_private_data;
@@ -647,6 +853,8 @@ static int privcmd_vma_range_is_mapped(
const struct file_operations xen_privcmd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = privcmd_ioctl,
+ .open = privcmd_open,
+ .release = privcmd_release,
.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 478fb91e3df2..f8afc6dcc29f 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -275,6 +275,10 @@ retry:
rc = 0;
} else
rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+
+ if (!rc)
+ swiotlb_set_max_segment(PAGE_SIZE);
+
return rc;
error:
if (repeat--) {
@@ -392,7 +396,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
if (dma_capable(dev, dev_addr, size) &&
!range_straddles_page_boundary(phys, size) &&
!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
- !swiotlb_force) {
+ (swiotlb_force != SWIOTLB_FORCE)) {
/* we are not interested in the dma_addr returned by
* xen_dma_map_page, only in the potential cache flushes executed
* by the function. */
@@ -410,9 +414,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
if (map == SWIOTLB_MAP_ERROR)
return DMA_ERROR_CODE;
+ dev_addr = xen_phys_to_bus(map);
xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
dev_addr, map & ~PAGE_MASK, size, dir, attrs);
- dev_addr = xen_phys_to_bus(map);
/*
* Ensure that the address returned is DMA'ble
@@ -552,7 +556,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
phys_addr_t paddr = sg_phys(sg);
dma_addr_t dev_addr = xen_phys_to_bus(paddr);
- if (swiotlb_force ||
+ if (swiotlb_force == SWIOTLB_FORCE ||
xen_arch_need_swiotlb(hwdev, paddr, dev_addr) ||
!dma_capable(hwdev, dev_addr, sg->length) ||
range_straddles_page_boundary(paddr, sg->length)) {
@@ -571,13 +575,14 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
sg_dma_len(sgl) = 0;
return 0;
}
+ dev_addr = xen_phys_to_bus(map);
xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT),
dev_addr,
map & ~PAGE_MASK,
sg->length,
dir,
attrs);
- sg->dma_address = xen_phys_to_bus(map);
+ sg->dma_address = dev_addr;
} else {
/* we are not interested in the dma_addr returned by
* xen_dma_map_page, only in the potential cache flushes executed
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 79865b8901ba..e7715cb62eef 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -55,7 +55,7 @@ static int register_balloon(struct device *dev);
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
unsigned long long new_target;
int err;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 3f0aee0a068b..3814b44bf1f7 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -652,7 +652,7 @@ out:
}
static void xen_pcibk_be_watch(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
struct xen_pcibk_device *pdev =
container_of(watch, struct xen_pcibk_device, be_watch);
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
new file mode 100644
index 000000000000..149c5e7efc89
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus.h
@@ -0,0 +1,135 @@
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_XENBUS_H
+#define _XENBUS_XENBUS_H
+
+#include <linux/mutex.h>
+#include <linux/uio.h>
+#include <xen/xenbus.h>
+
+#define XEN_BUS_ID_SIZE 20
+
+struct xen_bus_type {
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
+ int (*probe)(struct xen_bus_type *bus, const char *type,
+ const char *dir);
+ void (*otherend_changed)(struct xenbus_watch *watch, const char *path,
+ const char *token);
+ struct bus_type bus;
+};
+
+enum xenstore_init {
+ XS_UNKNOWN,
+ XS_PV,
+ XS_HVM,
+ XS_LOCAL,
+};
+
+struct xs_watch_event {
+ struct list_head list;
+ unsigned int len;
+ struct xenbus_watch *handle;
+ const char *path;
+ const char *token;
+ char body[];
+};
+
+enum xb_req_state {
+ xb_req_state_queued,
+ xb_req_state_wait_reply,
+ xb_req_state_got_reply,
+ xb_req_state_aborted
+};
+
+struct xb_req_data {
+ struct list_head list;
+ wait_queue_head_t wq;
+ struct xsd_sockmsg msg;
+ enum xsd_sockmsg_type type;
+ char *body;
+ const struct kvec *vec;
+ int num_vecs;
+ int err;
+ enum xb_req_state state;
+ void (*cb)(struct xb_req_data *);
+ void *par;
+};
+
+extern enum xenstore_init xen_store_domain_type;
+extern const struct attribute_group *xenbus_dev_groups[];
+extern struct mutex xs_response_mutex;
+extern struct list_head xs_reply_list;
+extern struct list_head xb_write_list;
+extern wait_queue_head_t xb_waitq;
+extern struct mutex xb_write_mutex;
+
+int xs_init(void);
+int xb_init_comms(void);
+void xb_deinit_comms(void);
+int xs_watch_msg(struct xs_watch_event *event);
+void xs_request_exit(struct xb_req_data *req);
+
+int xenbus_match(struct device *_dev, struct device_driver *_drv);
+int xenbus_dev_probe(struct device *_dev);
+int xenbus_dev_remove(struct device *_dev);
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name);
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+int xenbus_probe_devices(struct xen_bus_type *bus);
+
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
+
+void xenbus_dev_shutdown(struct device *_dev);
+
+int xenbus_dev_suspend(struct device *dev);
+int xenbus_dev_resume(struct device *dev);
+int xenbus_dev_cancel(struct device *dev);
+
+void xenbus_otherend_changed(struct xenbus_watch *watch,
+ const char *path, const char *token,
+ int ignore_on_shutdown);
+
+int xenbus_read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node);
+
+void xenbus_ring_ops_init(void);
+
+int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par);
+void xenbus_dev_queue_reply(struct xb_req_data *req);
+
+#endif
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 056da6ee1a35..82a8866758ee 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -47,7 +47,7 @@
#include <xen/xen.h>
#include <xen/features.h>
-#include "xenbus_probe.h"
+#include "xenbus.h"
#define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE))
@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(xenbus_strstate);
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
- const char **, unsigned int))
+ const char *, const char *))
{
int err;
@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path);
int xenbus_watch_pathfmt(struct xenbus_device *dev,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
- const char **, unsigned int),
+ const char *, const char *),
const char *pathfmt, ...)
{
int err;
@@ -259,53 +259,34 @@ int xenbus_frontend_closed(struct xenbus_device *dev)
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
-/**
- * Return the path to the error node for the given device, or NULL on failure.
- * If the value returned is non-NULL, then it is the caller's to kfree.
- */
-static char *error_path(struct xenbus_device *dev)
-{
- return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
-}
-
-
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap)
{
unsigned int len;
- char *printf_buffer = NULL;
- char *path_buffer = NULL;
+ char *printf_buffer;
+ char *path_buffer;
#define PRINTF_BUFFER_SIZE 4096
+
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
- if (printf_buffer == NULL)
- goto fail;
+ if (!printf_buffer)
+ return;
len = sprintf(printf_buffer, "%i ", -err);
- vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
+ vsnprintf(printf_buffer + len, PRINTF_BUFFER_SIZE - len, fmt, ap);
dev_err(&dev->dev, "%s\n", printf_buffer);
- path_buffer = error_path(dev);
-
- if (path_buffer == NULL) {
+ path_buffer = kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
+ if (!path_buffer ||
+ xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer))
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
- dev->nodename, printf_buffer);
- goto fail;
- }
+ dev->nodename, printf_buffer);
- if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
- dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
- dev->nodename, printf_buffer);
- goto fail;
- }
-
-fail:
kfree(printf_buffer);
kfree(path_buffer);
}
-
/**
* xenbus_dev_error
* @dev: xenbus device
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index ecdecce80a6c..856ada5d39c9 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -34,19 +34,31 @@
#include <linux/wait.h>
#include <linux/interrupt.h>
+#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
-#include "xenbus_comms.h"
+#include "xenbus.h"
+
+/* A list of replies. Currently only one will ever be outstanding. */
+LIST_HEAD(xs_reply_list);
+
+/* A list of write requests. */
+LIST_HEAD(xb_write_list);
+DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+DEFINE_MUTEX(xb_write_mutex);
+
+/* Protect xenbus reader thread against save/restore. */
+DEFINE_MUTEX(xs_response_mutex);
static int xenbus_irq;
+static struct task_struct *xenbus_task;
static DECLARE_WORK(probe_work, xenbus_probe);
-static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
static irqreturn_t wake_waiting(int irq, void *unused)
{
@@ -84,30 +96,31 @@ static const void *get_input_chunk(XENSTORE_RING_IDX cons,
return buf + MASK_XENSTORE_IDX(cons);
}
+static int xb_data_to_write(void)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+
+ return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
+ !list_empty(&xb_write_list);
+}
+
/**
* xb_write - low level write
* @data: buffer to send
* @len: length of buffer
*
- * Returns 0 on success, error otherwise.
+ * Returns number of bytes written or -err.
*/
-int xb_write(const void *data, unsigned len)
+static int xb_write(const void *data, unsigned int len)
{
struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
- int rc;
+ unsigned int bytes = 0;
while (len != 0) {
void *dst;
unsigned int avail;
- rc = wait_event_interruptible(
- xb_waitq,
- (intf->req_prod - intf->req_cons) !=
- XENSTORE_RING_SIZE);
- if (rc < 0)
- return rc;
-
/* Read indexes, then verify. */
cons = intf->req_cons;
prod = intf->req_prod;
@@ -115,6 +128,11 @@ int xb_write(const void *data, unsigned len)
intf->req_cons = intf->req_prod = 0;
return -EIO;
}
+ if (!xb_data_to_write())
+ return bytes;
+
+ /* Must write data /after/ reading the consumer index. */
+ virt_mb();
dst = get_output_chunk(cons, prod, intf->req, &avail);
if (avail == 0)
@@ -122,52 +140,45 @@ int xb_write(const void *data, unsigned len)
if (avail > len)
avail = len;
- /* Must write data /after/ reading the consumer index. */
- virt_mb();
-
memcpy(dst, data, avail);
data += avail;
len -= avail;
+ bytes += avail;
/* Other side must not see new producer until data is there. */
virt_wmb();
intf->req_prod += avail;
/* Implies mb(): other side will see the updated producer. */
- notify_remote_via_evtchn(xen_store_evtchn);
+ if (prod <= intf->req_cons)
+ notify_remote_via_evtchn(xen_store_evtchn);
}
- return 0;
+ return bytes;
}
-int xb_data_to_read(void)
+static int xb_data_to_read(void)
{
struct xenstore_domain_interface *intf = xen_store_interface;
return (intf->rsp_cons != intf->rsp_prod);
}
-int xb_wait_for_data_to_read(void)
-{
- return wait_event_interruptible(xb_waitq, xb_data_to_read());
-}
-
-int xb_read(void *data, unsigned len)
+static int xb_read(void *data, unsigned int len)
{
struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
- int rc;
+ unsigned int bytes = 0;
while (len != 0) {
unsigned int avail;
const char *src;
- rc = xb_wait_for_data_to_read();
- if (rc < 0)
- return rc;
-
/* Read indexes, then verify. */
cons = intf->rsp_cons;
prod = intf->rsp_prod;
+ if (cons == prod)
+ return bytes;
+
if (!check_indexes(cons, prod)) {
intf->rsp_cons = intf->rsp_prod = 0;
return -EIO;
@@ -185,17 +196,243 @@ int xb_read(void *data, unsigned len)
memcpy(data, src, avail);
data += avail;
len -= avail;
+ bytes += avail;
/* Other side must not see free space until we've copied out */
virt_mb();
intf->rsp_cons += avail;
- pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
-
/* Implies mb(): other side will see the updated consumer. */
- notify_remote_via_evtchn(xen_store_evtchn);
+ if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
+ notify_remote_via_evtchn(xen_store_evtchn);
+ }
+
+ return bytes;
+}
+
+static int process_msg(void)
+{
+ static struct {
+ struct xsd_sockmsg msg;
+ char *body;
+ union {
+ void *alloc;
+ struct xs_watch_event *watch;
+ };
+ bool in_msg;
+ bool in_hdr;
+ unsigned int read;
+ } state;
+ struct xb_req_data *req;
+ int err;
+ unsigned int len;
+
+ if (!state.in_msg) {
+ state.in_msg = true;
+ state.in_hdr = true;
+ state.read = 0;
+
+ /*
+ * We must disallow save/restore while reading a message.
+ * A partial read across s/r leaves us out of sync with
+ * xenstored.
+ * xs_response_mutex is locked as long as we are processing one
+ * message. state.in_msg will be true as long as we are holding
+ * the lock here.
+ */
+ mutex_lock(&xs_response_mutex);
+
+ if (!xb_data_to_read()) {
+ /* We raced with save/restore: pending data 'gone'. */
+ mutex_unlock(&xs_response_mutex);
+ state.in_msg = false;
+ return 0;
+ }
+ }
+
+ if (state.in_hdr) {
+ if (state.read != sizeof(state.msg)) {
+ err = xb_read((void *)&state.msg + state.read,
+ sizeof(state.msg) - state.read);
+ if (err < 0)
+ goto out;
+ state.read += err;
+ if (state.read != sizeof(state.msg))
+ return 0;
+ if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ len = state.msg.len + 1;
+ if (state.msg.type == XS_WATCH_EVENT)
+ len += sizeof(*state.watch);
+
+ state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
+ if (!state.alloc)
+ return -ENOMEM;
+
+ if (state.msg.type == XS_WATCH_EVENT)
+ state.body = state.watch->body;
+ else
+ state.body = state.alloc;
+ state.in_hdr = false;
+ state.read = 0;
+ }
+
+ err = xb_read(state.body + state.read, state.msg.len - state.read);
+ if (err < 0)
+ goto out;
+
+ state.read += err;
+ if (state.read != state.msg.len)
+ return 0;
+
+ state.body[state.msg.len] = '\0';
+
+ if (state.msg.type == XS_WATCH_EVENT) {
+ state.watch->len = state.msg.len;
+ err = xs_watch_msg(state.watch);
+ } else {
+ err = -ENOENT;
+ mutex_lock(&xb_write_mutex);
+ list_for_each_entry(req, &xs_reply_list, list) {
+ if (req->msg.req_id == state.msg.req_id) {
+ if (req->state == xb_req_state_wait_reply) {
+ req->msg.type = state.msg.type;
+ req->msg.len = state.msg.len;
+ req->body = state.body;
+ req->state = xb_req_state_got_reply;
+ list_del(&req->list);
+ req->cb(req);
+ } else {
+ list_del(&req->list);
+ kfree(req);
+ }
+ err = 0;
+ break;
+ }
+ }
+ mutex_unlock(&xb_write_mutex);
+ if (err)
+ goto out;
}
+ mutex_unlock(&xs_response_mutex);
+
+ state.in_msg = false;
+ state.alloc = NULL;
+ return err;
+
+ out:
+ mutex_unlock(&xs_response_mutex);
+ state.in_msg = false;
+ kfree(state.alloc);
+ state.alloc = NULL;
+ return err;
+}
+
+static int process_writes(void)
+{
+ static struct {
+ struct xb_req_data *req;
+ int idx;
+ unsigned int written;
+ } state;
+ void *base;
+ unsigned int len;
+ int err = 0;
+
+ if (!xb_data_to_write())
+ return 0;
+
+ mutex_lock(&xb_write_mutex);
+
+ if (!state.req) {
+ state.req = list_first_entry(&xb_write_list,
+ struct xb_req_data, list);
+ state.idx = -1;
+ state.written = 0;
+ }
+
+ if (state.req->state == xb_req_state_aborted)
+ goto out_err;
+
+ while (state.idx < state.req->num_vecs) {
+ if (state.idx < 0) {
+ base = &state.req->msg;
+ len = sizeof(state.req->msg);
+ } else {
+ base = state.req->vec[state.idx].iov_base;
+ len = state.req->vec[state.idx].iov_len;
+ }
+ err = xb_write(base + state.written, len - state.written);
+ if (err < 0)
+ goto out_err;
+ state.written += err;
+ if (state.written != len)
+ goto out;
+
+ state.idx++;
+ state.written = 0;
+ }
+
+ list_del(&state.req->list);
+ state.req->state = xb_req_state_wait_reply;
+ list_add_tail(&state.req->list, &xs_reply_list);
+ state.req = NULL;
+
+ out:
+ mutex_unlock(&xb_write_mutex);
+
+ return 0;
+
+ out_err:
+ state.req->msg.type = XS_ERROR;
+ state.req->err = err;
+ list_del(&state.req->list);
+ if (state.req->state == xb_req_state_aborted)
+ kfree(state.req);
+ else {
+ state.req->state = xb_req_state_got_reply;
+ wake_up(&state.req->wq);
+ }
+
+ mutex_unlock(&xb_write_mutex);
+
+ state.req = NULL;
+
+ return err;
+}
+
+static int xb_thread_work(void)
+{
+ return xb_data_to_read() || xb_data_to_write();
+}
+
+static int xenbus_thread(void *unused)
+{
+ int err;
+
+ while (!kthread_should_stop()) {
+ if (wait_event_interruptible(xb_waitq, xb_thread_work()))
+ continue;
+
+ err = process_msg();
+ if (err == -ENOMEM)
+ schedule();
+ else if (err)
+ pr_warn_ratelimited("error %d while reading message\n",
+ err);
+
+ err = process_writes();
+ if (err)
+ pr_warn_ratelimited("error %d while writing message\n",
+ err);
+ }
+
+ xenbus_task = NULL;
return 0;
}
@@ -223,6 +460,7 @@ int xb_init_comms(void)
rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
} else {
int err;
+
err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
0, "xenbus", &xb_waitq);
if (err < 0) {
@@ -231,6 +469,13 @@ int xb_init_comms(void)
}
xenbus_irq = err;
+
+ if (!xenbus_task) {
+ xenbus_task = kthread_run(xenbus_thread, NULL,
+ "xenbus");
+ if (IS_ERR(xenbus_task))
+ return PTR_ERR(xenbus_task);
+ }
}
return 0;
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
deleted file mode 100644
index 867a2e425208..000000000000
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Private include for xenbus communications.
- *
- * Copyright (C) 2005 Rusty Russell, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _XENBUS_COMMS_H
-#define _XENBUS_COMMS_H
-
-#include <linux/fs.h>
-
-int xs_init(void);
-int xb_init_comms(void);
-void xb_deinit_comms(void);
-
-/* Low level routines. */
-int xb_write(const void *data, unsigned len);
-int xb_read(void *data, unsigned len);
-int xb_data_to_read(void);
-int xb_wait_for_data_to_read(void);
-extern struct xenstore_domain_interface *xen_store_interface;
-extern int xen_store_evtchn;
-extern enum xenstore_init xen_store_domain_type;
-
-extern const struct file_operations xen_xenbus_fops;
-
-#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index 4a41ac9af966..1126701e212e 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -16,7 +16,7 @@
#include <xen/events.h>
#include <asm/xen/hypervisor.h>
-#include "xenbus_comms.h"
+#include "xenbus.h"
static int xenbus_backend_open(struct inode *inode, struct file *filp)
{
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 79130b310247..4d343eed08f5 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -57,12 +57,12 @@
#include <linux/miscdevice.h>
#include <linux/init.h>
-#include "xenbus_comms.h"
-
#include <xen/xenbus.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
+#include "xenbus.h"
+
/*
* An element of a list of outstanding transactions, for which we're
* still waiting a reply.
@@ -113,6 +113,7 @@ struct xenbus_file_priv {
struct list_head read_buffers;
wait_queue_head_t read_waitq;
+ struct kref kref;
};
/* Read out any raw xenbus messages queued up. */
@@ -258,26 +259,23 @@ out_fail:
}
static void watch_fired(struct xenbus_watch *watch,
- const char **vec,
- unsigned int len)
+ const char *path,
+ const char *token)
{
struct watch_adapter *adap;
struct xsd_sockmsg hdr;
- const char *path, *token;
- int path_len, tok_len, body_len, data_len = 0;
+ const char *token_caller;
+ int path_len, tok_len, body_len;
int ret;
LIST_HEAD(staging_q);
adap = container_of(watch, struct watch_adapter, watch);
- path = vec[XS_WATCH_PATH];
- token = adap->token;
+ token_caller = adap->token;
path_len = strlen(path) + 1;
- tok_len = strlen(token) + 1;
- if (len > 2)
- data_len = vec[len] - vec[2] + 1;
- body_len = path_len + tok_len + data_len;
+ tok_len = strlen(token_caller) + 1;
+ body_len = path_len + tok_len;
hdr.type = XS_WATCH_EVENT;
hdr.len = body_len;
@@ -288,9 +286,7 @@ static void watch_fired(struct xenbus_watch *watch,
if (!ret)
ret = queue_reply(&staging_q, path, path_len);
if (!ret)
- ret = queue_reply(&staging_q, token, tok_len);
- if (!ret && len > 2)
- ret = queue_reply(&staging_q, vec[2], data_len);
+ ret = queue_reply(&staging_q, token_caller, tok_len);
if (!ret) {
/* success: pass reply list onto watcher */
@@ -302,6 +298,107 @@ static void watch_fired(struct xenbus_watch *watch,
mutex_unlock(&adap->dev_data->reply_mutex);
}
+static void xenbus_file_free(struct kref *kref)
+{
+ struct xenbus_file_priv *u;
+ struct xenbus_transaction_holder *trans, *tmp;
+ struct watch_adapter *watch, *tmp_watch;
+ struct read_buffer *rb, *tmp_rb;
+
+ u = container_of(kref, struct xenbus_file_priv, kref);
+
+ /*
+ * No need for locking here because there are no other users,
+ * by definition.
+ */
+
+ list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
+ xenbus_transaction_end(trans->handle, 1);
+ list_del(&trans->list);
+ kfree(trans);
+ }
+
+ list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+ unregister_xenbus_watch(&watch->watch);
+ list_del(&watch->list);
+ free_watch_adapter(watch);
+ }
+
+ list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
+ list_del(&rb->list);
+ kfree(rb);
+ }
+ kfree(u);
+}
+
+static struct xenbus_transaction_holder *xenbus_get_transaction(
+ struct xenbus_file_priv *u, uint32_t tx_id)
+{
+ struct xenbus_transaction_holder *trans;
+
+ list_for_each_entry(trans, &u->transactions, list)
+ if (trans->handle.id == tx_id)
+ return trans;
+
+ return NULL;
+}
+
+void xenbus_dev_queue_reply(struct xb_req_data *req)
+{
+ struct xenbus_file_priv *u = req->par;
+ struct xenbus_transaction_holder *trans = NULL;
+ int rc;
+ LIST_HEAD(staging_q);
+
+ xs_request_exit(req);
+
+ mutex_lock(&u->msgbuffer_mutex);
+
+ if (req->type == XS_TRANSACTION_START) {
+ trans = xenbus_get_transaction(u, 0);
+ if (WARN_ON(!trans))
+ goto out;
+ if (req->msg.type == XS_ERROR) {
+ list_del(&trans->list);
+ kfree(trans);
+ } else {
+ rc = kstrtou32(req->body, 10, &trans->handle.id);
+ if (WARN_ON(rc))
+ goto out;
+ }
+ } else if (req->msg.type == XS_TRANSACTION_END) {
+ trans = xenbus_get_transaction(u, req->msg.tx_id);
+ if (WARN_ON(!trans))
+ goto out;
+ list_del(&trans->list);
+ kfree(trans);
+ }
+
+ mutex_unlock(&u->msgbuffer_mutex);
+
+ mutex_lock(&u->reply_mutex);
+ rc = queue_reply(&staging_q, &req->msg, sizeof(req->msg));
+ if (!rc)
+ rc = queue_reply(&staging_q, req->body, req->msg.len);
+ if (!rc) {
+ list_splice_tail(&staging_q, &u->read_buffers);
+ wake_up(&u->read_waitq);
+ } else {
+ queue_cleanup(&staging_q);
+ }
+ mutex_unlock(&u->reply_mutex);
+
+ kfree(req->body);
+ kfree(req);
+
+ kref_put(&u->kref, xenbus_file_free);
+
+ return;
+
+ out:
+ mutex_unlock(&u->msgbuffer_mutex);
+}
+
static int xenbus_command_reply(struct xenbus_file_priv *u,
unsigned int msg_type, const char *reply)
{
@@ -322,6 +419,9 @@ static int xenbus_command_reply(struct xenbus_file_priv *u,
wake_up(&u->read_waitq);
mutex_unlock(&u->reply_mutex);
+ if (!rc)
+ kref_put(&u->kref, xenbus_file_free);
+
return rc;
}
@@ -329,57 +429,22 @@ static int xenbus_write_transaction(unsigned msg_type,
struct xenbus_file_priv *u)
{
int rc;
- void *reply;
struct xenbus_transaction_holder *trans = NULL;
- LIST_HEAD(staging_q);
if (msg_type == XS_TRANSACTION_START) {
- trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+ trans = kzalloc(sizeof(*trans), GFP_KERNEL);
if (!trans) {
rc = -ENOMEM;
goto out;
}
- } else if (u->u.msg.tx_id != 0) {
- list_for_each_entry(trans, &u->transactions, list)
- if (trans->handle.id == u->u.msg.tx_id)
- break;
- if (&trans->list == &u->transactions)
- return xenbus_command_reply(u, XS_ERROR, "ENOENT");
- }
-
- reply = xenbus_dev_request_and_reply(&u->u.msg);
- if (IS_ERR(reply)) {
- if (msg_type == XS_TRANSACTION_START)
- kfree(trans);
- rc = PTR_ERR(reply);
- goto out;
- }
+ list_add(&trans->list, &u->transactions);
+ } else if (u->u.msg.tx_id != 0 &&
+ !xenbus_get_transaction(u, u->u.msg.tx_id))
+ return xenbus_command_reply(u, XS_ERROR, "ENOENT");
- if (msg_type == XS_TRANSACTION_START) {
- if (u->u.msg.type == XS_ERROR)
- kfree(trans);
- else {
- trans->handle.id = simple_strtoul(reply, NULL, 0);
- list_add(&trans->list, &u->transactions);
- }
- } else if (u->u.msg.type == XS_TRANSACTION_END) {
- list_del(&trans->list);
+ rc = xenbus_dev_request_and_reply(&u->u.msg, u);
+ if (rc)
kfree(trans);
- }
-
- mutex_lock(&u->reply_mutex);
- rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
- if (!rc)
- rc = queue_reply(&staging_q, reply, u->u.msg.len);
- if (!rc) {
- list_splice_tail(&staging_q, &u->read_buffers);
- wake_up(&u->read_waitq);
- } else {
- queue_cleanup(&staging_q);
- }
- mutex_unlock(&u->reply_mutex);
-
- kfree(reply);
out:
return rc;
@@ -511,6 +576,8 @@ static ssize_t xenbus_file_write(struct file *filp,
* OK, now we have a complete message. Do something with it.
*/
+ kref_get(&u->kref);
+
msg_type = u->u.msg.type;
switch (msg_type) {
@@ -525,8 +592,10 @@ static ssize_t xenbus_file_write(struct file *filp,
ret = xenbus_write_transaction(msg_type, u);
break;
}
- if (ret != 0)
+ if (ret != 0) {
rc = ret;
+ kref_put(&u->kref, xenbus_file_free);
+ }
/* Buffered message consumed */
u->len = 0;
@@ -551,6 +620,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
if (u == NULL)
return -ENOMEM;
+ kref_init(&u->kref);
+
INIT_LIST_HEAD(&u->transactions);
INIT_LIST_HEAD(&u->watches);
INIT_LIST_HEAD(&u->read_buffers);
@@ -567,32 +638,8 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
static int xenbus_file_release(struct inode *inode, struct file *filp)
{
struct xenbus_file_priv *u = filp->private_data;
- struct xenbus_transaction_holder *trans, *tmp;
- struct watch_adapter *watch, *tmp_watch;
- struct read_buffer *rb, *tmp_rb;
-
- /*
- * No need for locking here because there are no other users,
- * by definition.
- */
-
- list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
- xenbus_transaction_end(trans->handle, 1);
- list_del(&trans->list);
- kfree(trans);
- }
-
- list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
- unregister_xenbus_watch(&watch->watch);
- list_del(&watch->list);
- free_watch_adapter(watch);
- }
- list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
- list_del(&rb->list);
- kfree(rb);
- }
- kfree(u);
+ kref_put(&u->kref, xenbus_file_free);
return 0;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 4bdf654041e9..74888cacd0b0 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -62,8 +62,7 @@
#include <xen/hvm.h>
-#include "xenbus_comms.h"
-#include "xenbus_probe.h"
+#include "xenbus.h"
int xen_store_evtchn;
@@ -170,7 +169,7 @@ int xenbus_read_otherend_details(struct xenbus_device *xendev,
EXPORT_SYMBOL_GPL(xenbus_read_otherend_details);
void xenbus_otherend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len,
+ const char *path, const char *token,
int ignore_on_shutdown)
{
struct xenbus_device *dev =
@@ -181,18 +180,15 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
/* Protect us against watches firing on old details when the otherend
details change, say immediately after a resume. */
if (!dev->otherend ||
- strncmp(dev->otherend, vec[XS_WATCH_PATH],
- strlen(dev->otherend))) {
- dev_dbg(&dev->dev, "Ignoring watch at %s\n",
- vec[XS_WATCH_PATH]);
+ strncmp(dev->otherend, path, strlen(dev->otherend))) {
+ dev_dbg(&dev->dev, "Ignoring watch at %s\n", path);
return;
}
state = xenbus_read_driver_state(dev->otherend);
dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
- state, xenbus_strstate(state), dev->otherend_watch.node,
- vec[XS_WATCH_PATH]);
+ state, xenbus_strstate(state), dev->otherend_watch.node, path);
/*
* Ignore xenbus transitions during shutdown. This prevents us doing
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
deleted file mode 100644
index c9ec7ca1f7ab..000000000000
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/******************************************************************************
- * xenbus_probe.h
- *
- * Talks to Xen Store to figure out what devices we have.
- *
- * Copyright (C) 2005 Rusty Russell, IBM Corporation
- * Copyright (C) 2005 XenSource Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _XENBUS_PROBE_H
-#define _XENBUS_PROBE_H
-
-#define XEN_BUS_ID_SIZE 20
-
-struct xen_bus_type {
- char *root;
- unsigned int levels;
- int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
- int (*probe)(struct xen_bus_type *bus, const char *type,
- const char *dir);
- void (*otherend_changed)(struct xenbus_watch *watch, const char **vec,
- unsigned int len);
- struct bus_type bus;
-};
-
-enum xenstore_init {
- XS_UNKNOWN,
- XS_PV,
- XS_HVM,
- XS_LOCAL,
-};
-
-extern const struct attribute_group *xenbus_dev_groups[];
-
-extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
-extern int xenbus_dev_probe(struct device *_dev);
-extern int xenbus_dev_remove(struct device *_dev);
-extern int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus,
- struct module *owner,
- const char *mod_name);
-extern int xenbus_probe_node(struct xen_bus_type *bus,
- const char *type,
- const char *nodename);
-extern int xenbus_probe_devices(struct xen_bus_type *bus);
-
-extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
-
-extern void xenbus_dev_shutdown(struct device *_dev);
-
-extern int xenbus_dev_suspend(struct device *dev);
-extern int xenbus_dev_resume(struct device *dev);
-extern int xenbus_dev_cancel(struct device *dev);
-
-extern void xenbus_otherend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len,
- int ignore_on_shutdown);
-
-extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
- char *id_node, char *path_node);
-
-void xenbus_ring_ops_init(void);
-
-#endif
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 37929df829a3..b0bed4faf44c 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -53,8 +53,7 @@
#include <xen/xenbus.h>
#include <xen/features.h>
-#include "xenbus_comms.h"
-#include "xenbus_probe.h"
+#include "xenbus.h"
/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
@@ -182,9 +181,9 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type,
}
static void frontend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
- xenbus_otherend_changed(watch, vec, len, 0);
+ xenbus_otherend_changed(watch, path, token, 0);
}
static struct xen_bus_type xenbus_backend = {
@@ -205,11 +204,11 @@ static struct xen_bus_type xenbus_backend = {
};
static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
DPRINTK("");
- xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+ xenbus_dev_changed(path, &xenbus_backend);
}
static struct xenbus_watch be_watch = {
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 6d40a972ffb2..19e45ce21f89 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -27,8 +27,7 @@
#include <xen/platform_pci.h>
-#include "xenbus_comms.h"
-#include "xenbus_probe.h"
+#include "xenbus.h"
@@ -87,9 +86,9 @@ static int xenbus_uevent_frontend(struct device *_dev,
static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
- xenbus_otherend_changed(watch, vec, len, 1);
+ xenbus_otherend_changed(watch, path, token, 1);
}
static void xenbus_frontend_delayed_resume(struct work_struct *w)
@@ -154,11 +153,11 @@ static struct xen_bus_type xenbus_frontend = {
};
static void frontend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+ const char *path, const char *token)
{
DPRINTK("");
- xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+ xenbus_dev_changed(path, &xenbus_frontend);
}
@@ -333,13 +332,13 @@ static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
static int backend_state;
static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
- const char **v, unsigned int l)
+ const char *path, const char *token)
{
- if (xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i",
+ if (xenbus_scanf(XBT_NIL, path, "", "%i",
&backend_state) != 1)
backend_state = XenbusStateUnknown;
printk(KERN_DEBUG "XENBUS: backend %s %s\n",
- v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+ path, xenbus_strstate(backend_state));
wake_up(&backend_state_wq);
}
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 6afb993c5809..e46080214955 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -43,69 +43,36 @@
#include <linux/slab.h>
#include <linux/fcntl.h>
#include <linux/kthread.h>
+#include <linux/reboot.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <asm/xen/hypervisor.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
-#include "xenbus_comms.h"
-#include "xenbus_probe.h"
-
-struct xs_stored_msg {
- struct list_head list;
-
- struct xsd_sockmsg hdr;
-
- union {
- /* Queued replies. */
- struct {
- char *body;
- } reply;
-
- /* Queued watch events. */
- struct {
- struct xenbus_watch *handle;
- char **vec;
- unsigned int vec_size;
- } watch;
- } u;
-};
+#include "xenbus.h"
-struct xs_handle {
- /* A list of replies. Currently only one will ever be outstanding. */
- struct list_head reply_list;
- spinlock_t reply_lock;
- wait_queue_head_t reply_waitq;
-
- /*
- * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
- * response_mutex is never taken simultaneously with the other three.
- *
- * transaction_mutex must be held before incrementing
- * transaction_count. The mutex is held when a suspend is in
- * progress to prevent new transactions starting.
- *
- * When decrementing transaction_count to zero the wait queue
- * should be woken up, the suspend code waits for count to
- * reach zero.
- */
-
- /* One request at a time. */
- struct mutex request_mutex;
-
- /* Protect xenbus reader thread against save/restore. */
- struct mutex response_mutex;
-
- /* Protect transactions against save/restore. */
- struct mutex transaction_mutex;
- atomic_t transaction_count;
- wait_queue_head_t transaction_wq;
-
- /* Protect watch (de)register against save/restore. */
- struct rw_semaphore watch_mutex;
-};
+/*
+ * Framework to protect suspend/resume handling against normal Xenstore
+ * message handling:
+ * During suspend/resume there must be no open transaction and no pending
+ * Xenstore request.
+ * New watch events happening in this time can be ignored by firing all watches
+ * after resume.
+ */
+
+/* Lock protecting enter/exit critical region. */
+static DEFINE_SPINLOCK(xs_state_lock);
+/* Number of users in critical region (protected by xs_state_lock). */
+static unsigned int xs_state_users;
+/* Suspend handler waiting or already active (protected by xs_state_lock)? */
+static int xs_suspend_active;
+/* Unique Xenstore request id (protected by xs_state_lock). */
+static uint32_t xs_request_id;
-static struct xs_handle xs_state;
+/* Wait queue for all callers waiting for critical region to become usable. */
+static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq);
+/* Wait queue for suspend handling waiting for critical region being empty. */
+static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq);
/* List of registered watches, and a lock to protect it. */
static LIST_HEAD(watches);
@@ -115,6 +82,9 @@ static DEFINE_SPINLOCK(watches_lock);
static LIST_HEAD(watch_events);
static DEFINE_SPINLOCK(watch_events_lock);
+/* Protect watch (de)register against save/restore. */
+static DECLARE_RWSEM(xs_watch_rwsem);
+
/*
* Details of the xenwatch callback kernel thread. The thread waits on the
* watch_events_waitq for work to do (queued on watch_events list). When it
@@ -125,6 +95,59 @@ static pid_t xenwatch_pid;
static DEFINE_MUTEX(xenwatch_mutex);
static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
+static void xs_suspend_enter(void)
+{
+ spin_lock(&xs_state_lock);
+ xs_suspend_active++;
+ spin_unlock(&xs_state_lock);
+ wait_event(xs_state_exit_wq, xs_state_users == 0);
+}
+
+static void xs_suspend_exit(void)
+{
+ spin_lock(&xs_state_lock);
+ xs_suspend_active--;
+ spin_unlock(&xs_state_lock);
+ wake_up_all(&xs_state_enter_wq);
+}
+
+static uint32_t xs_request_enter(struct xb_req_data *req)
+{
+ uint32_t rq_id;
+
+ req->type = req->msg.type;
+
+ spin_lock(&xs_state_lock);
+
+ while (!xs_state_users && xs_suspend_active) {
+ spin_unlock(&xs_state_lock);
+ wait_event(xs_state_enter_wq, xs_suspend_active == 0);
+ spin_lock(&xs_state_lock);
+ }
+
+ if (req->type == XS_TRANSACTION_START)
+ xs_state_users++;
+ xs_state_users++;
+ rq_id = xs_request_id++;
+
+ spin_unlock(&xs_state_lock);
+
+ return rq_id;
+}
+
+void xs_request_exit(struct xb_req_data *req)
+{
+ spin_lock(&xs_state_lock);
+ xs_state_users--;
+ if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
+ req->type == XS_TRANSACTION_END)
+ xs_state_users--;
+ spin_unlock(&xs_state_lock);
+
+ if (xs_suspend_active && !xs_state_users)
+ wake_up(&xs_state_exit_wq);
+}
+
static int get_error(const char *errorstring)
{
unsigned int i;
@@ -162,21 +185,24 @@ static bool xenbus_ok(void)
}
return false;
}
-static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
+
+static bool test_reply(struct xb_req_data *req)
{
- struct xs_stored_msg *msg;
- char *body;
+ if (req->state == xb_req_state_got_reply || !xenbus_ok())
+ return true;
+
+ /* Make sure to reread req->state each time. */
+ barrier();
- spin_lock(&xs_state.reply_lock);
+ return false;
+}
+
+static void *read_reply(struct xb_req_data *req)
+{
+ while (req->state != xb_req_state_got_reply) {
+ wait_event(req->wq, test_reply(req));
- while (list_empty(&xs_state.reply_list)) {
- spin_unlock(&xs_state.reply_lock);
- if (xenbus_ok())
- /* XXX FIXME: Avoid synchronous wait for response here. */
- wait_event_timeout(xs_state.reply_waitq,
- !list_empty(&xs_state.reply_list),
- msecs_to_jiffies(500));
- else {
+ if (!xenbus_ok())
/*
* If we are in the process of being shut-down there is
* no point of trying to contact XenBus - it is either
@@ -184,76 +210,82 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
* has been killed or is unreachable.
*/
return ERR_PTR(-EIO);
- }
- spin_lock(&xs_state.reply_lock);
+ if (req->err)
+ return ERR_PTR(req->err);
+
}
- msg = list_entry(xs_state.reply_list.next,
- struct xs_stored_msg, list);
- list_del(&msg->list);
+ return req->body;
+}
- spin_unlock(&xs_state.reply_lock);
+static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
+{
+ bool notify;
- *type = msg->hdr.type;
- if (len)
- *len = msg->hdr.len;
- body = msg->u.reply.body;
+ req->msg = *msg;
+ req->err = 0;
+ req->state = xb_req_state_queued;
+ init_waitqueue_head(&req->wq);
- kfree(msg);
+ req->msg.req_id = xs_request_enter(req);
- return body;
-}
+ mutex_lock(&xb_write_mutex);
+ list_add_tail(&req->list, &xb_write_list);
+ notify = list_is_singular(&xb_write_list);
+ mutex_unlock(&xb_write_mutex);
-static void transaction_start(void)
-{
- mutex_lock(&xs_state.transaction_mutex);
- atomic_inc(&xs_state.transaction_count);
- mutex_unlock(&xs_state.transaction_mutex);
+ if (notify)
+ wake_up(&xb_waitq);
}
-static void transaction_end(void)
+static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg)
{
- if (atomic_dec_and_test(&xs_state.transaction_count))
- wake_up(&xs_state.transaction_wq);
-}
+ void *ret;
-static void transaction_suspend(void)
-{
- mutex_lock(&xs_state.transaction_mutex);
- wait_event(xs_state.transaction_wq,
- atomic_read(&xs_state.transaction_count) == 0);
+ ret = read_reply(req);
+
+ xs_request_exit(req);
+
+ msg->type = req->msg.type;
+ msg->len = req->msg.len;
+
+ mutex_lock(&xb_write_mutex);
+ if (req->state == xb_req_state_queued ||
+ req->state == xb_req_state_wait_reply)
+ req->state = xb_req_state_aborted;
+ else
+ kfree(req);
+ mutex_unlock(&xb_write_mutex);
+
+ return ret;
}
-static void transaction_resume(void)
+static void xs_wake_up(struct xb_req_data *req)
{
- mutex_unlock(&xs_state.transaction_mutex);
+ wake_up(&req->wq);
}
-void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par)
{
- void *ret;
- enum xsd_sockmsg_type type = msg->type;
- int err;
-
- if (type == XS_TRANSACTION_START)
- transaction_start();
+ struct xb_req_data *req;
+ struct kvec *vec;
- mutex_lock(&xs_state.request_mutex);
+ req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
- err = xb_write(msg, sizeof(*msg) + msg->len);
- if (err) {
- msg->type = XS_ERROR;
- ret = ERR_PTR(err);
- } else
- ret = read_reply(&msg->type, &msg->len);
+ vec = (struct kvec *)(req + 1);
+ vec->iov_len = msg->len;
+ vec->iov_base = msg + 1;
- mutex_unlock(&xs_state.request_mutex);
+ req->vec = vec;
+ req->num_vecs = 1;
+ req->cb = xenbus_dev_queue_reply;
+ req->par = par;
- if ((msg->type == XS_TRANSACTION_END) ||
- ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR)))
- transaction_end();
+ xs_send(req, msg);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(xenbus_dev_request_and_reply);
@@ -264,37 +296,31 @@ static void *xs_talkv(struct xenbus_transaction t,
unsigned int num_vecs,
unsigned int *len)
{
+ struct xb_req_data *req;
struct xsd_sockmsg msg;
void *ret = NULL;
unsigned int i;
int err;
+ req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ req->vec = iovec;
+ req->num_vecs = num_vecs;
+ req->cb = xs_wake_up;
+
msg.tx_id = t.id;
- msg.req_id = 0;
msg.type = type;
msg.len = 0;
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len;
- mutex_lock(&xs_state.request_mutex);
-
- err = xb_write(&msg, sizeof(msg));
- if (err) {
- mutex_unlock(&xs_state.request_mutex);
- return ERR_PTR(err);
- }
-
- for (i = 0; i < num_vecs; i++) {
- err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
- if (err) {
- mutex_unlock(&xs_state.request_mutex);
- return ERR_PTR(err);
- }
- }
-
- ret = read_reply(&msg.type, len);
+ xs_send(req, &msg);
- mutex_unlock(&xs_state.request_mutex);
+ ret = xs_wait_for_reply(req, &msg);
+ if (len)
+ *len = msg.len;
if (IS_ERR(ret))
return ret;
@@ -501,13 +527,9 @@ int xenbus_transaction_start(struct xenbus_transaction *t)
{
char *id_str;
- transaction_start();
-
id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
- if (IS_ERR(id_str)) {
- transaction_end();
+ if (IS_ERR(id_str))
return PTR_ERR(id_str);
- }
t->id = simple_strtoul(id_str, NULL, 0);
kfree(id_str);
@@ -521,18 +543,13 @@ EXPORT_SYMBOL_GPL(xenbus_transaction_start);
int xenbus_transaction_end(struct xenbus_transaction t, int abort)
{
char abortstr[2];
- int err;
if (abort)
strcpy(abortstr, "F");
else
strcpy(abortstr, "T");
- err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
-
- transaction_end();
-
- return err;
+ return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
}
EXPORT_SYMBOL_GPL(xenbus_transaction_end);
@@ -665,6 +682,30 @@ static struct xenbus_watch *find_watch(const char *token)
return NULL;
}
+
+int xs_watch_msg(struct xs_watch_event *event)
+{
+ if (count_strings(event->body, event->len) != 2) {
+ kfree(event);
+ return -EINVAL;
+ }
+ event->path = (const char *)event->body;
+ event->token = (const char *)strchr(event->body, '\0') + 1;
+
+ spin_lock(&watches_lock);
+ event->handle = find_watch(event->token);
+ if (event->handle != NULL) {
+ spin_lock(&watch_events_lock);
+ list_add_tail(&event->list, &watch_events);
+ wake_up(&watch_events_waitq);
+ spin_unlock(&watch_events_lock);
+ } else
+ kfree(event);
+ spin_unlock(&watches_lock);
+
+ return 0;
+}
+
/*
* Certain older XenBus toolstack cannot handle reading values that are
* not populated. Some Xen 3.4 installation are incapable of doing this
@@ -713,7 +754,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
sprintf(token, "%lX", (long)watch);
- down_read(&xs_state.watch_mutex);
+ down_read(&xs_watch_rwsem);
spin_lock(&watches_lock);
BUG_ON(find_watch(token));
@@ -728,7 +769,7 @@ int register_xenbus_watch(struct xenbus_watch *watch)
spin_unlock(&watches_lock);
}
- up_read(&xs_state.watch_mutex);
+ up_read(&xs_watch_rwsem);
return err;
}
@@ -736,13 +777,13 @@ EXPORT_SYMBOL_GPL(register_xenbus_watch);
void unregister_xenbus_watch(struct xenbus_watch *watch)
{
- struct xs_stored_msg *msg, *tmp;
+ struct xs_watch_event *event, *tmp;
char token[sizeof(watch) * 2 + 1];
int err;
sprintf(token, "%lX", (long)watch);
- down_read(&xs_state.watch_mutex);
+ down_read(&xs_watch_rwsem);
spin_lock(&watches_lock);
BUG_ON(!find_watch(token));
@@ -753,7 +794,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
if (err)
pr_warn("Failed to release watch %s: %i\n", watch->node, err);
- up_read(&xs_state.watch_mutex);
+ up_read(&xs_watch_rwsem);
/* Make sure there are no callbacks running currently (unless
its us) */
@@ -762,12 +803,11 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
/* Cancel pending watch events. */
spin_lock(&watch_events_lock);
- list_for_each_entry_safe(msg, tmp, &watch_events, list) {
- if (msg->u.watch.handle != watch)
+ list_for_each_entry_safe(event, tmp, &watch_events, list) {
+ if (event->handle != watch)
continue;
- list_del(&msg->list);
- kfree(msg->u.watch.vec);
- kfree(msg);
+ list_del(&event->list);
+ kfree(event);
}
spin_unlock(&watch_events_lock);
@@ -778,10 +818,10 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
void xs_suspend(void)
{
- transaction_suspend();
- down_write(&xs_state.watch_mutex);
- mutex_lock(&xs_state.request_mutex);
- mutex_lock(&xs_state.response_mutex);
+ xs_suspend_enter();
+
+ down_write(&xs_watch_rwsem);
+ mutex_lock(&xs_response_mutex);
}
void xs_resume(void)
@@ -791,31 +831,31 @@ void xs_resume(void)
xb_init_comms();
- mutex_unlock(&xs_state.response_mutex);
- mutex_unlock(&xs_state.request_mutex);
- transaction_resume();
+ mutex_unlock(&xs_response_mutex);
+
+ xs_suspend_exit();
- /* No need for watches_lock: the watch_mutex is sufficient. */
+ /* No need for watches_lock: the xs_watch_rwsem is sufficient. */
list_for_each_entry(watch, &watches, list) {
sprintf(token, "%lX", (long)watch);
xs_watch(watch->node, token);
}
- up_write(&xs_state.watch_mutex);
+ up_write(&xs_watch_rwsem);
}
void xs_suspend_cancel(void)
{
- mutex_unlock(&xs_state.response_mutex);
- mutex_unlock(&xs_state.request_mutex);
- up_write(&xs_state.watch_mutex);
- mutex_unlock(&xs_state.transaction_mutex);
+ mutex_unlock(&xs_response_mutex);
+ up_write(&xs_watch_rwsem);
+
+ xs_suspend_exit();
}
static int xenwatch_thread(void *unused)
{
struct list_head *ent;
- struct xs_stored_msg *msg;
+ struct xs_watch_event *event;
for (;;) {
wait_event_interruptible(watch_events_waitq,
@@ -833,13 +873,10 @@ static int xenwatch_thread(void *unused)
spin_unlock(&watch_events_lock);
if (ent != &watch_events) {
- msg = list_entry(ent, struct xs_stored_msg, list);
- msg->u.watch.handle->callback(
- msg->u.watch.handle,
- (const char **)msg->u.watch.vec,
- msg->u.watch.vec_size);
- kfree(msg->u.watch.vec);
- kfree(msg);
+ event = list_entry(ent, struct xs_watch_event, list);
+ event->handle->callback(event->handle, event->path,
+ event->token);
+ kfree(event);
}
mutex_unlock(&xenwatch_mutex);
@@ -848,126 +885,37 @@ static int xenwatch_thread(void *unused)
return 0;
}
-static int process_msg(void)
+/*
+ * Wake up all threads waiting for a xenstore reply. In case of shutdown all
+ * pending replies will be marked as "aborted" in order to let the waiters
+ * return in spite of xenstore possibly no longer being able to reply. This
+ * will avoid blocking shutdown by a thread waiting for xenstore but being
+ * necessary for shutdown processing to proceed.
+ */
+static int xs_reboot_notify(struct notifier_block *nb,
+ unsigned long code, void *unused)
{
- struct xs_stored_msg *msg;
- char *body;
- int err;
-
- /*
- * We must disallow save/restore while reading a xenstore message.
- * A partial read across s/r leaves us out of sync with xenstored.
- */
- for (;;) {
- err = xb_wait_for_data_to_read();
- if (err)
- return err;
- mutex_lock(&xs_state.response_mutex);
- if (xb_data_to_read())
- break;
- /* We raced with save/restore: pending data 'disappeared'. */
- mutex_unlock(&xs_state.response_mutex);
- }
-
-
- msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH);
- if (msg == NULL) {
- err = -ENOMEM;
- goto out;
- }
-
- err = xb_read(&msg->hdr, sizeof(msg->hdr));
- if (err) {
- kfree(msg);
- goto out;
- }
-
- if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
- kfree(msg);
- err = -EINVAL;
- goto out;
- }
-
- body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
- if (body == NULL) {
- kfree(msg);
- err = -ENOMEM;
- goto out;
- }
-
- err = xb_read(body, msg->hdr.len);
- if (err) {
- kfree(body);
- kfree(msg);
- goto out;
- }
- body[msg->hdr.len] = '\0';
-
- if (msg->hdr.type == XS_WATCH_EVENT) {
- msg->u.watch.vec = split(body, msg->hdr.len,
- &msg->u.watch.vec_size);
- if (IS_ERR(msg->u.watch.vec)) {
- err = PTR_ERR(msg->u.watch.vec);
- kfree(msg);
- goto out;
- }
-
- spin_lock(&watches_lock);
- msg->u.watch.handle = find_watch(
- msg->u.watch.vec[XS_WATCH_TOKEN]);
- if (msg->u.watch.handle != NULL) {
- spin_lock(&watch_events_lock);
- list_add_tail(&msg->list, &watch_events);
- wake_up(&watch_events_waitq);
- spin_unlock(&watch_events_lock);
- } else {
- kfree(msg->u.watch.vec);
- kfree(msg);
- }
- spin_unlock(&watches_lock);
- } else {
- msg->u.reply.body = body;
- spin_lock(&xs_state.reply_lock);
- list_add_tail(&msg->list, &xs_state.reply_list);
- spin_unlock(&xs_state.reply_lock);
- wake_up(&xs_state.reply_waitq);
- }
+ struct xb_req_data *req;
- out:
- mutex_unlock(&xs_state.response_mutex);
- return err;
+ mutex_lock(&xb_write_mutex);
+ list_for_each_entry(req, &xs_reply_list, list)
+ wake_up(&req->wq);
+ list_for_each_entry(req, &xb_write_list, list)
+ wake_up(&req->wq);
+ mutex_unlock(&xb_write_mutex);
+ return NOTIFY_DONE;
}
-static int xenbus_thread(void *unused)
-{
- int err;
-
- for (;;) {
- err = process_msg();
- if (err)
- pr_warn("error %d while reading message\n", err);
- if (kthread_should_stop())
- break;
- }
-
- return 0;
-}
+static struct notifier_block xs_reboot_nb = {
+ .notifier_call = xs_reboot_notify,
+};
int xs_init(void)
{
int err;
struct task_struct *task;
- INIT_LIST_HEAD(&xs_state.reply_list);
- spin_lock_init(&xs_state.reply_lock);
- init_waitqueue_head(&xs_state.reply_waitq);
-
- mutex_init(&xs_state.request_mutex);
- mutex_init(&xs_state.response_mutex);
- mutex_init(&xs_state.transaction_mutex);
- init_rwsem(&xs_state.watch_mutex);
- atomic_set(&xs_state.transaction_count, 0);
- init_waitqueue_head(&xs_state.transaction_wq);
+ register_reboot_notifier(&xs_reboot_nb);
/* Initialize the shared memory rings to talk to xenstored */
err = xb_init_comms();
@@ -979,10 +927,6 @@ int xs_init(void)
return PTR_ERR(task);
xenwatch_pid = task->pid;
- task = kthread_run(xenbus_thread, NULL, "xenbus");
- if (IS_ERR(task))
- return PTR_ERR(task);
-
/* shutdown watches for kexec boot */
xs_reset_watches();
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 8559a71f36b1..328c3987b112 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,10 +16,10 @@
#include <linux/magic.h>
#include <xen/xen.h>
+#include <xen/xenbus.h>
#include "xenfs.h"
#include "../privcmd.h"
-#include "../xenbus/xenbus_comms.h"
#include <asm/xen/hypervisor.h>
diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c
index fef20dbc6a5c..82fd2a396d96 100644
--- a/drivers/xen/xenfs/xenstored.c
+++ b/drivers/xen/xenfs/xenstored.c
@@ -4,9 +4,9 @@
#include <linux/fs.h>
#include <xen/page.h>
+#include <xen/xenbus.h>
#include "xenfs.h"
-#include "../xenbus/xenbus_comms.h"
static ssize_t xsd_read(struct file *file, char __user *buf,
size_t size, loff_t *off)