summaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig3
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/biomerge.c13
-rw-r--r--drivers/xen/events.c610
-rw-r--r--drivers/xen/pci.c117
-rw-r--r--drivers/xen/xenbus/xenbus_client.c2
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c33
-rw-r--r--drivers/xen/xenfs/Makefile3
-rw-r--r--drivers/xen/xenfs/privcmd.c404
-rw-r--r--drivers/xen/xenfs/super.c103
-rw-r--r--drivers/xen/xenfs/xenfs.h3
-rw-r--r--drivers/xen/xenfs/xenstored.c68
12 files changed, 1295 insertions, 66 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 60d71e9abe9f..6e6180ccd726 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -74,6 +74,7 @@ config XEN_PLATFORM_PCI
config SWIOTLB_XEN
def_bool y
- depends on SWIOTLB
+ depends on PCI
+ select SWIOTLB
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fcaf838f54be..eb8a78d77d9d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,6 +4,7 @@ obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
+obj-$(CONFIG_BLOCK) += biomerge.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
@@ -12,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
+obj-$(CONFIG_XEN_DOM0) += pci.o
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
new file mode 100644
index 000000000000..ba6eda4b5143
--- /dev/null
+++ b/drivers/xen/biomerge.c
@@ -0,0 +1,13 @@
+#include <linux/bio.h>
+#include <linux/io.h>
+#include <xen/page.h>
+
+bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
+ const struct bio_vec *vec2)
+{
+ unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
+ unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
+
+ return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
+ ((mfn1 == mfn2) || ((mfn1+1) == mfn2));
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7d24b0d94ed4..97612f548a8e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -16,7 +16,7 @@
* (typically dom0).
* 2. VIRQs, typically used for timers. These are per-cpu events.
* 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
+ * 4. PIRQs - Hardware interrupts.
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
@@ -28,12 +28,16 @@
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
+#include <linux/irqnr.h>
+#include <linux/pci.h>
#include <asm/desc.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
#include <asm/idle.h>
+#include <asm/io_apic.h>
#include <asm/sync_bitops.h>
+#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -73,7 +77,8 @@ enum xen_irq_type {
* event channel - irq->event channel mapping
* cpu - cpu this event channel is bound to
* index - type-specific information:
- * PIRQ - vector, with MSB being "needs EIO"
+ * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
* VIRQ - virq number
* IPI - IPI vector
* EVTCHN -
@@ -88,21 +93,30 @@ struct irq_info
unsigned short virq;
enum ipi_vector ipi;
struct {
+ unsigned short pirq;
unsigned short gsi;
- unsigned short vector;
+ unsigned char vector;
+ unsigned char flags;
} pirq;
} u;
};
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
-static struct irq_info irq_info[NR_IRQS];
+static struct irq_info *irq_info;
+static int *pirq_to_irq;
+static int nr_pirqs;
-static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
- [0 ... NR_EVENT_CHANNELS-1] = -1
-};
+static int *evtchn_to_irq;
struct cpu_evtchn_s {
unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
};
-static struct cpu_evtchn_s *cpu_evtchn_mask_p;
+
+static __initdata struct cpu_evtchn_s init_evtchn_mask = {
+ .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
+};
+static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
return cpu_evtchn_mask_p[cpu].bits;
@@ -113,6 +127,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_percpu_chip;
+static struct irq_chip xen_pirq_chip;
/* Constructor for packed IRQ information. */
static struct irq_info mk_unbound_info(void)
@@ -138,11 +153,12 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
.cpu = 0, .u.virq = virq };
}
-static struct irq_info mk_pirq_info(unsigned short evtchn,
+static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
unsigned short gsi, unsigned short vector)
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
- .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } };
+ .cpu = 0,
+ .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
}
/*
@@ -184,6 +200,16 @@ static unsigned virq_from_irq(unsigned irq)
return info->u.virq;
}
+static unsigned pirq_from_irq(unsigned irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info == NULL);
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ return info->u.pirq.pirq;
+}
+
static unsigned gsi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -225,6 +251,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
return ret;
}
+static bool pirq_needs_eoi(unsigned irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ return info->u.pirq.flags & PIRQ_NEEDS_EOI;
+}
+
static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
@@ -261,7 +296,7 @@ static void init_evtchn_cpu_bindings(void)
}
#endif
- memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0)));
+ memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s));
}
static inline void clear_evtchn(int port)
@@ -336,12 +371,40 @@ static void unmask_evtchn(int port)
put_cpu();
}
+static int get_nr_hw_irqs(void)
+{
+ int ret = 1;
+
+#ifdef CONFIG_X86_IO_APIC
+ ret = get_nr_irqs_gsi();
+#endif
+
+ return ret;
+}
+
+/* callers of this function should make sure that PHYSDEVOP_get_nr_pirqs
+ * succeeded otherwise nr_pirqs won't hold the right value */
+static int find_unbound_pirq(void)
+{
+ int i;
+ for (i = nr_pirqs-1; i >= 0; i--) {
+ if (pirq_to_irq[i] < 0)
+ return i;
+ }
+ return -1;
+}
+
static int find_unbound_irq(void)
{
struct irq_data *data;
int irq, res;
+ int start = get_nr_hw_irqs();
- for (irq = 0; irq < nr_irqs; irq++) {
+ if (start == nr_irqs)
+ goto no_irqs;
+
+ /* nr_irqs is a magic value. Must not use it.*/
+ for (irq = nr_irqs-1; irq > start; irq--) {
data = irq_get_irq_data(irq);
/* only 0->15 have init'd desc; handle irq > 16 */
if (!data)
@@ -354,8 +417,8 @@ static int find_unbound_irq(void)
return irq;
}
- if (irq == nr_irqs)
- panic("No available IRQ to bind to: increase nr_irqs!\n");
+ if (irq == start)
+ goto no_irqs;
res = irq_alloc_desc_at(irq, 0);
@@ -363,6 +426,357 @@ static int find_unbound_irq(void)
return -1;
return irq;
+
+no_irqs:
+ panic("No available IRQ to bind to: increase nr_irqs!\n");
+}
+
+static bool identity_mapped_irq(unsigned irq)
+{
+ /* identity map all the hardware irqs */
+ return irq < get_nr_hw_irqs();
+}
+
+static void pirq_unmask_notify(int irq)
+{
+ struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
+
+ if (unlikely(pirq_needs_eoi(irq))) {
+ int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+ WARN_ON(rc);
+ }
+}
+
+static void pirq_query_unmask(int irq)
+{
+ struct physdev_irq_status_query irq_status;
+ struct irq_info *info = info_for_irq(irq);
+
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ irq_status.irq = pirq_from_irq(irq);
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+ irq_status.flags = 0;
+
+ info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
+ if (irq_status.flags & XENIRQSTAT_needs_eoi)
+ info->u.pirq.flags |= PIRQ_NEEDS_EOI;
+}
+
+static bool probing_irq(int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ return desc && desc->action == NULL;
+}
+
+static unsigned int startup_pirq(unsigned int irq)
+{
+ struct evtchn_bind_pirq bind_pirq;
+ struct irq_info *info = info_for_irq(irq);
+ int evtchn = evtchn_from_irq(irq);
+ int rc;
+
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ if (VALID_EVTCHN(evtchn))
+ goto out;
+
+ bind_pirq.pirq = pirq_from_irq(irq);
+ /* NB. We are happy to share unless we are probing. */
+ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
+ BIND_PIRQ__WILL_SHARE : 0;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
+ if (rc != 0) {
+ if (!probing_irq(irq))
+ printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
+ irq);
+ return 0;
+ }
+ evtchn = bind_pirq.port;
+
+ pirq_query_unmask(irq);
+
+ evtchn_to_irq[evtchn] = irq;
+ bind_evtchn_to_cpu(evtchn, 0);
+ info->evtchn = evtchn;
+
+out:
+ unmask_evtchn(evtchn);
+ pirq_unmask_notify(irq);
+
+ return 0;
+}
+
+static void shutdown_pirq(unsigned int irq)
+{
+ struct evtchn_close close;
+ struct irq_info *info = info_for_irq(irq);
+ int evtchn = evtchn_from_irq(irq);
+
+ BUG_ON(info->type != IRQT_PIRQ);
+
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+ mask_evtchn(evtchn);
+
+ close.port = evtchn;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+
+ bind_evtchn_to_cpu(evtchn, 0);
+ evtchn_to_irq[evtchn] = -1;
+ info->evtchn = 0;
+}
+
+static void enable_pirq(unsigned int irq)
+{
+ startup_pirq(irq);
+}
+
+static void disable_pirq(unsigned int irq)
+{
+}
+
+static void ack_pirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ move_native_irq(irq);
+
+ if (VALID_EVTCHN(evtchn)) {
+ mask_evtchn(evtchn);
+ clear_evtchn(evtchn);
+ }
+}
+
+static void end_pirq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (WARN_ON(!desc))
+ return;
+
+ if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
+ (IRQ_DISABLED|IRQ_PENDING)) {
+ shutdown_pirq(irq);
+ } else if (VALID_EVTCHN(evtchn)) {
+ unmask_evtchn(evtchn);
+ pirq_unmask_notify(irq);
+ }
+}
+
+static int find_irq_by_gsi(unsigned gsi)
+{
+ int irq;
+
+ for (irq = 0; irq < nr_irqs; irq++) {
+ struct irq_info *info = info_for_irq(irq);
+
+ if (info == NULL || info->type != IRQT_PIRQ)
+ continue;
+
+ if (gsi_from_irq(irq) == gsi)
+ return irq;
+ }
+
+ return -1;
+}
+
+int xen_allocate_pirq(unsigned gsi, int shareable, char *name)
+{
+ return xen_map_pirq_gsi(gsi, gsi, shareable, name);
+}
+
+/* xen_map_pirq_gsi might allocate irqs from the top down, as a
+ * consequence don't assume that the irq number returned has a low value
+ * or can be used as a pirq number unless you know otherwise.
+ *
+ * One notable exception is when xen_map_pirq_gsi is called passing an
+ * hardware gsi as argument, in that case the irq number returned
+ * matches the gsi number passed as second argument.
+ *
+ * Note: We don't assign an event channel until the irq actually started
+ * up. Return an existing irq if we've already got one for the gsi.
+ */
+int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
+{
+ int irq = 0;
+ struct physdev_irq irq_op;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ if ((pirq > nr_pirqs) || (gsi > nr_irqs)) {
+ printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n",
+ pirq > nr_pirqs ? "nr_pirqs" :"",
+ gsi > nr_irqs ? "nr_irqs" : "");
+ goto out;
+ }
+
+ irq = find_irq_by_gsi(gsi);
+ if (irq != -1) {
+ printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
+ irq, gsi);
+ goto out; /* XXX need refcount? */
+ }
+
+ /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
+ * we are using the !xen_initial_domain() to drop in the function.*/
+ if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
+ xen_pv_domain())) {
+ irq = gsi;
+ irq_alloc_desc_at(irq, 0);
+ } else
+ irq = find_unbound_irq();
+
+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ handle_level_irq, name);
+
+ irq_op.irq = irq;
+ irq_op.vector = 0;
+
+ /* Only the privileged domain can do this. For non-priv, the pcifront
+ * driver provides a PCI bus that does the call to do exactly
+ * this in the priv domain. */
+ if (xen_initial_domain() &&
+ HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+ irq_free_desc(irq);
+ irq = -ENOSPC;
+ goto out;
+ }
+
+ irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
+ irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
+ pirq_to_irq[pirq] = irq;
+
+out:
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+#ifdef CONFIG_PCI_MSI
+#include <linux/msi.h>
+#include "../pci/msi.h"
+
+void xen_allocate_pirq_msi(char *name, int *irq, int *pirq)
+{
+ spin_lock(&irq_mapping_update_lock);
+
+ *irq = find_unbound_irq();
+ if (*irq == -1)
+ goto out;
+
+ *pirq = find_unbound_pirq();
+ if (*pirq == -1)
+ goto out;
+
+ set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
+ handle_level_irq, name);
+
+ irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
+ pirq_to_irq[*pirq] = *irq;
+
+out:
+ spin_unlock(&irq_mapping_update_lock);
+}
+
+int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+{
+ int irq = -1;
+ struct physdev_map_pirq map_irq;
+ int rc;
+ int pos;
+ u32 table_offset, bir;
+
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
+
+ if (type == PCI_CAP_ID_MSIX) {
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, msix_table_offset_reg(pos),
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ spin_lock(&irq_mapping_update_lock);
+
+ irq = find_unbound_irq();
+
+ if (irq == -1)
+ goto out;
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (rc) {
+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
+
+ irq_free_desc(irq);
+
+ irq = -1;
+ goto out;
+ }
+ irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
+
+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
+ handle_level_irq,
+ (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
+
+out:
+ spin_unlock(&irq_mapping_update_lock);
+ return irq;
+}
+#endif
+
+int xen_destroy_irq(int irq)
+{
+ struct irq_desc *desc;
+ struct physdev_unmap_pirq unmap_irq;
+ struct irq_info *info = info_for_irq(irq);
+ int rc = -ENOENT;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ desc = irq_to_desc(irq);
+ if (!desc)
+ goto out;
+
+ if (xen_initial_domain()) {
+ unmap_irq.pirq = info->u.pirq.gsi;
+ unmap_irq.domid = DOMID_SELF;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
+ if (rc) {
+ printk(KERN_WARNING "unmap irq failed %d\n", rc);
+ goto out;
+ }
+ }
+ irq_info[irq] = mk_unbound_info();
+
+ irq_free_desc(irq);
+
+out:
+ spin_unlock(&irq_mapping_update_lock);
+ return rc;
+}
+
+int xen_vector_from_irq(unsigned irq)
+{
+ return vector_from_irq(irq);
+}
+
+int xen_gsi_from_irq(unsigned irq)
+{
+ return gsi_from_irq(irq);
}
int bind_evtchn_to_irq(unsigned int evtchn)
@@ -377,7 +791,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq = find_unbound_irq();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
- handle_edge_irq, "event");
+ handle_fasteoi_irq, "event");
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_evtchn_info(evtchn);
@@ -425,7 +839,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
}
-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int evtchn, irq;
@@ -435,6 +849,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
+ irq = find_unbound_irq();
+
+ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+ handle_percpu_irq, "virq");
+
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
@@ -442,11 +861,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
BUG();
evtchn = bind_virq.port;
- irq = find_unbound_irq();
-
- set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
- handle_percpu_irq, "virq");
-
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_virq_info(evtchn, virq);
@@ -578,41 +992,75 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
struct shared_info *sh = HYPERVISOR_shared_info;
int cpu = smp_processor_id();
+ unsigned long *cpu_evtchn = cpu_evtchn_mask(cpu);
int i;
unsigned long flags;
static DEFINE_SPINLOCK(debug_lock);
+ struct vcpu_info *v;
spin_lock_irqsave(&debug_lock, flags);
- printk("vcpu %d\n ", cpu);
+ printk("\nvcpu %d\n ", cpu);
for_each_online_cpu(i) {
- struct vcpu_info *v = per_cpu(xen_vcpu, i);
- printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
- (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
- v->evtchn_upcall_pending,
- v->evtchn_pending_sel);
+ int pending;
+ v = per_cpu(xen_vcpu, i);
+ pending = (get_irq_regs() && i == cpu)
+ ? xen_irqs_disabled(get_irq_regs())
+ : v->evtchn_upcall_mask;
+ printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
+ pending, v->evtchn_upcall_pending,
+ (int)(sizeof(v->evtchn_pending_sel)*2),
+ v->evtchn_pending_sel);
+ }
+ v = per_cpu(xen_vcpu, cpu);
+
+ printk("\npending:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+ printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
+ sh->evtchn_pending[i],
+ i % 8 == 0 ? "\n " : " ");
+ printk("\nglobal mask:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*lx%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nglobally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocal cpu%d mask:\n ", cpu);
+ for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
+ printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
+ cpu_evtchn[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
+ unsigned long pending = sh->evtchn_pending[i]
+ & ~sh->evtchn_mask[i]
+ & cpu_evtchn[i];
+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
+ pending, i % 8 == 0 ? "\n " : " ");
}
- printk("pending:\n ");
- for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
- printk("%08lx%s", sh->evtchn_pending[i],
- i % 8 == 0 ? "\n " : " ");
- printk("\nmasks:\n ");
- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%08lx%s", sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nunmasked:\n ");
- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
printk("\npending list:\n");
- for(i = 0; i < NR_EVENT_CHANNELS; i++) {
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
if (sync_test_bit(i, sh->evtchn_pending)) {
- printk(" %d: event %d -> irq %d\n",
+ int word_idx = i / BITS_PER_LONG;
+ printk(" %d: event %d -> irq %d%s%s%s\n",
cpu_from_evtchn(i), i,
- evtchn_to_irq[i]);
+ evtchn_to_irq[i],
+ sync_test_bit(word_idx, &v->evtchn_pending_sel)
+ ? "" : " l2-clear",
+ !sync_test_bit(i, sh->evtchn_mask)
+ ? "" : " globally-masked",
+ sync_test_bit(i, cpu_evtchn)
+ ? "" : " locally-masked");
}
}
@@ -663,6 +1111,9 @@ static void __xen_evtchn_do_upcall(void)
int irq = evtchn_to_irq[port];
struct irq_desc *desc;
+ mask_evtchn(port);
+ clear_evtchn(port);
+
if (irq != -1) {
desc = irq_to_desc(irq);
if (desc)
@@ -800,10 +1251,10 @@ static void ack_dynirq(unsigned int irq)
{
int evtchn = evtchn_from_irq(irq);
- move_native_irq(irq);
+ move_masked_irq(irq);
if (VALID_EVTCHN(evtchn))
- clear_evtchn(evtchn);
+ unmask_evtchn(evtchn);
}
static int retrigger_dynirq(unsigned int irq)
@@ -891,7 +1342,7 @@ void xen_clear_irq_pending(int irq)
if (VALID_EVTCHN(evtchn))
clear_evtchn(evtchn);
}
-
+EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
@@ -911,9 +1362,9 @@ bool xen_test_irq_pending(int irq)
return ret;
}
-/* Poll waiting for an irq to become pending. In the usual case, the
- irq will be disabled so it won't deliver an interrupt. */
-void xen_poll_irq(int irq)
+/* Poll waiting for an irq to become pending with timeout. In the usual case,
+ * the irq will be disabled so it won't deliver an interrupt. */
+void xen_poll_irq_timeout(int irq, u64 timeout)
{
evtchn_port_t evtchn = evtchn_from_irq(irq);
@@ -921,13 +1372,20 @@ void xen_poll_irq(int irq)
struct sched_poll poll;
poll.nr_ports = 1;
- poll.timeout = 0;
+ poll.timeout = timeout;
set_xen_guest_handle(poll.ports, &evtchn);
if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
BUG();
}
}
+EXPORT_SYMBOL(xen_poll_irq_timeout);
+/* Poll waiting for an irq to become pending. In the usual case, the
+ * irq will be disabled so it won't deliver an interrupt. */
+void xen_poll_irq(int irq)
+{
+ xen_poll_irq_timeout(irq, 0 /* no timeout */);
+}
void xen_irq_resume(void)
{
@@ -959,8 +1417,28 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.mask = disable_dynirq,
.unmask = enable_dynirq,
- .ack = ack_dynirq,
+ .eoi = ack_dynirq,
+ .set_affinity = set_affinity_irq,
+ .retrigger = retrigger_dynirq,
+};
+
+static struct irq_chip xen_pirq_chip __read_mostly = {
+ .name = "xen-pirq",
+
+ .startup = startup_pirq,
+ .shutdown = shutdown_pirq,
+
+ .enable = enable_pirq,
+ .unmask = enable_pirq,
+
+ .disable = disable_pirq,
+ .mask = disable_pirq,
+
+ .ack = ack_pirq,
+ .end = end_pirq,
+
.set_affinity = set_affinity_irq,
+
.retrigger = retrigger_dynirq,
};
@@ -1014,11 +1492,32 @@ void xen_callback_vector(void) {}
void __init xen_init_IRQ(void)
{
- int i;
+ int i, rc;
+ struct physdev_nr_pirqs op_nr_pirqs;
cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
GFP_KERNEL);
- BUG_ON(cpu_evtchn_mask_p == NULL);
+ irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL);
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs);
+ if (rc < 0) {
+ nr_pirqs = nr_irqs;
+ if (rc != -ENOSYS)
+ printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc);
+ } else {
+ if (xen_pv_domain() && !xen_initial_domain())
+ nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs);
+ else
+ nr_pirqs = op_nr_pirqs.nr_pirqs;
+ }
+ pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL);
+ for (i = 0; i < nr_pirqs; i++)
+ pirq_to_irq[i] = -1;
+
+ evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
+ GFP_KERNEL);
+ for (i = 0; i < NR_EVENT_CHANNELS; i++)
+ evtchn_to_irq[i] = -1;
init_evtchn_cpu_bindings();
@@ -1029,7 +1528,12 @@ void __init xen_init_IRQ(void)
if (xen_hvm_domain()) {
xen_callback_vector();
native_init_IRQ();
+ /* pci_xen_hvm_init must be called after native_init_IRQ so that
+ * __acpi_register_gsi can point at the right function */
+ pci_xen_hvm_init();
} else {
irq_ctx_init(smp_processor_id());
+ if (xen_initial_domain())
+ xen_setup_pirqs();
}
}
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
new file mode 100644
index 000000000000..cef4bafc07dc
--- /dev/null
+++ b/drivers/xen/pci.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Weidong Han <weidong.han@intel.com>
+ */
+
+#include <linux/pci.h>
+#include <xen/xen.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/xen.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include "../pci/pci.h"
+
+static int xen_add_device(struct device *dev)
+{
+ int r;
+ struct pci_dev *pci_dev = to_pci_dev(dev);
+
+#ifdef CONFIG_PCI_IOV
+ if (pci_dev->is_virtfn) {
+ struct physdev_manage_pci_ext manage_pci_ext = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn,
+ .is_virtfn = 1,
+ .physfn.bus = pci_dev->physfn->bus->number,
+ .physfn.devfn = pci_dev->physfn->devfn,
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
+ &manage_pci_ext);
+ } else
+#endif
+ if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+ struct physdev_manage_pci_ext manage_pci_ext = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn,
+ .is_extfn = 1,
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
+ &manage_pci_ext);
+ } else {
+ struct physdev_manage_pci manage_pci = {
+ .bus = pci_dev->bus->number,
+ .devfn = pci_dev->devfn,
+ };
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add,
+ &manage_pci);
+ }
+
+ return r;
+}
+
+static int xen_remove_device(struct device *dev)
+{
+ int r;
+ struct pci_dev *pci_dev = to_pci_dev(dev);
+ struct physdev_manage_pci manage_pci;
+
+ manage_pci.bus = pci_dev->bus->number;
+ manage_pci.devfn = pci_dev->devfn;
+
+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+ &manage_pci);
+
+ return r;
+}
+
+static int xen_pci_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ int r = 0;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ r = xen_add_device(dev);
+ break;
+ case BUS_NOTIFY_DEL_DEVICE:
+ r = xen_remove_device(dev);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+struct notifier_block device_nb = {
+ .notifier_call = xen_pci_notifier,
+};
+
+static int __init register_xen_pci_notifier(void)
+{
+ if (!xen_initial_domain())
+ return 0;
+
+ return bus_register_notifier(&pci_bus_type, &device_nb);
+}
+
+arch_initcall(register_xen_pci_notifier);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 7e49527189b6..cdacf923e073 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state)
[ XenbusStateConnected ] = "Connected",
[ XenbusStateClosing ] = "Closing",
[ XenbusStateClosed ] = "Closed",
+ [XenbusStateReconfiguring] = "Reconfiguring",
+ [XenbusStateReconfigured] = "Reconfigured",
};
return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
}
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index d409495876f1..deb9c4ba3a93 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -64,9 +64,11 @@
int xen_store_evtchn;
-EXPORT_SYMBOL(xen_store_evtchn);
+EXPORT_SYMBOL_GPL(xen_store_evtchn);
struct xenstore_domain_interface *xen_store_interface;
+EXPORT_SYMBOL_GPL(xen_store_interface);
+
static unsigned long xen_store_mfn;
static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
@@ -801,6 +803,7 @@ device_initcall(xenbus_probe_initcall);
static int __init xenbus_init(void)
{
int err = 0;
+ unsigned long page = 0;
DPRINTK("");
@@ -821,7 +824,31 @@ static int __init xenbus_init(void)
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (xen_initial_domain()) {
- /* dom0 not yet supported */
+ struct evtchn_alloc_unbound alloc_unbound;
+
+ /* Allocate Xenstore page */
+ page = get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ goto out_error;
+
+ xen_store_mfn = xen_start_info->store_mfn =
+ pfn_to_mfn(virt_to_phys((void *)page) >>
+ PAGE_SHIFT);
+
+ /* Next allocate a local port which xenstored can bind to */
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = 0;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err == -ENOSYS)
+ goto out_error;
+
+ BUG_ON(err);
+ xen_store_evtchn = xen_start_info->store_evtchn =
+ alloc_unbound.port;
+
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
} else {
if (xen_hvm_domain()) {
uint64_t v = 0;
@@ -867,6 +894,8 @@ static int __init xenbus_init(void)
bus_unregister(&xenbus_frontend.bus);
out_error:
+ if (page != 0)
+ free_page(page);
return err;
}
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 25275c3bbdff..4fde9440fe1f 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_XENFS) += xenfs.o
-xenfs-objs = super.o xenbus.o \ No newline at end of file
+xenfs-y = super.o xenbus.o privcmd.o
+xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
new file mode 100644
index 000000000000..f80be7f6eb95
--- /dev/null
+++ b/drivers/xen/xenfs/privcmd.c
@@ -0,0 +1,404 @@
+/******************************************************************************
+ * privcmd.c
+ *
+ * Interface to privileged domain-0 commands.
+ *
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/privcmd.h>
+#include <xen/interface/xen.h>
+#include <xen/features.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
+static long privcmd_ioctl_hypercall(void __user *udata)
+{
+ struct privcmd_hypercall hypercall;
+ long ret;
+
+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+ return -EFAULT;
+
+ ret = privcmd_call(hypercall.op,
+ hypercall.arg[0], hypercall.arg[1],
+ hypercall.arg[2], hypercall.arg[3],
+ hypercall.arg[4]);
+
+ return ret;
+}
+
+static void free_page_list(struct list_head *pages)
+{
+ struct page *p, *n;
+
+ list_for_each_entry_safe(p, n, pages, lru)
+ __free_page(p);
+
+ INIT_LIST_HEAD(pages);
+}
+
+/*
+ * Given an array of items in userspace, return a list of pages
+ * containing the data. If copying fails, either because of memory
+ * allocation failure or a problem reading user memory, return an
+ * error code; its up to the caller to dispose of any partial list.
+ */
+static int gather_array(struct list_head *pagelist,
+ unsigned nelem, size_t size,
+ void __user *data)
+{
+ unsigned pageidx;
+ void *pagedata;
+ int ret;
+
+ if (size > PAGE_SIZE)
+ return 0;
+
+ pageidx = PAGE_SIZE;
+ pagedata = NULL; /* quiet, gcc */
+ while (nelem--) {
+ if (pageidx > PAGE_SIZE-size) {
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ ret = -ENOMEM;
+ if (page == NULL)
+ goto fail;
+
+ pagedata = page_address(page);
+
+ list_add_tail(&page->lru, pagelist);
+ pageidx = 0;
+ }
+
+ ret = -EFAULT;
+ if (copy_from_user(pagedata + pageidx, data, size))
+ goto fail;
+
+ data += size;
+ pageidx += size;
+ }
+
+ ret = 0;
+
+fail:
+ return ret;
+}
+
+/*
+ * Call function "fn" on each element of the array fragmented
+ * over a list of pages.
+ */
+static int traverse_pages(unsigned nelem, size_t size,
+ struct list_head *pos,
+ int (*fn)(void *data, void *state),
+ void *state)
+{
+ void *pagedata;
+ unsigned pageidx;
+ int ret = 0;
+
+ BUG_ON(size > PAGE_SIZE);
+
+ pageidx = PAGE_SIZE;
+ pagedata = NULL; /* hush, gcc */
+
+ while (nelem--) {
+ if (pageidx > PAGE_SIZE-size) {
+ struct page *page;
+ pos = pos->next;
+ page = list_entry(pos, struct page, lru);
+ pagedata = page_address(page);
+ pageidx = 0;
+ }
+
+ ret = (*fn)(pagedata + pageidx, state);
+ if (ret)
+ break;
+ pageidx += size;
+ }
+
+ return ret;
+}
+
+struct mmap_mfn_state {
+ unsigned long va;
+ struct vm_area_struct *vma;
+ domid_t domain;
+};
+
+static int mmap_mfn_range(void *data, void *state)
+{
+ struct privcmd_mmap_entry *msg = data;
+ struct mmap_mfn_state *st = state;
+ struct vm_area_struct *vma = st->vma;
+ int rc;
+
+ /* Do not allow range to wrap the address space. */
+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
+ return -EINVAL;
+
+ /* Range chunks must be contiguous in va space. */
+ if ((msg->va != st->va) ||
+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
+ return -EINVAL;
+
+ rc = xen_remap_domain_mfn_range(vma,
+ msg->va & PAGE_MASK,
+ msg->mfn, msg->npages,
+ vma->vm_page_prot,
+ st->domain);
+ if (rc < 0)
+ return rc;
+
+ st->va += msg->npages << PAGE_SHIFT;
+
+ return 0;
+}
+
+static long privcmd_ioctl_mmap(void __user *udata)
+{
+ struct privcmd_mmap mmapcmd;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+ LIST_HEAD(pagelist);
+ struct mmap_mfn_state state;
+
+ if (!xen_initial_domain())
+ return -EPERM;
+
+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+ return -EFAULT;
+
+ rc = gather_array(&pagelist,
+ mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+ mmapcmd.entry);
+
+ if (rc || list_empty(&pagelist))
+ goto out;
+
+ down_write(&mm->mmap_sem);
+
+ {
+ struct page *page = list_first_entry(&pagelist,
+ struct page, lru);
+ struct privcmd_mmap_entry *msg = page_address(page);
+
+ vma = find_vma(mm, msg->va);
+ rc = -EINVAL;
+
+ if (!vma || (msg->va != vma->vm_start) ||
+ !privcmd_enforce_singleshot_mapping(vma))
+ goto out_up;
+ }
+
+ state.va = vma->vm_start;
+ state.vma = vma;
+ state.domain = mmapcmd.dom;
+
+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+ &pagelist,
+ mmap_mfn_range, &state);
+
+
+out_up:
+ up_write(&mm->mmap_sem);
+
+out:
+ free_page_list(&pagelist);
+
+ return rc;
+}
+
+struct mmap_batch_state {
+ domid_t domain;
+ unsigned long va;
+ struct vm_area_struct *vma;
+ int err;
+
+ xen_pfn_t __user *user;
+};
+
+static int mmap_batch_fn(void *data, void *state)
+{
+ xen_pfn_t *mfnp = data;
+ struct mmap_batch_state *st = state;
+
+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain) < 0) {
+ *mfnp |= 0xf0000000U;
+ st->err++;
+ }
+ st->va += PAGE_SIZE;
+
+ return 0;
+}
+
+static int mmap_return_errors(void *data, void *state)
+{
+ xen_pfn_t *mfnp = data;
+ struct mmap_batch_state *st = state;
+
+ put_user(*mfnp, st->user++);
+
+ return 0;
+}
+
+static struct vm_operations_struct privcmd_vm_ops;
+
+static long privcmd_ioctl_mmap_batch(void __user *udata)
+{
+ int ret;
+ struct privcmd_mmapbatch m;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long nr_pages;
+ LIST_HEAD(pagelist);
+ struct mmap_batch_state state;
+
+ if (!xen_initial_domain())
+ return -EPERM;
+
+ if (copy_from_user(&m, udata, sizeof(m)))
+ return -EFAULT;
+
+ nr_pages = m.num;
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+ return -EINVAL;
+
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
+ m.arr);
+
+ if (ret || list_empty(&pagelist))
+ goto out;
+
+ down_write(&mm->mmap_sem);
+
+ vma = find_vma(mm, m.addr);
+ ret = -EINVAL;
+ if (!vma ||
+ vma->vm_ops != &privcmd_vm_ops ||
+ (m.addr != vma->vm_start) ||
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+ !privcmd_enforce_singleshot_mapping(vma)) {
+ up_write(&mm->mmap_sem);
+ goto out;
+ }
+
+ state.domain = m.dom;
+ state.vma = vma;
+ state.va = m.addr;
+ state.err = 0;
+
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state);
+
+ up_write(&mm->mmap_sem);
+
+ if (state.err > 0) {
+ ret = 0;
+
+ state.user = m.arr;
+ traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist,
+ mmap_return_errors, &state);
+ }
+
+out:
+ free_page_list(&pagelist);
+
+ return ret;
+}
+
+static long privcmd_ioctl(struct file *file,
+ unsigned int cmd, unsigned long data)
+{
+ int ret = -ENOSYS;
+ void __user *udata = (void __user *) data;
+
+ switch (cmd) {
+ case IOCTL_PRIVCMD_HYPERCALL:
+ ret = privcmd_ioctl_hypercall(udata);
+ break;
+
+ case IOCTL_PRIVCMD_MMAP:
+ ret = privcmd_ioctl_mmap(udata);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH:
+ ret = privcmd_ioctl_mmap_batch(udata);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
+ vma, vma->vm_start, vma->vm_end,
+ vmf->pgoff, vmf->virtual_address);
+
+ return VM_FAULT_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+ .fault = privcmd_fault
+};
+
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* Unsupported for auto-translate guests. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
+
+ return 0;
+}
+
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+}
+#endif
+
+const struct file_operations privcmd_file_ops = {
+ .unlocked_ioctl = privcmd_ioctl,
+ .mmap = privcmd_mmap,
+};
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index bd96340063c1..f6339d11d59c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -12,6 +12,8 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/magic.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
#include <xen/xen.h>
@@ -22,6 +24,62 @@
MODULE_DESCRIPTION("Xen filesystem");
MODULE_LICENSE("GPL");
+static int xenfs_set_page_dirty(struct page *page)
+{
+ return !TestSetPageDirty(page);
+}
+
+static const struct address_space_operations xenfs_aops = {
+ .set_page_dirty = xenfs_set_page_dirty,
+};
+
+static struct backing_dev_info xenfs_backing_dev_info = {
+ .ra_pages = 0, /* No readahead */
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+};
+
+static struct inode *xenfs_make_inode(struct super_block *sb, int mode)
+{
+ struct inode *ret = new_inode(sb);
+
+ if (ret) {
+ ret->i_mode = mode;
+ ret->i_mapping->a_ops = &xenfs_aops;
+ ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info;
+ ret->i_uid = ret->i_gid = 0;
+ ret->i_blocks = 0;
+ ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
+ }
+ return ret;
+}
+
+static struct dentry *xenfs_create_file(struct super_block *sb,
+ struct dentry *parent,
+ const char *name,
+ const struct file_operations *fops,
+ void *data,
+ int mode)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ inode = xenfs_make_inode(sb, S_IFREG | mode);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+
+ inode->i_fop = fops;
+ inode->i_private = data;
+
+ d_add(dentry, inode);
+ return dentry;
+}
+
static ssize_t capabilities_read(struct file *file, char __user *buf,
size_t size, loff_t *off)
{
@@ -44,33 +102,60 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
[1] = {},
{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
+ { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
{""},
};
+ int rc;
- return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
+ rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
+ if (rc < 0)
+ return rc;
+
+ if (xen_initial_domain()) {
+ xenfs_create_file(sb, sb->s_root, "xsd_kva",
+ &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR);
+ xenfs_create_file(sb, sb->s_root, "xsd_port",
+ &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR);
+ }
+
+ return rc;
}
-static int xenfs_get_sb(struct file_system_type *fs_type,
+static int xenfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt);
+ return mount_single(fs_type, flags, data, xenfs_fill_super);
}
static struct file_system_type xenfs_type = {
.owner = THIS_MODULE,
.name = "xenfs",
- .get_sb = xenfs_get_sb,
+ .mount = xenfs_mount,
.kill_sb = kill_litter_super,
};
static int __init xenfs_init(void)
{
- if (xen_domain())
- return register_filesystem(&xenfs_type);
+ int err;
+ if (!xen_domain()) {
+ printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n");
+ return 0;
+ }
+
+ err = register_filesystem(&xenfs_type);
+ if (err) {
+ printk(KERN_ERR "xenfs: Unable to register filesystem!\n");
+ goto out;
+ }
+
+ err = bdi_init(&xenfs_backing_dev_info);
+ if (err)
+ unregister_filesystem(&xenfs_type);
+
+ out:
- printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
- return 0;
+ return err;
}
static void __exit xenfs_exit(void)
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 51f08b2d0bf1..b68aa6200003 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -2,5 +2,8 @@
#define _XENFS_XENBUS_H
extern const struct file_operations xenbus_file_ops;
+extern const struct file_operations privcmd_file_ops;
+extern const struct file_operations xsd_kva_file_ops;
+extern const struct file_operations xsd_port_file_ops;
#endif /* _XENFS_XENBUS_H */
diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c
new file mode 100644
index 000000000000..fef20dbc6a5c
--- /dev/null
+++ b/drivers/xen/xenfs/xenstored.c
@@ -0,0 +1,68 @@
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+#include <xen/page.h>
+
+#include "xenfs.h"
+#include "../xenbus/xenbus_comms.h"
+
+static ssize_t xsd_read(struct file *file, char __user *buf,
+ size_t size, loff_t *off)
+{
+ const char *str = (const char *)file->private_data;
+ return simple_read_from_buffer(buf, size, off, str, strlen(str));
+}
+
+static int xsd_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static int xsd_kva_open(struct inode *inode, struct file *file)
+{
+ file->private_data = (void *)kasprintf(GFP_KERNEL, "0x%p",
+ xen_store_interface);
+ if (!file->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ size_t size = vma->vm_end - vma->vm_start;
+
+ if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+ return -EINVAL;
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ virt_to_pfn(xen_store_interface),
+ size, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+const struct file_operations xsd_kva_file_ops = {
+ .open = xsd_kva_open,
+ .mmap = xsd_kva_mmap,
+ .read = xsd_read,
+ .release = xsd_release,
+};
+
+static int xsd_port_open(struct inode *inode, struct file *file)
+{
+ file->private_data = (void *)kasprintf(GFP_KERNEL, "%d",
+ xen_store_evtchn);
+ if (!file->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+const struct file_operations xsd_port_file_ops = {
+ .open = xsd_port_open,
+ .read = xsd_read,
+ .release = xsd_release,
+};