From 7e77506a5918d82cafa2ffa783ab57c23f9e9817 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 30 Sep 2010 12:37:26 +0100 Subject: xen: implement XENMEM_machphys_mapping This hypercall allows Xen to specify a non-default location for the machine to physical mapping. This capability is used when running a 32 bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to exactly the size required. [ Impact: add Xen hypercall definitions ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- include/xen/interface/memory.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/xen') diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13bde69..eac3ce153719 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -140,6 +140,19 @@ struct xen_machphys_mfn_list { }; DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + /* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. -- cgit v1.2.3 From 744f9f104ea262de1dc3e29265870c649f0d9473 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 11:44:39 -0500 Subject: xen: fix header export to userspace scripts/headers_install.pl prevents "__user" from being exported to userspace headers, so just use compiler.h to make sure that __user is defined and avoid the error. 
unifdef: linux-next-20101112/xx64/usr/include/xen/privcmd.h.tmp: 79: Premature EOF (#if line 33 depth 1) Signed-off-by: Randy Dunlap Cc: Jeremy Fitzhardinge Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xensource.com (moderated for non-subscribers) Cc: virtualization@lists.osdl.org Cc: Tony Finch Signed-off-by: Jeremy Fitzhardinge --- include/xen/privcmd.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/xen') diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h index b42cdfd92fee..17857fb4d550 100644 --- a/include/xen/privcmd.h +++ b/include/xen/privcmd.h @@ -34,13 +34,10 @@ #define __LINUX_PUBLIC_PRIVCMD_H__ #include +#include typedef unsigned long xen_pfn_t; -#ifndef __user -#define __user -#endif - struct privcmd_hypercall { __u64 op; __u64 arg[5]; -- cgit v1.2.3 From 9be4d4575906af9698de660e477f949a076c87e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 31 Aug 2010 15:01:16 -0700 Subject: xen: add extra pages to balloon Add extra pages in the pseudo-physical address space to the balloon so we can extend into them later. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/balloon.c | 15 +++++++++++---- include/xen/page.h | 7 +++++++ 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 500290b150bb..df26ee9caa68 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -119,7 +119,7 @@ static void scrub_page(struct page *page) } /* balloon_append: add the given page to the balloon. */ -static void balloon_append(struct page *page) +static void __balloon_append(struct page *page) { /* Lowmem is re-populated first, so highmem pages go at list tail. 
*/ if (PageHighMem(page)) { @@ -130,7 +130,11 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } +} +static void balloon_append(struct page *page) +{ + __balloon_append(page); totalram_pages--; } @@ -416,10 +420,13 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + for (pfn = PFN_UP(xen_extra_mem_start); + pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size); + pfn++) { page = pfn_to_page(pfn); - if (!PageReserved(page)) - balloon_append(page); + /* totalram_pages doesn't include the boot-time + balloon extension, so don't subtract from it. */ + __balloon_append(page); } target_watch.callback = watch_target; diff --git a/include/xen/page.h b/include/xen/page.h index eaf85fab1263..0be36b976f4b 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1 +1,8 @@ +#ifndef _XEN_PAGE_H +#define _XEN_PAGE_H + #include + +extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; + +#endif /* _XEN_PAGE_H */ -- cgit v1.2.3 From e5fc7345412d5e4758fcef55a74354c5cbefd61e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 1 Dec 2010 14:51:44 +0000 Subject: xen: use PHYSDEVOP_get_free_pirq to implement find_unbound_pirq Use the new hypercall PHYSDEVOP_get_free_pirq to ask Xen to allocate a pirq. Remove the unsupported PHYSDEVOP_get_nr_pirqs hypercall to get the amount of pirq available. This fixes find_unbound_pirq that otherwise would return a number starting from nr_irqs that might very well be out of range in Xen. The symptom of this bug is that when you passthrough an MSI capable pci device to a PV on HVM guest, Linux would fail to enable MSIs on the device. 
Signed-off-by: Stefano Stabellini --- drivers/xen/events.c | 45 ++++++++++++++++++----------------------- include/xen/interface/physdev.h | 10 +++++++++ 2 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 2811bb988ea0..7ab43c33f746 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -105,7 +105,6 @@ struct irq_info static struct irq_info *irq_info; static int *pirq_to_irq; -static int nr_pirqs; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -385,12 +384,17 @@ static int get_nr_hw_irqs(void) return ret; } -/* callers of this function should make sure that PHYSDEVOP_get_nr_pirqs - * succeeded otherwise nr_pirqs won't hold the right value */ -static int find_unbound_pirq(void) +static int find_unbound_pirq(int type) { - int i; - for (i = nr_pirqs-1; i >= 0; i--) { + int rc, i; + struct physdev_get_free_pirq op_get_free_pirq; + op_get_free_pirq.type = type; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); + if (!rc) + return op_get_free_pirq.pirq; + + for (i = 0; i < nr_irqs; i++) { if (pirq_to_irq[i] < 0) return i; } @@ -611,10 +615,10 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) spin_lock(&irq_mapping_update_lock); - if ((pirq > nr_pirqs) || (gsi > nr_irqs)) { + if ((pirq > nr_irqs) || (gsi > nr_irqs)) { printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n", - pirq > nr_pirqs ? "nr_pirqs" :"", - gsi > nr_irqs ? "nr_irqs" : ""); + pirq > nr_irqs ? "pirq" :"", + gsi > nr_irqs ? 
"gsi" : ""); goto out; } @@ -672,7 +676,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) if (*irq == -1) goto out; - *pirq = find_unbound_pirq(); + *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); if (*pirq == -1) goto out; @@ -1506,26 +1510,17 @@ void xen_callback_vector(void) {} void __init xen_init_IRQ(void) { - int i, rc; - struct physdev_nr_pirqs op_nr_pirqs; + int i; cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); - rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs); - if (rc < 0) { - nr_pirqs = nr_irqs; - if (rc != -ENOSYS) - printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc); - } else { - if (xen_pv_domain() && !xen_initial_domain()) - nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs); - else - nr_pirqs = op_nr_pirqs.nr_pirqs; - } - pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL); - for (i = 0; i < nr_pirqs; i++) + /* We are using nr_irqs as the maximum number of pirq available but + * that number is actually chosen by Xen and we don't know exactly + * what it is. Be careful choosing high pirq numbers. */ + pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_irqs; i++) pirq_to_irq[i] = -1; evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 2b2c66c3df00..534cac89a77d 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -188,6 +188,16 @@ struct physdev_nr_pirqs { uint32_t nr_pirqs; }; +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** -- cgit v1.2.3 From af42b8d12f8adec6711cb824549a0edac6a4ae8f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 1 Dec 2010 14:51:44 +0000 Subject: xen: fix MSI setup and teardown for PV on HVM guests When remapping MSIs into pirqs for PV on HVM guests, qemu is responsible for doing the actual mapping and unmapping. We only give qemu the desired pirq number when we ask to do the mapping the first time; after that we should be reading back the pirq number from qemu every time we want to re-enable the MSI. This fixes a bug in xen_hvm_setup_msi_irqs that manifests itself when trying to enable the same MSI for the second time: the old MSI-to-pirq mapping is still valid at this point, but xen_hvm_setup_msi_irqs would try to assign a new pirq anyway. A simple way to reproduce this bug is to assign an MSI-capable network card to a PV on HVM guest: if the user brings the corresponding ethernet interface down and up again, Linux would fail to enable MSIs on the device. 
Signed-off-by: Stefano Stabellini --- arch/x86/pci/xen.c | 27 ++++++++++++++++++++------- drivers/xen/events.c | 24 +++++++++++++++++------- include/xen/events.h | 7 ++++++- 3 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9c..25cd4a07d09f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ + MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) + static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, struct msi_msg *msg) { @@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(pirq); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - /* delivery mode reserved */ - (3 << 8) | - MSI_DATA_VECTOR(0); + msg->data = XEN_PIRQ_MSI_DATA; } static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_msg msg; list_for_each_entry(msidesc, &dev->msi_list, list) { + __read_msi_msg(msidesc, &msg); + pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | + ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); + if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); + if (irq < 0) + goto error; + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" + " pirq=%d\n", irq, pirq); + return 0; + } xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 
- "msi-x" : "msi", &irq, &pirq); + "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); if (irq < 0 || pirq < 0) goto error; printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7ab43c33f746..f78945ce8aeb 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -668,17 +668,21 @@ out: #include #include "../pci/msi.h" -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) { spin_lock(&irq_mapping_update_lock); - *irq = find_unbound_irq(); - if (*irq == -1) - goto out; + if (alloc & XEN_ALLOC_IRQ) { + *irq = find_unbound_irq(); + if (*irq == -1) + goto out; + } - *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); - if (*pirq == -1) - goto out; + if (alloc & XEN_ALLOC_PIRQ) { + *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); + if (*pirq == -1) + goto out; + } set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, handle_level_irq, name); @@ -766,6 +770,7 @@ int xen_destroy_irq(int irq) printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } + pirq_to_irq[info->u.pirq.pirq] = -1; } irq_info[irq] = mk_unbound_info(); @@ -786,6 +791,11 @@ int xen_gsi_from_irq(unsigned irq) return gsi_from_irq(irq); } +int xen_irq_from_pirq(unsigned pirq) +{ + return pirq_to_irq[pirq]; +} + int bind_evtchn_to_irq(unsigned int evtchn) { int irq; diff --git a/include/xen/events.h b/include/xen/events.h index 646dd17d3aa4..00f53ddcc062 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -76,7 +76,9 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); #ifdef CONFIG_PCI_MSI /* Allocate an irq and a pirq to be used with MSIs. 
*/ -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); +#define XEN_ALLOC_PIRQ (1 << 0) +#define XEN_ALLOC_IRQ (1 << 1) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask); int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); #endif @@ -89,4 +91,7 @@ int xen_vector_from_irq(unsigned pirq); /* Return gsi allocated to pirq */ int xen_gsi_from_irq(unsigned pirq); +/* Return irq from pirq */ +int xen_irq_from_pirq(unsigned pirq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From 667c78afaec0ac500908e191e8f236e9578d7b1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 8 Dec 2010 12:39:12 -0800 Subject: xen: Provide a variant of __RING_SIZE() that is an integer constant expression Without this, gcc 4.5 won't compile xen-netfront and xen-blkfront, where this is being used to specify array sizes. Signed-off-by: Jan Beulich Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: David Miller Cc: Stable Kernel Signed-off-by: Linus Torvalds --- drivers/block/xen-blkfront.c | 2 +- drivers/net/xen-netfront.c | 4 ++-- include/xen/interface/io/ring.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/xen') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4f9e22f29138..657873e4328d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -72,7 +72,7 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) /* * We have one of these per vbd, whether ide, scsi or 'other'. 
They diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 458bb57914a3..cdbeec9f83ea 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,8 +66,8 @@ struct netfront_cb { #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) +#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) struct netfront_info { diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index e8cbf431c8cc..75271b9a8f61 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -24,8 +24,15 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) + +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. -- cgit v1.2.3