From 19001c8c5bfa032ed45b10dfe48e355f5df88c61 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: xen: Rename the balloon lock * xen_create_contiguous_region needs access to the balloon lock to ensure memory doesn't change under its feet, so expose the balloon lock * Change the name of the lock to xen_reservation_lock, to imply it's now less-specific usage. [ Impact: cleanup ] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/interface/memory.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/xen') diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index af36ead16817..e6adce6bc75c 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -9,6 +9,8 @@ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ +#include + /* * Increase or decrease the specified domain's memory reservation. Returns a * -ve errcode on failure, or the # extents successfully allocated or freed. @@ -142,4 +144,10 @@ struct xen_translate_gpfn_list { }; DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); + +/* + * Prevent the balloon driver from changing the memory reservation + * during a driver critical region. + */ +extern spinlock_t xen_reservation_lock; #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- cgit v1.2.3 From 08bbc9da92f7e44b9c208c6a1adba70c403b255e Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: xen: Add xen_create_contiguous_region A memory region must be physically contiguous in order to be accessed through DMA. This patch adds xen_create_contiguous_region, which ensures a region of contiguous virtual memory is also physically contiguous. Based on Stephen Tweedie's port of the 2.6.18-xen version. Remove contiguous_bitmap[] as it's no longer needed. Ported from linux-2.6.18-xen.hg 707:e410857fd83c [ Impact: add Xen-internal API to make pages phys-contig ] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 201 +++++++++++++++++++++++++++++++++++++++++ include/xen/interface/memory.h | 42 +++++++++ include/xen/xen-ops.h | 6 ++ 3 files changed, 249 insertions(+) (limited to 'include/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 9e0d82fc21e4..eb51402dd99a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -2027,6 +2028,206 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; } +/* Protected by xen_reservation_lock. */ +#define MAX_CONTIG_ORDER 9 /* 2MB */ +static unsigned long discontig_frames[1< MAX_CONTIG_ORDER)) + return -ENOMEM; + + memset((void *) vstart, 0, PAGE_SIZE << order); + + vm_unmap_aliases(); + + spin_lock_irqsave(&xen_reservation_lock, flags); + + /* 1. Zap current PTEs, remembering MFNs. */ + xen_zap_pfn_range(vstart, order, in_frames, NULL); + + /* 2. Get a new contiguous memory extent. */ + out_frame = virt_to_pfn(vstart); + success = xen_exchange_memory(1UL << order, 0, in_frames, + 1, order, &out_frame, + address_bits); + + /* 3. Map the new extent in place of old pages. */ + if (success) + xen_remap_exchanged_ptes(vstart, order, NULL, out_frame); + else + xen_remap_exchanged_ptes(vstart, order, in_frames, 0); + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + return success ? 0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(xen_create_contiguous_region); + +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ + unsigned long *out_frames = discontig_frames, in_frame; + unsigned long flags; + int success; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + + if (unlikely(order > MAX_CONTIG_ORDER)) + return; + + memset((void *) vstart, 0, PAGE_SIZE << order); + + vm_unmap_aliases(); + + spin_lock_irqsave(&xen_reservation_lock, flags); + + /* 1. Find start MFN of contiguous extent. */ + in_frame = virt_to_mfn(vstart); + + /* 2. Zap current PTEs. */ + xen_zap_pfn_range(vstart, order, NULL, out_frames); + + /* 3. Do the exchange for non-contiguous MFNs. */ + success = xen_exchange_memory(1, order, &in_frame, 1UL << order, + 0, out_frames, 0); + + /* 4. Map new pages in place of old pages. */ + if (success) + xen_remap_exchanged_ptes(vstart, order, out_frames, 0); + else + xen_remap_exchanged_ptes(vstart, order, NULL, in_frame); + + spin_unlock_irqrestore(&xen_reservation_lock, flags); +} +EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index e6adce6bc75c..d3938d3e71f8 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -54,6 +54,48 @@ struct xen_memory_reservation { }; DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + unsigned long nr_exchanged; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); /* * Returns the maximum machine frame number of mapped RAM in this system. * This command always succeeds (it never returns an error code). diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 883a21bba24b..d789c937c48a 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -14,4 +14,10 @@ void xen_mm_unpin_all(void); void xen_timer_resume(void); void xen_arch_resume(void); +extern unsigned long *xen_contiguous_bitmap; +int xen_create_contiguous_region(unsigned long vstart, unsigned int order, + unsigned int address_bits); + +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From b097186fd29d5bc5a26d1ae87995821ffc27b66e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 11 May 2010 10:05:49 -0400 Subject: swiotlb-xen: SWIOTLB library for Xen PV guest with PCI passthrough. This patchset: PV guests under Xen are running in an non-contiguous memory architecture. When PCI pass-through is utilized, this necessitates an IOMMU for translating bus (DMA) to virtual and vice-versa and also providing a mechanism to have contiguous pages for device drivers operations (say DMA operations). Specifically, under Xen the Linux idea of pages is an illusion. It assumes that pages start at zero and go up to the available memory. To help with that, the Linux Xen MMU provides a lookup mechanism to translate the page frame numbers (PFN) to machine frame numbers (MFN) and vice-versa. The MFN are the "real" frame numbers. Furthermore memory is not contiguous. Xen hypervisor stitches memory for guests from different pools, which means there is no guarantee that PFN==MFN and PFN+1==MFN+1. Lastly with Xen 4.0, pages (in debug mode) are allocated in descending order (high to low), meaning the guest might never get any MFN's under the 4GB mark. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Jeremy Fitzhardinge Cc: FUJITA Tomonori Cc: Albert Herranz Cc: Ian Campbell --- drivers/xen/Kconfig | 4 + drivers/xen/Makefile | 3 +- drivers/xen/swiotlb-xen.c | 515 ++++++++++++++++++++++++++++++++++++++++++++++ include/xen/swiotlb-xen.h | 65 ++++++ 4 files changed, 586 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/swiotlb-xen.c create mode 100644 include/xen/swiotlb-xen.h (limited to 'include/xen') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index fad3df2c1276..97199c2a64a0 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -62,4 +62,8 @@ config XEN_SYS_HYPERVISOR virtual environment, /sys/hypervisor will still be present, but will have no xen contents. +config SWIOTLB_XEN + def_bool y + depends on SWIOTLB + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 7c284342f30f..85f84cff8104 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XENFS) += xenfs/ -obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o +obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c new file mode 100644 index 000000000000..54469c3eeacd --- /dev/null +++ b/drivers/xen/swiotlb-xen.c @@ -0,0 +1,515 @@ +/* + * Copyright 2010 + * by Konrad Rzeszutek Wilk + * + * This code provides a IOMMU for Xen PV guests with PCI passthrough. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * PV guests under Xen are running in an non-contiguous memory architecture. + * + * When PCI pass-through is utilized, this necessitates an IOMMU for + * translating bus (DMA) to virtual and vice-versa and also providing a + * mechanism to have contiguous pages for device drivers operations (say DMA + * operations). + * + * Specifically, under Xen the Linux idea of pages is an illusion. It + * assumes that pages start at zero and go up to the available memory. To + * help with that, the Linux Xen MMU provides a lookup mechanism to + * translate the page frame numbers (PFN) to machine frame numbers (MFN) + * and vice-versa. The MFN are the "real" frame numbers. Furthermore + * memory is not contiguous. Xen hypervisor stitches memory for guests + * from different pools, which means there is no guarantee that PFN==MFN + * and PFN+1==MFN+1. Lastly with Xen 4.0, pages (in debug mode) are + * allocated in descending order (high to low), meaning the guest might + * never get any MFN's under the 4GB mark. + * + */ + +#include +#include +#include +#include +#include +/* + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ + +static char *xen_io_tlb_start, *xen_io_tlb_end; +static unsigned long xen_io_tlb_nslabs; +/* + * Quick lookup value of the bus address of the IOTLB. + */ + +u64 start_dma_addr; + +static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) +{ + return phys_to_machine(XPADDR(paddr)).maddr;; +} + +static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) +{ + return machine_to_phys(XMADDR(baddr)).paddr; +} + +static dma_addr_t xen_virt_to_bus(void *address) +{ + return xen_phys_to_bus(virt_to_phys(address)); +} + +static int check_pages_physically_contiguous(unsigned long pfn, + unsigned int offset, + size_t length) +{ + unsigned long next_mfn; + int i; + int nr_pages; + + next_mfn = pfn_to_mfn(pfn); + nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; + + for (i = 1; i < nr_pages; i++) { + if (pfn_to_mfn(++pfn) != ++next_mfn) + return 0; + } + return 1; +} + +static int range_straddles_page_boundary(phys_addr_t p, size_t size) +{ + unsigned long pfn = PFN_DOWN(p); + unsigned int offset = p & ~PAGE_MASK; + + if (offset + size <= PAGE_SIZE) + return 0; + if (check_pages_physically_contiguous(pfn, offset, size)) + return 0; + return 1; +} + +static int is_xen_swiotlb_buffer(dma_addr_t dma_addr) +{ + unsigned long mfn = PFN_DOWN(dma_addr); + unsigned long pfn = mfn_to_local_pfn(mfn); + phys_addr_t paddr; + + /* If the address is outside our domain, it CAN + * have the same virtual address as another address + * in our domain. Therefore _only_ check address within our domain. + */ + if (pfn_valid(pfn)) { + paddr = PFN_PHYS(pfn); + return paddr >= virt_to_phys(xen_io_tlb_start) && + paddr < virt_to_phys(xen_io_tlb_end); + } + return 0; +} + +static int max_dma_bits = 32; + +static int +xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) +{ + int i, rc; + int dma_bits; + + dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; + + i = 0; + do { + int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE); + + do { + rc = xen_create_contiguous_region( + (unsigned long)buf + (i << IO_TLB_SHIFT), + get_order(slabs << IO_TLB_SHIFT), + dma_bits); + } while (rc && dma_bits++ < max_dma_bits); + if (rc) + return rc; + + i += slabs; + } while (i < nslabs); + return 0; +} + +void __init xen_swiotlb_init(int verbose) +{ + unsigned long bytes; + int rc; + + xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); + xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); + + bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from any location. + */ + xen_io_tlb_start = alloc_bootmem(bytes); + if (!xen_io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + xen_io_tlb_end = xen_io_tlb_start + bytes; + /* + * And replace that memory with pages under 4GB. + */ + rc = xen_swiotlb_fixup(xen_io_tlb_start, + bytes, + xen_io_tlb_nslabs); + if (rc) + goto error; + + start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); + swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); + + return; +error: + panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ + "We either don't have the permission or you do not have enough"\ + "free memory under 4GB!\n", rc); +} + +void * +xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *ret; + int order = get_order(size); + u64 dma_mask = DMA_BIT_MASK(32); + unsigned long vstart; + + /* + * Ignore region specifiers - the kernel's ideas of + * pseudo-phys memory layout has nothing to do with the + * machine physical layout. We can't allocate highmem + * because we can't return a pointer to it. + */ + flags &= ~(__GFP_DMA | __GFP_HIGHMEM); + + if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret)) + return ret; + + vstart = __get_free_pages(flags, order); + ret = (void *)vstart; + + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = dma_alloc_coherent_mask(hwdev, flags); + + if (ret) { + if (xen_create_contiguous_region(vstart, order, + fls64(dma_mask)) != 0) { + free_pages(vstart, order); + return NULL; + } + memset(ret, 0, size); + *dma_handle = virt_to_machine(ret).maddr; + } + return ret; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); + +void +xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dev_addr) +{ + int order = get_order(size); + + if (dma_release_from_coherent(hwdev, order, vaddr)) + return; + + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); + + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * physical address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed. + */ +dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dev_addr = xen_phys_to_bus(phys); + void *map; + + BUG_ON(dir == DMA_NONE); + /* + * If the address happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (dma_capable(dev, dev_addr, size) && + !range_straddles_page_boundary(phys, size) && !swiotlb_force) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir); + if (!map) + return DMA_ERROR_CODE; + + dev_addr = xen_virt_to_bus(map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (!dma_capable(dev, dev_addr, size)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous xen_swiotlb_map_page call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = xen_bus_to_phys(dev_addr); + + BUG_ON(dir == DMA_NONE); + + /* NOTE: We use dev_addr here, not paddr! */ + if (is_xen_swiotlb_buffer(dev_addr)) { + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + /* + * phys_to_virt doesn't work with hihgmem page but we could + * call dma_mark_clean() with hihgmem page here. However, we + * are fine since dma_mark_clean() is null on POWERPC. We can + * make dma_mark_clean() take a physical address if necessary. + */ + dma_mark_clean(phys_to_virt(paddr), size); +} + +void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + xen_unmap_single(hwdev, dev_addr, size, dir); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page); + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a xen_swiotlb_map_page() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the dma mapping, you must + * call this function before doing so. At the next point you give the dma + * address back to the card, you must first perform a + * xen_swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +static void +xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) +{ + phys_addr_t paddr = xen_bus_to_phys(dev_addr); + + BUG_ON(dir == DMA_NONE); + + /* NOTE: We use dev_addr here, not paddr! */ + if (is_xen_swiotlb_buffer(dev_addr)) { + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(phys_to_virt(paddr), size); +} + +void +xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_cpu); + +void +xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_for_device); + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above xen_swiotlb_map_page + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for xen_swiotlb_map_page are the + * same here. + */ +int +xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { + phys_addr_t paddr = sg_phys(sg); + dma_addr_t dev_addr = xen_phys_to_bus(paddr); + + if (swiotlb_force || + !dma_capable(hwdev, dev_addr, sg->length) || + range_straddles_page_boundary(paddr, sg->length)) { + void *map = swiotlb_tbl_map_single(hwdev, + start_dma_addr, + sg_phys(sg), + sg->length, dir); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, + attrs); + sgl[0].dma_length = 0; + return DMA_ERROR_CODE; + } + sg->dma_address = xen_virt_to_bus(map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); + +int +xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg); + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_page() above. + */ +void +xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) + xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + +} +EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); + +void +xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg); + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +static void +xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + xen_swiotlb_sync_single(hwdev, sg->dma_address, + sg->dma_length, dir, target); +} + +void +xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_sync_sg_for_cpu); + +void +xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_sync_sg_for_device); + +int +xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) +{ + return !dma_addr; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mapping_error); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_supported); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h new file mode 100644 index 000000000000..2ea2fdc79c16 --- /dev/null +++ b/include/xen/swiotlb-xen.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_SWIOTLB_XEN_H +#define __LINUX_SWIOTLB_XEN_H + +#include + +extern void xen_swiotlb_init(int verbose); + +extern void +*xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags); + +extern void +xen_swiotlb_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs); + +extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs); +/* +extern int +xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, + enum dma_data_direction dir); + +extern void +xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + enum dma_data_direction dir); +*/ +extern int +xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs); + +extern void +xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs); + +extern void +xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir); + +extern void +xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); + +extern void +xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir); + +extern void +xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); + +extern int +xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); + +extern int +xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); + +#endif /* __LINUX_SWIOTLB_XEN_H */ -- cgit v1.2.3 From c93a4dfb31f2c023da3ad1238c352452f2cc0e05 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:28 +0100 Subject: xen: pvhvm: allow user to request no emulated device unplug this allows the user to disable pvhvm and revert to emulated devices in case of a system misconfiguration (e.g. initramfs with only emulated drivers in it). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 1 + arch/x86/xen/platform-pci-unplug.c | 5 +++++ include/xen/platform_pci.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include/xen') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2c85c0692b01..8bbe83b9d0b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2631,6 +2631,7 @@ and is between 256 and 4096 characters. It is defined in the file all -- unplug all emulated devices (NICs and IDE disks) ignore -- continue loading the Xen platform PCI driver even if the version check failed + never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] Format: diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 554c002a1e1a..070dfa0654bd 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -72,6 +72,9 @@ void __init xen_unplug_emulated_devices(void) { int r; + /* user explicitly requested no unplug */ + if (xen_emul_unplug & XEN_UNPLUG_NEVER) + return; /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. @@ -127,6 +130,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; else if (!strncmp(p, "ignore", l)) xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "never", l)) + xen_emul_unplug |= XEN_UNPLUG_NEVER; else printk(KERN_WARNING "unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index ce9d671c636c..123b7752fa6a 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -21,6 +21,7 @@ #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 #define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 1dc7ce99b091a11cce0f34456c1ffcb928f17edd Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:29 +0100 Subject: xen: pvhvm: rename xen_emul_unplug=ignore to =unnnecessary It is not immediately clear what this option causes to become ignored. The actual meaning is that it is not necessary to unplug the emulated devices to safely use the PV ones, even if the platform does not support the unplug protocol. (pressumably the user will only add this option if they have ensured that their domain configuration is safe). I think xen_emul_unplug=unnecessary better captures this. Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 5 +++-- arch/x86/xen/platform-pci-unplug.c | 13 +++++++------ drivers/block/xen-blkfront.c | 2 +- include/xen/platform_pci.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/xen') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8bbe83b9d0b2..f084af0cb8e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2629,8 +2629,9 @@ and is between 256 and 4096 characters. It is defined in the file aux-ide-disks -- unplug non-primary-master IDE devices nics -- unplug network devices all -- unplug all emulated devices (NICs and IDE disks) - ignore -- continue loading the Xen platform PCI driver even - if the version check failed + unnecessary -- unplugging emulated devices is + unnecessary even if the host did not respond to + the unplug protocol never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 070dfa0654bd..0f456386cce5 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -78,10 +78,11 @@ void __init xen_unplug_emulated_devices(void) /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. - * Also enable the Xen platform PCI driver if the version is really old - * and the user told us to ignore it. */ + * Also enable the Xen platform PCI driver if the host does + * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) + * but the user told us that unplugging is unnecessary. */ if (r && !(r == XEN_PLATFORM_ERR_MAGIC && - (xen_emul_unplug & XEN_UNPLUG_IGNORE))) + (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) return; /* Set the default value of xen_emul_unplug depending on whether or * not the Xen PV frontends and the Xen platform PCI driver have @@ -102,7 +103,7 @@ void __init xen_unplug_emulated_devices(void) } } /* Now unplug the emulated devices */ - if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) + if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); xen_platform_pci_unplug = xen_emul_unplug; } @@ -128,8 +129,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; else if (!strncmp(p, "nics", l)) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - else if (!strncmp(p, "ignore", l)) - xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "unnecessary", l)) + xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..ab735a605cf3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -834,7 +834,7 @@ static int blkfront_probe(struct xenbus_device *dev, char *type; int len; /* no unplug has been done: do not hook devices != xen vbds */ - if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { int major; if (!VDEV_IS_EXTENDED(vdevice)) diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 123b7752fa6a..590ccfd82645 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -20,7 +20,7 @@ #define XEN_UNPLUG_ALL_NICS 2 #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_UNNECESSARY 8 #define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { -- cgit v1.2.3 From 9c35e90c6fcf7f5baf27a63d9565e9f47633f299 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 12:01:35 +0100 Subject: xen: pvhvm: make it clearer that XEN_UNPLUG_* define bits in a bitfield by defining in terms of (1< Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- include/xen/platform_pci.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/xen') diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 590ccfd82645..a785a3b0c8c7 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -16,12 +16,15 @@ #define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */ #define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */ -#define XEN_UNPLUG_ALL_IDE_DISKS 1 -#define XEN_UNPLUG_ALL_NICS 2 -#define XEN_UNPLUG_AUX_IDE_DISKS 4 -#define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_UNNECESSARY 8 -#define XEN_UNPLUG_NEVER 16 +#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0) +#define XEN_UNPLUG_ALL_NICS (1<<1) +#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2) +#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\ + XEN_UNPLUG_ALL_NICS|\ + XEN_UNPLUG_AUX_IDE_DISKS) + +#define XEN_UNPLUG_UNNECESSARY (1<<16) +#define XEN_UNPLUG_NEVER (1<<17) static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From d46a78b05c0e37f76ddf4a7a67bf0b6c68bada55 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 1 Oct 2010 12:20:09 -0400 Subject: xen: implement pirq type event channels A privileged PV Xen domain can get direct access to hardware. In order for this to be useful, it must be able to get hardware interrupts. Being a PV Xen domain, all interrupts are delivered as event channels. PIRQ event channels are bound to a pirq number and an interrupt vector. When a IO APIC raises a hardware interrupt on that vector, it is delivered as an event channel, which we can deliver to the appropriate device driver(s). This patch simply implements the infrastructure for dealing with pirq event channels. [ Impact: integrate hardware interrupts into Xen's event scheme ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++- include/xen/events.h | 11 +++ 2 files changed, 252 insertions(+), 2 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7d24b0d94ed4..bc69a9d92abc 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -16,7 +16,7 @@ * (typically dom0). * 2. VIRQs, typically used for timers. These are per-cpu events. * 3. IPIs. - * 4. Hardware interrupts. Not supported at present. + * 4. PIRQs - Hardware interrupts. * * Jeremy Fitzhardinge , XenSource Inc, 2007 */ @@ -46,6 +46,9 @@ #include #include +/* Leave low irqs free for identity mapping */ +#define LEGACY_IRQS 16 + /* * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. @@ -89,10 +92,12 @@ struct irq_info enum ipi_vector ipi; struct { unsigned short gsi; - unsigned short vector; + unsigned char vector; + unsigned char flags; } pirq; } u; }; +#define PIRQ_NEEDS_EOI (1 << 0) static struct irq_info irq_info[NR_IRQS]; @@ -113,6 +118,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; +static struct irq_chip xen_pirq_chip; /* Constructor for packed IRQ information. */ static struct irq_info mk_unbound_info(void) @@ -225,6 +231,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } +static bool pirq_needs_eoi(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.flags & PIRQ_NEEDS_EOI; +} + static inline unsigned long active_evtchns(unsigned int cpu, struct shared_info *sh, unsigned int idx) @@ -365,6 +380,210 @@ static int find_unbound_irq(void) return irq; } +static bool identity_mapped_irq(unsigned irq) +{ + /* only identity map legacy irqs */ + return irq < LEGACY_IRQS; +} + +static void pirq_unmask_notify(int irq) +{ + struct physdev_eoi eoi = { .irq = irq }; + + if (unlikely(pirq_needs_eoi(irq))) { + int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + WARN_ON(rc); + } +} + +static void pirq_query_unmask(int irq) +{ + struct physdev_irq_status_query irq_status; + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + irq_status.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) + irq_status.flags = 0; + + info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; + if (irq_status.flags & XENIRQSTAT_needs_eoi) + info->u.pirq.flags |= PIRQ_NEEDS_EOI; +} + +static bool probing_irq(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return desc && desc->action == NULL; +} + +static unsigned int startup_pirq(unsigned int irq) +{ + struct evtchn_bind_pirq bind_pirq; + struct irq_info *info = info_for_irq(irq); + int evtchn = evtchn_from_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + if (VALID_EVTCHN(evtchn)) + goto out; + + bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. */ + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + if (!probing_irq(irq)) + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", + irq); + return 0; + } + evtchn = bind_pirq.port; + + pirq_query_unmask(irq); + + evtchn_to_irq[evtchn] = irq; + bind_evtchn_to_cpu(evtchn, 0); + info->evtchn = evtchn; + +out: + unmask_evtchn(evtchn); + pirq_unmask_notify(irq); + + return 0; +} + +static void shutdown_pirq(unsigned int irq) +{ + struct evtchn_close close; + struct irq_info *info = info_for_irq(irq); + int evtchn = evtchn_from_irq(irq); + + BUG_ON(info->type != IRQT_PIRQ); + + if (!VALID_EVTCHN(evtchn)) + return; + + mask_evtchn(evtchn); + + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = -1; + info->evtchn = 0; +} + +static void enable_pirq(unsigned int irq) +{ + startup_pirq(irq); +} + +static void disable_pirq(unsigned int irq) +{ +} + +static void ack_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + mask_evtchn(evtchn); + clear_evtchn(evtchn); + } +} + +static void end_pirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + struct irq_desc *desc = irq_to_desc(irq); + + if (WARN_ON(!desc)) + return; + + if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == + (IRQ_DISABLED|IRQ_PENDING)) { + shutdown_pirq(irq); + } else if (VALID_EVTCHN(evtchn)) { + unmask_evtchn(evtchn); + pirq_unmask_notify(irq); + } +} + +static int find_irq_by_gsi(unsigned gsi) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_info *info = info_for_irq(irq); + + if (info == NULL || info->type != IRQT_PIRQ) + continue; + + if (gsi_from_irq(irq) == gsi) + return irq; + } + + return -1; +} + +/* + * Allocate a physical irq, along with a vector. We don't assign an + * event channel until the irq actually started up. Return an + * existing irq if we've already got one for the gsi. + */ +int xen_allocate_pirq(unsigned gsi) +{ + int irq; + struct physdev_irq irq_op; + + spin_lock(&irq_mapping_update_lock); + + irq = find_irq_by_gsi(gsi); + if (irq != -1) { + printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n", + irq, gsi); + goto out; /* XXX need refcount? */ + } + + if (identity_mapped_irq(gsi)) { + irq = gsi; + dynamic_irq_init(irq); + } else + irq = find_unbound_irq(); + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, "pirq"); + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { + dynamic_irq_cleanup(irq); + irq = -ENOSPC; + goto out; + } + + irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + +out: + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +int xen_vector_from_irq(unsigned irq) +{ + return vector_from_irq(irq); +} + +int xen_gsi_from_irq(unsigned irq) +{ + return gsi_from_irq(irq); +} + int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -964,6 +1183,26 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .retrigger = retrigger_dynirq, }; +static struct irq_chip xen_pirq_chip __read_mostly = { + .name = "xen-pirq", + + .startup = startup_pirq, + .shutdown = shutdown_pirq, + + .enable = enable_pirq, + .unmask = enable_pirq, + + .disable = disable_pirq, + .mask = disable_pirq, + + .ack = ack_pirq, + .end = end_pirq, + + .set_affinity = set_affinity_irq, + + .retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_percpu_chip __read_mostly = { .name = "xen-percpu", diff --git a/include/xen/events.h b/include/xen/events.h index a15d93262e30..8f6232023b75 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -63,4 +63,15 @@ int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); +/* Allocate an irq for a physical interrupt, given a gsi. "Legacy" + * GSIs are identity mapped; others are dynamically allocated as + * usual. */ +int xen_allocate_pirq(unsigned gsi); + +/* Return vector allocated to pirq */ +int xen_vector_from_irq(unsigned pirq); + +/* Return gsi allocated to pirq */ +int xen_gsi_from_irq(unsigned pirq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From 1a60d05f40882303dad13f8f0e077e2e49ea8996 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 4 Oct 2010 13:42:27 -0400 Subject: xen: set pirq name to something useful. Impact: cleanup Make pirq show useful information in /proc/interrupts [v2: Removed the parts for arch/x86/xen/pci.c ] Signed-off-by: Gerd Hoffmann Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 4 ++-- include/xen/events.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 19a93297e890..0fcfb4a1ceab 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -544,7 +544,7 @@ static int find_irq_by_gsi(unsigned gsi) * event channel until the irq actually started up. Return an * existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi) +int xen_allocate_pirq(unsigned gsi, char *name) { int irq; struct physdev_irq irq_op; @@ -566,7 +566,7 @@ int xen_allocate_pirq(unsigned gsi) irq = find_unbound_irq(); set_irq_chip_and_handler_name(irq, &xen_pirq_chip, - handle_level_irq, "pirq"); + handle_level_irq, name); irq_op.irq = irq; if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { diff --git a/include/xen/events.h b/include/xen/events.h index 8f6232023b75..8227da8f7165 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -66,7 +66,7 @@ void xen_hvm_evtchn_do_upcall(void); /* Allocate an irq for a physical interrupt, given a gsi. "Legacy" * GSIs are identity mapped; others are dynamically allocated as * usual. */ -int xen_allocate_pirq(unsigned gsi); +int xen_allocate_pirq(unsigned gsi, char *name); /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v1.2.3 From d9a8814f27080cec6126fca3ef0c210d9f56181e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 Nov 2009 16:33:09 -0500 Subject: xen: Provide a variant of xen_poll_irq with timeout. The 'xen_poll_irq_timeout' provides a method to pass in the poll timeout for IRQs if requested. We also export those two poll functions as Xen PCI fronted uses them. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/events.c | 17 ++++++++++++----- include/xen/events.h | 4 ++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index bab5ac18fe0e..4e0f868517be 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1139,7 +1139,7 @@ void xen_clear_irq_pending(int irq) if (VALID_EVTCHN(evtchn)) clear_evtchn(evtchn); } - +EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) { int evtchn = evtchn_from_irq(irq); @@ -1159,9 +1159,9 @@ bool xen_test_irq_pending(int irq) return ret; } -/* Poll waiting for an irq to become pending. In the usual case, the - irq will be disabled so it won't deliver an interrupt. */ -void xen_poll_irq(int irq) +/* Poll waiting for an irq to become pending with timeout. In the usual case, + * the irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq_timeout(int irq, u64 timeout) { evtchn_port_t evtchn = evtchn_from_irq(irq); @@ -1169,13 +1169,20 @@ void xen_poll_irq(int irq) struct sched_poll poll; poll.nr_ports = 1; - poll.timeout = 0; + poll.timeout = timeout; set_xen_guest_handle(poll.ports, &evtchn); if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) BUG(); } } +EXPORT_SYMBOL(xen_poll_irq_timeout); +/* Poll waiting for an irq to become pending. In the usual case, the + * irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq) +{ + xen_poll_irq_timeout(irq, 0 /* no timeout */); +} void xen_irq_resume(void) { diff --git a/include/xen/events.h b/include/xen/events.h index 8227da8f7165..2532f8bd2401 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,6 +53,10 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Poll waiting for an irq to become pending with a timeout. In the usual case, + * the irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq_timeout(int irq, u64 timeout); + /* Determine the IRQ which is bound to an event channel */ unsigned irq_from_evtchn(unsigned int evtchn); -- cgit v1.2.3 From 15ebbb82bac700db3c91e662fb70cb3559e9d930 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 4 Oct 2010 13:43:27 -0400 Subject: xen: fix shared irq device passthrough In driver/xen/events.c, whether bind_pirq is shareable or not is determined by desc->action is NULL or not. But in __setup_irq, startup(irq) is invoked before desc->action is assigned with new action. So desc->action in startup_irq is always NULL, and bind_pirq is always not shareable. This results in pt_irq_create_bind failure when passthrough a device which shares irq to other devices. This patch doesn't use probing_irq to determine if pirq is shareable or not, instead set shareable flag in irq_info according to trigger mode in xen_allocate_pirq. Set level triggered interrupts shareable. Thus use this flag to set bind_pirq flag accordingly. [v2: arch/x86/xen/pci.c no more, so file skipped] Signed-off-by: Weidong Han Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 11 ++++++++--- include/xen/events.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4e0f868517be..cd504092299b 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -97,6 +97,7 @@ struct irq_info } u; }; #define PIRQ_NEEDS_EOI (1 << 0) +#define PIRQ_SHAREABLE (1 << 1) static struct irq_info *irq_info; @@ -445,6 +446,7 @@ static unsigned int startup_pirq(unsigned int irq) struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); int evtchn = evtchn_from_irq(irq); + int rc; BUG_ON(info->type != IRQT_PIRQ); @@ -453,8 +455,10 @@ static unsigned int startup_pirq(unsigned int irq) bind_pirq.pirq = irq; /* NB. We are happy to share unless we are probing. */ - bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { + bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? + BIND_PIRQ__WILL_SHARE : 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); + if (rc != 0) { if (!probing_irq(irq)) printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq); @@ -564,7 +568,7 @@ static int find_irq_by_gsi(unsigned gsi) * event channel until the irq actually started up. Return an * existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi, char *name) +int xen_allocate_pirq(unsigned gsi, int shareable, char *name) { int irq; struct physdev_irq irq_op; @@ -596,6 +600,7 @@ int xen_allocate_pirq(unsigned gsi, char *name) } irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; out: spin_unlock(&irq_mapping_update_lock); diff --git a/include/xen/events.h b/include/xen/events.h index 2532f8bd2401..d7a4ca7d17b5 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -70,7 +70,7 @@ void xen_hvm_evtchn_do_upcall(void); /* Allocate an irq for a physical interrupt, given a gsi. "Legacy" * GSIs are identity mapped; others are dynamically allocated as * usual. */ -int xen_allocate_pirq(unsigned gsi, char *name); +int xen_allocate_pirq(unsigned gsi, int shareable, char *name); /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v1.2.3 From b5401a96b59475c1c878439caecb8c521bdfd4ad Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Thu, 18 Mar 2010 16:31:34 -0400 Subject: xen/x86/PCI: Add support for the Xen PCI subsystem The frontend stub lives in arch/x86/pci/xen.c, alongside other sub-arch PCI init code (e.g. olpc.c). It provides a mechanism for Xen PCI frontend to setup/destroy legacy interrupts, MSI/MSI-X, and PCI configuration operations. [ Impact: add core of Xen PCI support ] [ v2: Removed the IOMMU code and only focusing on PCI.] [ v3: removed usage of pci_scan_all_fns as that does not exist] [ v4: introduced pci_xen value to fix compile warnings] [ v5: squished fixes+features in one patch, changed Reviewed-by to Ccs] [ v7: added Acked-by] Signed-off-by: Alex Nixon Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Stefano Stabellini Acked-by: Jesse Barnes Cc: "H. Peter Anvin" Cc: Matthew Wilcox Cc: Qing He Cc: Thomas Gleixner Cc: x86@kernel.org --- arch/x86/Kconfig | 5 ++ arch/x86/include/asm/xen/pci.h | 53 +++++++++++++++ arch/x86/pci/Makefile | 1 + arch/x86/pci/xen.c | 147 +++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/enlighten.c | 3 + drivers/xen/events.c | 32 ++++++++- include/xen/events.h | 3 + 7 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/xen/pci.h create mode 100644 arch/x86/pci/xen.c (limited to 'include/xen') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8cc510874e1b..74ea59d34076 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1898,6 +1898,11 @@ config PCI_OLPC def_bool y depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) +config PCI_XEN + def_bool y + depends on PCI && XEN + select SWIOTLB_XEN + config PCI_DOMAINS def_bool y depends on PCI diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h new file mode 100644 index 000000000000..449c82f71677 --- /dev/null +++ b/arch/x86/include/asm/xen/pci.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_XEN_PCI_H +#define _ASM_X86_XEN_PCI_H + +#if defined(CONFIG_PCI_XEN) +extern int __init pci_xen_init(void); +#define pci_xen 1 +#else +#define pci_xen 0 +#define pci_xen_init (0) +#endif + +#if defined(CONFIG_PCI_MSI) +#if defined(CONFIG_PCI_XEN) +/* The drivers/pci/xen-pcifront.c sets this structure to + * its own functions. + */ +struct xen_pci_frontend_ops { + int (*enable_msi)(struct pci_dev *dev, int **vectors); + void (*disable_msi)(struct pci_dev *dev); + int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + void (*disable_msix)(struct pci_dev *dev); +}; + +extern struct xen_pci_frontend_ops *xen_pci_frontend; + +static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, + int **vectors) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msi) + return xen_pci_frontend->enable_msi(dev, vectors); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msi) + xen_pci_frontend->disable_msi(dev); +} +static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, + int **vectors, int nvec) +{ + if (xen_pci_frontend && xen_pci_frontend->enable_msix) + return xen_pci_frontend->enable_msix(dev, vectors, nvec); + return -ENODEV; +} +static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) +{ + if (xen_pci_frontend && xen_pci_frontend->disable_msix) + xen_pci_frontend->disable_msix(dev); +} +#endif /* CONFIG_PCI_XEN */ +#endif /* CONFIG_PCI_MSI */ + +#endif /* _ASM_X86_XEN_PCI_H */ diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index a0207a7fdf39..effd96e33f16 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o +obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c new file mode 100644 index 000000000000..b19c873d8d0c --- /dev/null +++ b/arch/x86/pci/xen.c @@ -0,0 +1,147 @@ +/* + * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux + * x86 PCI core to support the Xen PCI Frontend + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#if defined(CONFIG_PCI_MSI) +#include + +struct xen_pci_frontend_ops *xen_pci_frontend; +EXPORT_SYMBOL_GPL(xen_pci_frontend); + +/* + * For MSI interrupts we have to use drivers/xen/event.s functions to + * allocate an irq_desc and setup the right */ + + +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, i; + struct msi_desc *msidesc; + int *v; + + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); + if (!v) + return -ENOMEM; + + if (!xen_initial_domain()) { + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, &v); + if (ret) + goto error; + } + i = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_allocate_pirq(v[i], 0, /* not sharable */ + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : "pcifront-msi"); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error_while; + i++; + } + kfree(v); + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + kfree(v); + return ret; +} + +static void xen_teardown_msi_irqs(struct pci_dev *dev) +{ + /* Only do this when were are in non-privileged mode.*/ + if (!xen_initial_domain()) { + struct msi_desc *msidesc; + + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); + } + +} + +static void xen_teardown_msi_irq(unsigned int irq) +{ + xen_destroy_irq(irq); +} +#endif + +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ + int rc; + int share = 1; + + dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); + + if (dev->irq < 0) + return -EINVAL; + + if (dev->irq < NR_IRQS_LEGACY) + share = 0; + + rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", + dev->irq, rc); + return rc; + } + return 0; +} + +int __init pci_xen_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); + + pcibios_set_cache_line_size(); + + pcibios_enable_irq = xen_pcifront_enable_irq; + pcibios_disable_irq = NULL; + +#ifdef CONFIG_ACPI + /* Keep ACPI out of the picture */ + acpi_noirq = 1; +#endif + +#ifdef CONFIG_ISAPNP + /* Stop isapnp from probing */ + isapnp_disable = 1; +#endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs; +#endif + return 0; +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7d46c8441418..1ccfa1bf0f89 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1220,6 +1221,8 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + if (pci_xen) + x86_init.pci.arch_init = pci_xen_init; } else { /* Make sure ACS will be enabled */ pci_request_acs(); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index cd504092299b..7016a734257c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -582,7 +582,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) goto out; /* XXX need refcount? */ } - if (identity_mapped_irq(gsi)) { + /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore + * we are using the !xen_initial_domain() to drop in the function.*/ + if (identity_mapped_irq(gsi) || !xen_initial_domain()) { irq = gsi; irq_to_desc_alloc_node(irq, 0); dynamic_irq_init(irq); @@ -593,7 +595,13 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) handle_level_irq, name); irq_op.irq = irq; - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { + irq_op.vector = 0; + + /* Only the privileged domain can do this. For non-priv, the pcifront + * driver provides a PCI bus that does the call to do exactly + * this in the priv domain. */ + if (xen_initial_domain() && + HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { dynamic_irq_cleanup(irq); irq = -ENOSPC; goto out; @@ -608,6 +616,26 @@ out: return irq; } +int xen_destroy_irq(int irq) +{ + struct irq_desc *desc; + int rc = -ENOENT; + + spin_lock(&irq_mapping_update_lock); + + desc = irq_to_desc(irq); + if (!desc) + goto out; + + irq_info[irq] = mk_unbound_info(); + + dynamic_irq_cleanup(irq); + +out: + spin_unlock(&irq_mapping_update_lock); + return rc; +} + int xen_vector_from_irq(unsigned irq) { return vector_from_irq(irq); diff --git a/include/xen/events.h b/include/xen/events.h index d7a4ca7d17b5..c1717ca5ac13 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,6 +72,9 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); +/* De-allocates the above mentioned physical interrupt. */ +int xen_destroy_irq(int irq); + /* Return vector allocated to pirq */ int xen_vector_from_irq(unsigned pirq); -- cgit v1.2.3 From 89afb6e46a0f72e0e5c51ef44aa900b74681664b Mon Sep 17 00:00:00 2001 From: Yosuke Iwamatsu Date: Tue, 13 Oct 2009 17:22:27 -0400 Subject: xenbus: Xen paravirtualised PCI hotplug support. The Xen PCI front driver adds two new states that are utilizez for PCI hotplug support. This is a patch pulled from the linux-2.6-xen-sparse tree. Signed-off-by: Noboru Iwamatsu Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Yosuke Iwamatsu --- drivers/xen/xenbus/xenbus_client.c | 2 ++ include/xen/interface/io/xenbus.h | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 7e49527189b6..cdacf923e073 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state) [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", + [XenbusStateReconfiguring] = "Reconfiguring", + [XenbusStateReconfigured] = "Reconfigured", }; return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; } diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h index 46508c7fa399..9fda532973a5 100644 --- a/include/xen/interface/io/xenbus.h +++ b/include/xen/interface/io/xenbus.h @@ -27,8 +27,14 @@ enum xenbus_state XenbusStateClosing = 5, /* The device is being closed due to an error or an unplug event. */ - XenbusStateClosed = 6 + XenbusStateClosed = 6, + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 }; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ -- cgit v1.2.3 From 956a9202cd1220397933a07beda9f96b3df1fa24 Mon Sep 17 00:00:00 2001 From: Ryan Wilson Date: Mon, 2 Aug 2010 21:31:05 -0400 Subject: xen-pcifront: Xen PCI frontend driver. This is a port of the 2.6.18 Xen PCI front driver with fixes to make it build under 2.6.34 and later (for the full list of changes: git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git historic/xen-pcifront-0.1). It also includes the fixes to make it work properly. [v2: Updated Kconfig, removed crud, added Reviewed-by] [v3: Added 'static', fixed grant table leak, redid Kconfig] [v4: Added one more 'static' and removed comments] Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Stefano Stabellini Reviewed-by: Jan Beulich --- drivers/pci/Kconfig | 21 + drivers/pci/Makefile | 2 + drivers/pci/xen-pcifront.c | 1148 ++++++++++++++++++++++++++++++++++++++ include/xen/interface/io/pciif.h | 112 ++++ 4 files changed, 1283 insertions(+) create mode 100644 drivers/pci/xen-pcifront.c create mode 100644 include/xen/interface/io/pciif.h (limited to 'include/xen') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34ef70d562b2..5b1630e4e9e3 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -40,6 +40,27 @@ config PCI_STUB When in doubt, say N. +config XEN_PCIDEV_FRONTEND + tristate "Xen PCI Frontend" + depends on PCI && X86 && XEN + select HOTPLUG + select PCI_XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend debugging" + depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG + help + Say Y here if you want the Xen PCI frontend to produce a bunch of debug + messages to the system log. Select this if you are having a + problem with Xen PCI frontend support and want to see more of what is + going on. + + When in doubt, say N. + config HT_IRQ bool "Interrupts on hypertransport devices" default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index dc1aa0922868..d5e27050c4e3 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -65,6 +65,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o + ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c new file mode 100644 index 000000000000..a87c4985326e --- /dev/null +++ b/drivers/pci/xen-pcifront.c @@ -0,0 +1,1148 @@ +/* + * Xen PCI Frontend. + * + * Author: Ryan Wilson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) + +struct pci_bus_entry { + struct list_head list; + struct pci_bus *bus; +}; + +#define _PDEVB_op_active (0) +#define PDEVB_op_active (1 << (_PDEVB_op_active)) + +struct pcifront_device { + struct xenbus_device *xdev; + struct list_head root_buses; + + int evtchn; + int gnt_ref; + + int irq; + + /* Lock this when doing any operations in sh_info */ + spinlock_t sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + struct work_struct op_work; + unsigned long flags; + +}; + +struct pcifront_sd { + int domain; + struct pcifront_device *pdev; +}; + +static inline struct pcifront_device * +pcifront_get_pdev(struct pcifront_sd *sd) +{ + return sd->pdev; +} + +static inline void pcifront_init_sd(struct pcifront_sd *sd, + unsigned int domain, unsigned int bus, + struct pcifront_device *pdev) +{ + sd->domain = domain; + sd->pdev = pdev; +} + +static DEFINE_SPINLOCK(pcifront_dev_lock); +static struct pcifront_device *pcifront_dev; + +static int verbose_request; +module_param(verbose_request, int, 0644); + +static int errno_to_pcibios_err(int errno) +{ + switch (errno) { + case XEN_PCI_ERR_success: + return PCIBIOS_SUCCESSFUL; + + case XEN_PCI_ERR_dev_not_found: + return PCIBIOS_DEVICE_NOT_FOUND; + + case XEN_PCI_ERR_invalid_offset: + case XEN_PCI_ERR_op_failed: + return PCIBIOS_BAD_REGISTER_NUMBER; + + case XEN_PCI_ERR_not_implemented: + return PCIBIOS_FUNC_NOT_SUPPORTED; + + case XEN_PCI_ERR_access_denied: + return PCIBIOS_SET_FAILED; + } + return errno; +} + +static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) +{ + if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) { + dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n"); + schedule_work(&pdev->op_work); + } +} + +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + unsigned long irq_flags; + evtchn_port_t port = pdev->evtchn; + unsigned irq = pdev->irq; + s64 ns, ns_timeout; + struct timeval tv; + + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + /* + * We set a poll timeout of 3 seconds but give up on return after + * 2 seconds. It is better to time out too late rather than too early + * (in the latter case we end up continually re-executing poll() with a + * timeout in the past). 1s difference gives plenty of slack for error. + */ + do_gettimeofday(&tv); + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; + + xen_clear_irq_pending(irq); + + while (test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags)) { + xen_poll_irq_timeout(irq, jiffies + 3*HZ); + xen_clear_irq_pending(irq); + do_gettimeofday(&tv); + ns = timeval_to_ns(&tv); + if (ns > ns_timeout) { + dev_err(&pdev->xdev->dev, + "pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + /* + * We might lose backend service request since we + * reuse same evtchn with pci_conf backend response. So re-schedule + * aer pcifront service. + */ + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&pdev->sh_info->flags)) { + dev_err(&pdev->xdev->dev, + "schedule aer pcifront service\n"); + schedule_pcifront_aer_op(pdev); + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; +out: + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); + return err; +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + int err = 0; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where, size); + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (verbose_request) + dev_info(&pdev->xdev->dev, "read got back value %x\n", + op.value); + + *val = op.value; + } else if (err == -ENODEV) { + /* No device here, pretend that it just returned 0 */ + err = 0; + *val = 0; + } + + return errno_to_pcibios_err(err); +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + .value = val, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "write dev=%04x:%02x:%02x.%01x - " + "offset %x size %d val %x\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + + return errno_to_pcibios_err(do_pci_op(pdev, &op)); +} + +struct pci_ops pcifront_bus_ops = { + .read = pcifront_bus_read, + .write = pcifront_bus_write, +}; + +#ifdef CONFIG_PCI_MSI +static int pci_frontend_enable_msix(struct pci_dev *dev, + int **vector, int nvec) +{ + int err; + int i; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + .value = nvec, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + struct msi_desc *entry; + + if (nvec > SH_INFO_MAX_VEC) { + dev_err(&dev->dev, "too much vector for pci frontend: %x." + " Increase SH_INFO_MAX_VEC.\n", nvec); + return -EINVAL; + } + + i = 0; + list_for_each_entry(entry, &dev->msi_list, list) { + op.msix_entries[i].entry = entry->msi_attrib.entry_nr; + /* Vector is useless at this point. */ + op.msix_entries[i].vector = -1; + i++; + } + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (likely(!op.value)) { + /* we get the result */ + for (i = 0; i < nvec; i++) + *(*vector+i) = op.msix_entries[i].vector; + return 0; + } else { + printk(KERN_DEBUG "enable msix get value %x\n", + op.value); + return op.value; + } + } else { + dev_err(&dev->dev, "enable msix get err %x\n", err); + return err; + } +} + +static void pci_frontend_disable_msix(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + + /* What should do for error ? */ + if (err) + dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); +} + +static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (likely(!err)) { + *(*vector) = op.value; + } else { + dev_err(&dev->dev, "pci frontend enable msi failed for dev " + "%x:%x\n", op.bus, op.devfn); + err = -EINVAL; + } + return err; +} + +static void pci_frontend_disable_msi(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (err == XEN_PCI_ERR_dev_not_found) { + /* XXX No response from backend, what shall we do? */ + printk(KERN_DEBUG "get no response from backend for disable MSI\n"); + return; + } + if (err) + /* how can pciback notify us fail? */ + printk(KERN_DEBUG "get fake response frombackend\n"); +} + +static struct xen_pci_frontend_ops pci_frontend_ops = { + .enable_msi = pci_frontend_enable_msi, + .disable_msi = pci_frontend_disable_msi, + .enable_msix = pci_frontend_enable_msix, + .disable_msix = pci_frontend_disable_msix, +}; + +static void pci_frontend_registrar(int enable) +{ + if (enable) + xen_pci_frontend = &pci_frontend_ops; + else + xen_pci_frontend = NULL; +}; +#else +static inline void pci_frontend_registrar(int enable) { }; +#endif /* CONFIG_PCI_MSI */ + +/* Claim resources for the PCI frontend as-is, backend won't allow changes */ +static int pcifront_claim_resource(struct pci_dev *dev, void *data) +{ + struct pcifront_device *pdev = data; + int i; + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + + if (!r->parent && r->start && r->flags) { + dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", + pci_name(dev), i); + if (pci_claim_resource(dev, i)) { + dev_err(&pdev->xdev->dev, "Could not claim " + "resource %s/%d! Device offline. Try " + "giving less than 4GB to domain.\n", + pci_name(dev), i); + } + } + } + + return 0; +} + +static int __devinit pcifront_scan_bus(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus, + struct pci_bus *b) +{ + struct pci_dev *d; + unsigned int devfn; + + /* Scan the bus for functions and add. + * We omit handling of PCI bridge attachment because pciback prevents + * bridges from being exported. + */ + for (devfn = 0; devfn < 0x100; devfn++) { + d = pci_get_slot(b, devfn); + if (d) { + /* Device is already known. */ + pci_dev_put(d); + continue; + } + + d = pci_scan_single_device(b, devfn); + if (d) + dev_info(&pdev->xdev->dev, "New device on " + "%04x:%02x:%02x.%02x found.\n", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + } + + return 0; +} + +static int __devinit pcifront_scan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + struct pci_bus *b; + struct pcifront_sd *sd = NULL; + struct pci_bus_entry *bus_entry = NULL; + int err = 0; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + err = -EINVAL; + goto err_out; + } +#endif + + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", + domain, bus); + + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!bus_entry || !sd) { + err = -ENOMEM; + goto err_out; + } + pcifront_init_sd(sd, domain, bus, pdev); + + b = pci_scan_bus_parented(&pdev->xdev->dev, bus, + &pcifront_bus_ops, sd); + if (!b) { + dev_err(&pdev->xdev->dev, + "Error creating PCI Frontend Bus!\n"); + err = -ENOMEM; + goto err_out; + } + + bus_entry->bus = b; + + list_add(&bus_entry->list, &pdev->root_buses); + + /* pci_scan_bus_parented skips devices which do not have a have + * devfn==0. The pcifront_scan_bus enumerates all devfn. */ + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; + +err_out: + kfree(bus_entry); + kfree(sd); + + return err; +} + +static int __devinit pcifront_rescan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + int err; + struct pci_bus *b; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + return -EINVAL; + } +#endif + + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", + domain, bus); + + b = pci_find_bus(domain, bus); + if (!b) + /* If the bus is unknown, create it. */ + return pcifront_scan_root(pdev, domain, bus); + + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; +} + +static void free_root_bus_devs(struct pci_bus *bus) +{ + struct pci_dev *dev; + + while (!list_empty(&bus->devices)) { + dev = container_of(bus->devices.next, struct pci_dev, + bus_list); + dev_dbg(&dev->dev, "removing device\n"); + pci_remove_bus_device(dev); + } +} + +static void pcifront_free_roots(struct pcifront_device *pdev) +{ + struct pci_bus_entry *bus_entry, *t; + + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); + + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { + list_del(&bus_entry->list); + + free_root_bus_devs(bus_entry->bus); + + kfree(bus_entry->bus->sysdata); + + device_unregister(bus_entry->bus->bridge); + pci_remove_bus(bus_entry->bus); + + kfree(bus_entry); + } +} + +static pci_ers_result_t pcifront_common_process(int cmd, + struct pcifront_device *pdev, + pci_channel_state_t state) +{ + pci_ers_result_t result; + struct pci_driver *pdrv; + int bus = pdev->sh_info->aer_op.bus; + int devfn = pdev->sh_info->aer_op.devfn; + struct pci_dev *pcidev; + int flag = 0; + + dev_dbg(&pdev->xdev->dev, + "pcifront AER process: cmd %x (bus:%x, devfn%x)", + cmd, bus, devfn); + result = PCI_ERS_RESULT_NONE; + + pcidev = pci_get_bus_and_slot(bus, devfn); + if (!pcidev || !pcidev->driver) { + dev_err(&pcidev->dev, + "device or driver is NULL\n"); + return result; + } + pdrv = pcidev->driver; + + if (get_driver(&pdrv->driver)) { + if (pdrv->err_handler && pdrv->err_handler->error_detected) { + dev_dbg(&pcidev->dev, + "trying to call AER service\n"); + if (pcidev) { + flag = 1; + switch (cmd) { + case XEN_PCI_OP_aer_detected: + result = pdrv->err_handler-> + error_detected(pcidev, state); + break; + case XEN_PCI_OP_aer_mmio: + result = pdrv->err_handler-> + mmio_enabled(pcidev); + break; + case XEN_PCI_OP_aer_slotreset: + result = pdrv->err_handler-> + slot_reset(pcidev); + break; + case XEN_PCI_OP_aer_resume: + pdrv->err_handler->resume(pcidev); + break; + default: + dev_err(&pdev->xdev->dev, + "bad request in aer recovery " + "operation!\n"); + + } + } + } + put_driver(&pdrv->driver); + } + if (!flag) + result = PCI_ERS_RESULT_NONE; + + return result; +} + + +static void pcifront_do_aer(struct work_struct *data) +{ + struct pcifront_device *pdev = + container_of(data, struct pcifront_device, op_work); + int cmd = pdev->sh_info->aer_op.cmd; + pci_channel_state_t state = + (pci_channel_state_t)pdev->sh_info->aer_op.err; + + /*If a pci_conf op is in progress, + we have to wait until it is done before service aer op*/ + dev_dbg(&pdev->xdev->dev, + "pcifront service aer bus %x devfn %x\n", + pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); + + pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); + + /* Post the operation to the guest. */ + wmb(); + clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(pdev->evtchn); + + /*in case of we lost an aer request in four lines time_window*/ + smp_mb__before_clear_bit(); + clear_bit(_PDEVB_op_active, &pdev->flags); + smp_mb__after_clear_bit(); + + schedule_pcifront_aer_op(pdev); + +} + +static irqreturn_t pcifront_handler_aer(int irq, void *dev) +{ + struct pcifront_device *pdev = dev; + schedule_pcifront_aer_op(pdev); + return IRQ_HANDLED; +} +static int pcifront_connect(struct pcifront_device *pdev) +{ + int err = 0; + + spin_lock(&pcifront_dev_lock); + + if (!pcifront_dev) { + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); + pcifront_dev = pdev; + } else { + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); + err = -EEXIST; + } + + spin_unlock(&pcifront_dev_lock); + + return err; +} + +static void pcifront_disconnect(struct pcifront_device *pdev) +{ + spin_lock(&pcifront_dev_lock); + + if (pdev == pcifront_dev) { + dev_info(&pdev->xdev->dev, + "Disconnecting PCI Frontend Buses\n"); + pcifront_dev = NULL; + } + + spin_unlock(&pcifront_dev_lock); +} +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev; + + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + + pdev->sh_info = + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); + if (pdev->sh_info == NULL) { + kfree(pdev); + pdev = NULL; + goto out; + } + pdev->sh_info->flags = 0; + + /*Flag for registering PV AER handler*/ + set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags); + + dev_set_drvdata(&xdev->dev, pdev); + pdev->xdev = xdev; + + INIT_LIST_HEAD(&pdev->root_buses); + + spin_lock_init(&pdev->sh_info_lock); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + pdev->irq = -1; + + INIT_WORK(&pdev->op_work, pcifront_do_aer); + + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", + pdev, pdev->sh_info); +out: + return pdev; +} + +static void free_pdev(struct pcifront_device *pdev) +{ + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); + + pcifront_free_roots(pdev); + + /*For PCIE_AER error handling job*/ + flush_scheduled_work(); + + if (pdev->irq >= 0) + unbind_from_irqhandler(pdev->irq, pdev); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, + (unsigned long)pdev->sh_info); + else + free_page((unsigned long)pdev->sh_info); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + + kfree(pdev); +} + +static int pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) + goto out; + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer, + 0, "pcifront", pdev); + + if (err < 0) + return err; + + pdev->irq = err; + +do_publish: + err = xenbus_transaction_start(&trans); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction " + "for backend"); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + + dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); + +out: + return err; +} + +static int __devinit pcifront_try_connect(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + char str[64]; + unsigned int domain, bus; + + + /* Only connect once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + err = pcifront_connect(pdev); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error connecting PCI Frontend"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_scan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_scan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_try_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + enum xenbus_state prev_state; + + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state >= XenbusStateClosing) + goto out; + + if (prev_state == XenbusStateConnected) { + pcifront_free_roots(pdev); + pcifront_disconnect(pdev); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); + +out: + + return err; +} + +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + unsigned int domain, bus; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_rescan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_rescan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_detach_devices(struct pcifront_device *pdev) +{ + int err = 0; + int i, num_devs; + unsigned int domain, bus, slot, func; + struct pci_bus *pci_bus; + struct pci_dev *pci_dev; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateConnected) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI devices"); + goto out; + } + + /* Find devices being detached and remove them. */ + for (i = 0; i < num_devs; i++) { + int l, state; + l = snprintf(str, sizeof(str), "state-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", + &state); + if (err != 1) + state = XenbusStateUnknown; + + if (state != XenbusStateClosing) + continue; + + /* Remove device. */ + l = snprintf(str, sizeof(str), "vdev-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err != 4) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI device %d", i); + goto out; + } + + pci_bus = pci_find_bus(domain, bus); + if (!pci_bus) { + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", + domain, bus); + continue; + } + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); + if (!pci_dev) { + dev_dbg(&pdev->xdev->dev, + "Cannot get PCI device %04x:%02x:%02x.%02x\n", + domain, bus, slot, func); + continue; + } + pci_remove_bus_device(pci_dev); + pci_dev_put(pci_dev); + + dev_dbg(&pdev->xdev->dev, + "PCI device %04x:%02x:%02x.%02x removed.\n", + domain, bus, slot, func); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); + +out: + return err; +} + +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, + enum xenbus_state be_state) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + + switch (be_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + pcifront_try_connect(pdev); + break; + + case XenbusStateClosing: + dev_warn(&xdev->dev, "backend going away!\n"); + pcifront_try_disconnect(pdev); + break; + + case XenbusStateReconfiguring: + pcifront_detach_devices(pdev); + break; + + case XenbusStateReconfigured: + pcifront_attach_devices(pdev); + break; + } +} + +static int pcifront_xenbus_probe(struct xenbus_device *xdev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev = alloc_pdev(xdev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, + "Error allocating pcifront_device struct"); + goto out; + } + + err = pcifront_publish_info(pdev); + if (err) + free_pdev(pdev); + +out: + return err; +} + +static int pcifront_xenbus_remove(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + if (pdev) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_pcifront_driver = { + .name = "pcifront", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pcifront_xenbus_probe, + .remove = pcifront_xenbus_remove, + .otherend_changed = pcifront_backend_changed, +}; + +static int __init pcifront_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + pci_frontend_registrar(1 /* enable */); + + return xenbus_register_frontend(&xenbus_pcifront_driver); +} + +static void __exit pcifront_cleanup(void) +{ + xenbus_unregister_driver(&xenbus_pcifront_driver); + pci_frontend_registrar(0 /* disable */); +} +module_init(pcifront_init); +module_exit(pcifront_cleanup); + +MODULE_DESCRIPTION("Xen PCI passthrough frontend."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:pci"); diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h new file mode 100644 index 000000000000..d9922ae36eb5 --- /dev/null +++ b/include/xen/interface/io/pciif.h @@ -0,0 +1,112 @@ +/* + * PCI Backend/Frontend Common Data Structures & Macros + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Ryan Wilson + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) +#define _XEN_PCIB_AERHANDLER (1) +#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) +#define _XEN_PCIB_active (2) +#define XEN_PCIB_active (1<<_XEN_PCIB_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_aer_detected (6) +#define XEN_PCI_OP_aer_resume (7) +#define XEN_PCI_OP_aer_mmio (8) +#define XEN_PCI_OP_aer_slotreset (9) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +/* + * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) + * Should not exceed 128 + */ +#define SH_INFO_MAX_VEC 128 + +struct xen_msix_entry { + uint16_t vector; + uint16_t entry; +}; +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; + /* IN: Contains extra infor for this operation */ + uint32_t info; + /*IN: param for msi-x */ + struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; +}; + +/*used for pcie aer handling*/ +struct xen_pcie_aer_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + /*IN/OUT: return aer_op result or carry error_detected state as input*/ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment*/ + uint32_t bus; + uint32_t devfn; +}; +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; + struct xen_pcie_aer_op aer_op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ -- cgit v1.2.3 From 1c5de1939c204bde9cce87f4eb3d26e9f9eb732b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:49 -0800 Subject: xen: add privcmd driver The privcmd interface in xenfs allows the tool stack in the privileged domain to get fairly direct access to the hypervisor in order to do various management things such as domain construction. [ Impact: new xenfs interface for privileged operations ] Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/xenfs/Makefile | 2 +- drivers/xen/xenfs/privcmd.c | 436 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/xenfs/super.c | 2 + drivers/xen/xenfs/xenfs.h | 1 + include/xen/Kbuild | 1 + include/xen/privcmd.h | 80 ++++++++ 6 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/xenfs/privcmd.c create mode 100644 include/xen/privcmd.h (limited to 'include/xen') diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 5d45ff13cc01..4a0be9a82af3 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o xenfs-y = super.o xenbus.o -xenfs-$(CONFIG_XEN_DOM0) += xenstored.o +xenfs-$(CONFIG_XEN_DOM0) += xenstored.o privcmd.o diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c new file mode 100644 index 000000000000..c7192f314f86 --- /dev/null +++ b/drivers/xen/xenfs/privcmd.c @@ -0,0 +1,436 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); +#endif + +struct remap_data { + unsigned long mfn; + unsigned domid; + pgprot_t prot; +}; + +static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + + xen_set_domain_pte(ptep, pte, rmd->domid); + + return 0; +} + +int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, + unsigned long mfn, unsigned long size, + pgprot_t prot, unsigned domid) +{ + struct remap_data rmd; + int err; + + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); + + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + + rmd.mfn = mfn; + rmd.prot = prot; + rmd.domid = domid; + + err = apply_to_page_range(vma->vm_mm, addr, size, + remap_area_mfn_pte_fn, &rmd); + + return err; +} + +static long privcmd_ioctl_hypercall(void __user *udata) +{ + struct privcmd_hypercall hypercall; + long ret; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + + ret = privcmd_call(hypercall.op, + hypercall.arg[0], hypercall.arg[1], + hypercall.arg[2], hypercall.arg[3], + hypercall.arg[4]); + + return ret; +} + +static void free_page_list(struct list_head *pages) +{ + struct page *p, *n; + + list_for_each_entry_safe(p, n, pages, lru) + __free_page(p); + + INIT_LIST_HEAD(pages); +} + +/* + * Given an array of items in userspace, return a list of pages + * containing the data. If copying fails, either because of memory + * allocation failure or a problem reading user memory, return an + * error code; its up to the caller to dispose of any partial list. + */ +static int gather_array(struct list_head *pagelist, + unsigned nelem, size_t size, + void __user *data) +{ + unsigned pageidx; + void *pagedata; + int ret; + + if (size > PAGE_SIZE) + return 0; + + pageidx = PAGE_SIZE; + pagedata = NULL; /* quiet, gcc */ + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page = alloc_page(GFP_KERNEL); + + ret = -ENOMEM; + if (page == NULL) + goto fail; + + pagedata = page_address(page); + + list_add_tail(&page->lru, pagelist); + pageidx = 0; + } + + ret = -EFAULT; + if (copy_from_user(pagedata + pageidx, data, size)) + goto fail; + + data += size; + pageidx += size; + } + + ret = 0; + +fail: + return ret; +} + +/* + * Call function "fn" on each element of the array fragmented + * over a list of pages. + */ +static int traverse_pages(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + pagedata = NULL; /* hush, gcc */ + + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + pageidx = 0; + } + + ret = (*fn)(pagedata + pageidx, state); + if (ret) + break; + pageidx += size; + } + + return ret; +} + +struct mmap_mfn_state { + unsigned long va; + struct vm_area_struct *vma; + domid_t domain; +}; + +static int mmap_mfn_range(void *data, void *state) +{ + struct privcmd_mmap_entry *msg = data; + struct mmap_mfn_state *st = state; + struct vm_area_struct *vma = st->vma; + int rc; + + /* Do not allow range to wrap the address space. */ + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) + return -EINVAL; + + /* Range chunks must be contiguous in va space. */ + if ((msg->va != st->va) || + ((msg->va+(msg->npages< vma->vm_end)) + return -EINVAL; + + rc = remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, + msg->npages << PAGE_SHIFT, + vma->vm_page_prot, + st->domain); + if (rc < 0) + return rc; + + st->va += msg->npages << PAGE_SHIFT; + + return 0; +} + +static long privcmd_ioctl_mmap(void __user *udata) +{ + struct privcmd_mmap mmapcmd; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + LIST_HEAD(pagelist); + struct mmap_mfn_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + rc = gather_array(&pagelist, + mmapcmd.num, sizeof(struct privcmd_mmap_entry), + mmapcmd.entry); + + if (rc || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + { + struct page *page = list_first_entry(&pagelist, + struct page, lru); + struct privcmd_mmap_entry *msg = page_address(page); + + vma = find_vma(mm, msg->va); + rc = -EINVAL; + + if (!vma || (msg->va != vma->vm_start) || + !privcmd_enforce_singleshot_mapping(vma)) + goto out_up; + } + + state.va = vma->vm_start; + state.vma = vma; + state.domain = mmapcmd.dom; + + rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), + &pagelist, + mmap_mfn_range, &state); + + +out_up: + up_write(&mm->mmap_sem); + +out: + free_page_list(&pagelist); + + return rc; +} + +struct mmap_batch_state { + domid_t domain; + unsigned long va; + struct vm_area_struct *vma; + int err; + + xen_pfn_t __user *user; +}; + +static int mmap_batch_fn(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, + *mfnp, PAGE_SIZE, + st->vma->vm_page_prot, st->domain) < 0) { + *mfnp |= 0xf0000000U; + st->err++; + } + st->va += PAGE_SIZE; + + return 0; +} + +static int mmap_return_errors(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + put_user(*mfnp, st->user++); + + return 0; +} + +static long privcmd_ioctl_mmap_batch(void __user *udata) +{ + int ret; + struct privcmd_mmapbatch m; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long nr_pages; + LIST_HEAD(pagelist); + struct mmap_batch_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) + return -EFAULT; + + nr_pages = m.num; + if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) + return -EINVAL; + + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), + m.arr); + + if (ret || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + vma = find_vma(mm, m.addr); + ret = -EINVAL; + if (!vma || + (m.addr != vma->vm_start) || + ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || + !privcmd_enforce_singleshot_mapping(vma)) { + up_write(&mm->mmap_sem); + goto out; + } + + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.err = 0; + + ret = traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state); + + up_write(&mm->mmap_sem); + + if (state.err > 0) { + ret = state.err; + + state.user = udata; + traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, + mmap_return_errors, &state); + } + +out: + free_page_list(&pagelist); + + return ret; +} + +static long privcmd_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + void __user *udata = (void __user *) data; + + switch (cmd) { + case IOCTL_PRIVCMD_HYPERCALL: + ret = privcmd_ioctl_hypercall(udata); + break; + + case IOCTL_PRIVCMD_MMAP: + ret = privcmd_ioctl_mmap(udata); + break; + + case IOCTL_PRIVCMD_MMAPBATCH: + ret = privcmd_ioctl_mmap_batch(udata); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct privcmd_vm_ops = { + .fault = privcmd_fault +}; + +static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* Unsupported for auto-translate guests. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + vma->vm_ops = &privcmd_vm_ops; + vma->vm_private_data = NULL; + + return 0; +} + +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +{ + return (xchg(&vma->vm_private_data, (void *)1) == NULL); +} +#endif + +const struct file_operations privcmd_file_ops = { + .unlocked_ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 3cf7707217f2..8c7462866e90 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -96,6 +96,8 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); xenfs_create_file(sb, sb->s_root, "xsd_port", &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); + xenfs_create_file(sb, sb->s_root, "privcmd", + &privcmd_file_ops, NULL, S_IRUSR|S_IWUSR); } return rc; diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index 5056306e7aa8..b68aa6200003 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h @@ -2,6 +2,7 @@ #define _XENFS_XENBUS_H extern const struct file_operations xenbus_file_ops; +extern const struct file_operations privcmd_file_ops; extern const struct file_operations xsd_kva_file_ops; extern const struct file_operations xsd_port_file_ops; diff --git a/include/xen/Kbuild b/include/xen/Kbuild index 4e65c16a445b..84ad8f02fee5 100644 --- a/include/xen/Kbuild +++ b/include/xen/Kbuild @@ -1 +1,2 @@ header-y += evtchn.h +header-y += privcmd.h diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h new file mode 100644 index 000000000000..b42cdfd92fee --- /dev/null +++ b/include/xen/privcmd.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * privcmd.h + * + * Interface to /proc/xen/privcmd. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_PRIVCMD_H__ +#define __LINUX_PUBLIC_PRIVCMD_H__ + +#include + +typedef unsigned long xen_pfn_t; + +#ifndef __user +#define __user +#endif + +struct privcmd_hypercall { + __u64 op; + __u64 arg[5]; +}; + +struct privcmd_mmap_entry { + __u64 va; + __u64 mfn; + __u64 npages; +}; + +struct privcmd_mmap { + int num; + domid_t dom; /* target domain */ + struct privcmd_mmap_entry __user *entry; +}; + +struct privcmd_mmapbatch { + int num; /* number of pages to populate */ + domid_t dom; /* target domain */ + __u64 addr; /* virtual address */ + xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ +}; + +/* + * @cmd: IOCTL_PRIVCMD_HYPERCALL + * @arg: &privcmd_hypercall_t + * Return: Value returned from execution of the specified hypercall. + */ +#define IOCTL_PRIVCMD_HYPERCALL \ + _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) +#define IOCTL_PRIVCMD_MMAP \ + _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) +#define IOCTL_PRIVCMD_MMAPBATCH \ + _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) + +#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -- cgit v1.2.3 From de1ef2065c4675ab1062ebc8d1cb6c5f42b61d04 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 21 May 2009 10:09:46 +0100 Subject: xen/privcmd: move remap_domain_mfn_range() to core xen code and export. This allows xenfs to be built as a module, previously it required flush_tlb_all and arbitrary_virt_to_machine to be exported. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 66 ++++++++++++++++++++++++++++++++++++ drivers/xen/xenfs/privcmd.c | 81 +++++---------------------------------------- include/xen/xen-ops.h | 5 +++ 3 files changed, 79 insertions(+), 73 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 1ceb0f2fa0af..f08ea045620f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2265,6 +2265,72 @@ void __init xen_hvm_init_mmu_ops(void) } #endif +#define REMAP_BATCH_SIZE 16 + +struct remap_data { + unsigned long mfn; + pgprot_t prot; + struct mmu_update *mmu_update; +}; + +static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_data *rmd = data; + pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + + rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; + rmd->mmu_update->val = pte_val_ma(pte); + rmd->mmu_update++; + + return 0; +} + +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long mfn, int nr, + pgprot_t prot, unsigned domid) +{ + struct remap_data rmd; + struct mmu_update mmu_update[REMAP_BATCH_SIZE]; + int batch; + unsigned long range; + int err = 0; + + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); + + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + + rmd.mfn = mfn; + rmd.prot = prot; + + while (nr) { + batch = min(REMAP_BATCH_SIZE, nr); + range = (unsigned long)batch << PAGE_SHIFT; + + rmd.mmu_update = mmu_update; + err = apply_to_page_range(vma->vm_mm, addr, range, + remap_area_mfn_pte_fn, &rmd); + if (err) + goto out; + + err = -EFAULT; + if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + goto out; + + nr -= batch; + addr += range; + } + + err = 0; +out: + + flush_tlb_all(); + + return err; +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 438223ae0fc3..f80be7f6eb95 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -31,76 +31,12 @@ #include #include #include - -#define REMAP_BATCH_SIZE 16 +#include #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); #endif -struct remap_data { - unsigned long mfn; - pgprot_t prot; - struct mmu_update *mmu_update; -}; - -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) -{ - struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); - - rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; - rmd->mmu_update->val = pte_val_ma(pte); - rmd->mmu_update++; - - return 0; -} - -static int remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) -{ - struct remap_data rmd; - struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; - unsigned long range; - int err = 0; - - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - - rmd.mfn = mfn; - rmd.prot = prot; - - while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); - range = (unsigned long)batch << PAGE_SHIFT; - - rmd.mmu_update = mmu_update; - err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); - if (err) - goto out; - - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) - goto out; - - nr -= batch; - addr += range; - } - - err = 0; -out: - - flush_tlb_all(); - - return err; -} - static long privcmd_ioctl_hypercall(void __user *udata) { struct privcmd_hypercall hypercall; @@ -233,11 +169,11 @@ static int mmap_mfn_range(void *data, void *state) ((msg->va+(msg->npages< vma->vm_end)) return -EINVAL; - rc = remap_domain_mfn_range(vma, - msg->va & PAGE_MASK, - msg->mfn, msg->npages, - vma->vm_page_prot, - st->domain); + rc = xen_remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, msg->npages, + vma->vm_page_prot, + st->domain); if (rc < 0) return rc; @@ -315,9 +251,8 @@ static int mmap_batch_fn(void *data, void *state) xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, - *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { + if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain) < 0) { *mfnp |= 0xf0000000U; st->err++; } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 351f4051f6d8..98b92154a264 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -23,4 +23,9 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order, void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long mfn, int nr, + pgprot_t prot, unsigned domid); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From 35ae11fd146384d222f3bb1f17eed1970cc92c36 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:09:48 -0800 Subject: xen: Use host-provided E820 map Rather than simply using a flat memory map from Xen, use its provided E820 map. This allows the domain builder to tell the domain to reserve space for more pages than those initially provided at domain-build time. It also allows the host to specify holes in the address space (for PCI-passthrough, for example). Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/setup.c | 38 ++++++++++++++++++++++++++++++++++++-- include/xen/interface/memory.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 328b00305426..dd2eb2a9303f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -107,13 +108,46 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, char * __init xen_memory_setup(void) { + static struct e820entry map[E820MAX] __initdata; + unsigned long max_pfn = xen_start_info->nr_pages; + unsigned long long mem_end; + int rc; + struct xen_memory_map memmap; + int i; max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + mem_end = PFN_PHYS(max_pfn); + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, map); + + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc == -ENOSYS) { + memmap.nr_entries = 1; + map[0].addr = 0ULL; + map[0].size = mem_end; + /* 8MB slack (to balance backend allocations). */ + map[0].size += 8ULL << 20; + map[0].type = E820_RAM; + rc = 0; + } + BUG_ON(rc); e820.nr_map = 0; - - e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); + for (i = 0; i < memmap.nr_entries; i++) { + unsigned long long end = map[i].addr + map[i].size; + if (map[i].type == E820_RAM) { + if (map[i].addr > mem_end) + continue; + if (end > mem_end) { + /* Truncate region to max_mem. */ + map[i].size -= end - mem_end; + } + } + if (map[i].size > 0) + e820_add_region(map[i].addr, map[i].size, map[i].type); + } /* * Even though this is normal, usable memory under Xen, reserve diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d3938d3e71f8..d7a6c13bde69 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -186,6 +186,35 @@ struct xen_translate_gpfn_list { }; DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + GUEST_HANDLE(void) buffer; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_machine_memory_map 10 + /* * Prevent the balloon driver from changing the memory reservation -- cgit v1.2.3 From 7a043f119c0e4b460306f868d9638ac55c6afa6f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 1 Jul 2010 17:08:14 +0100 Subject: xen: support pirq != irq PHYSDEVOP_map_pirq might return a pirq different from what we asked if we are running as an HVM guest, so we need to be able to support pirqs that are different from linux irqs. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 64 +++++++++++++++++++++++++++++++++++++++------------- include/xen/events.h | 1 + 2 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 3df53de6b43a..018a96275ee4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -90,6 +90,7 @@ struct irq_info unsigned short virq; enum ipi_vector ipi; struct { + unsigned short pirq; unsigned short gsi; unsigned char vector; unsigned char flags; @@ -100,6 +101,7 @@ struct irq_info #define PIRQ_SHAREABLE (1 << 1) static struct irq_info *irq_info; +static int *pirq_to_irq; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -147,11 +149,12 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) .cpu = 0, .u.virq = virq }; } -static struct irq_info mk_pirq_info(unsigned short evtchn, +static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq, unsigned short gsi, unsigned short vector) { return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; + .cpu = 0, + .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } }; } /* @@ -193,6 +196,16 @@ static unsigned virq_from_irq(unsigned irq) return info->u.virq; } +static unsigned pirq_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.pirq; +} + static unsigned gsi_from_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); @@ -365,6 +378,16 @@ static int get_nr_hw_irqs(void) return ret; } +static int find_unbound_pirq(void) +{ + int i; + for (i = 0; i < nr_irqs; i++) { + if (pirq_to_irq[i] < 0) + return i; + } + return -1; +} + static int find_unbound_irq(void) { struct irq_data *data; @@ -410,7 +433,7 @@ static bool identity_mapped_irq(unsigned irq) static void pirq_unmask_notify(int irq) { - struct physdev_eoi eoi = { .irq = irq }; + struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; if (unlikely(pirq_needs_eoi(irq))) { int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -425,7 +448,7 @@ static void pirq_query_unmask(int irq) BUG_ON(info->type != IRQT_PIRQ); - irq_status.irq = irq; + irq_status.irq = pirq_from_irq(irq); if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) irq_status.flags = 0; @@ -453,7 +476,7 @@ static unsigned int startup_pirq(unsigned int irq) if (VALID_EVTCHN(evtchn)) goto out; - bind_pirq.pirq = irq; + bind_pirq.pirq = pirq_from_irq(irq); /* NB. We are happy to share unless we are probing. */ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? BIND_PIRQ__WILL_SHARE : 0; @@ -556,28 +579,32 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } -/* xen_allocate_irq might allocate irqs from the top down, as a +int xen_allocate_pirq(unsigned gsi, int shareable, char *name) +{ + return xen_map_pirq_gsi(gsi, gsi, shareable, name); +} + +/* xen_map_pirq_gsi might allocate irqs from the top down, as a * consequence don't assume that the irq number returned has a low value * or can be used as a pirq number unless you know otherwise. * - * One notable exception is when xen_allocate_irq is called passing an + * One notable exception is when xen_map_pirq_gsi is called passing an * hardware gsi as argument, in that case the irq number returned - * matches the gsi number passed as first argument. - - * Note: We don't assign an - * event channel until the irq actually started up. Return an - * existing irq if we've already got one for the gsi. + * matches the gsi number passed as second argument. + * + * Note: We don't assign an event channel until the irq actually started + * up. Return an existing irq if we've already got one for the gsi. */ -int xen_allocate_pirq(unsigned gsi, int shareable, char *name) +int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) { - int irq; + int irq = 0; struct physdev_irq irq_op; spin_lock(&irq_mapping_update_lock); irq = find_irq_by_gsi(gsi); if (irq != -1) { - printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n", + printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", irq, gsi); goto out; /* XXX need refcount? */ } @@ -606,8 +633,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name) goto out; } - irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); + irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector); irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; + pirq_to_irq[pirq] = irq; out: spin_unlock(&irq_mapping_update_lock); @@ -1327,6 +1355,10 @@ void __init xen_init_IRQ(void) GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); + pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_irqs; i++) + pirq_to_irq[i] = -1; + evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), GFP_KERNEL); for (i = 0; i < NR_EVENT_CHANNELS; i++) diff --git a/include/xen/events.h b/include/xen/events.h index c1717ca5ac13..deec8faace22 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -71,6 +71,7 @@ void xen_hvm_evtchn_do_upcall(void); * GSIs are identity mapped; others are dynamically allocated as * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); +int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v1.2.3 From 01557baff6e9c371d4c96e01089dca32cf347500 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 20 Aug 2010 14:46:52 +0100 Subject: xen: get the maximum number of pirqs from xen Use PHYSDEVOP_get_nr_pirqs to get the maximum number of pirqs from xen. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 30 ++++++++++++++++++++++++++---- include/xen/interface/physdev.h | 6 ++++++ 2 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 018a96275ee4..07e56e5a5d2d 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -102,6 +102,7 @@ struct irq_info static struct irq_info *irq_info; static int *pirq_to_irq; +static int nr_pirqs; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -378,10 +379,12 @@ static int get_nr_hw_irqs(void) return ret; } +/* callers of this function should make sure that PHYSDEVOP_get_nr_pirqs + * succeeded otherwise nr_pirqs won't hold the right value */ static int find_unbound_pirq(void) { int i; - for (i = 0; i < nr_irqs; i++) { + for (i = nr_pirqs-1; i >= 0; i--) { if (pirq_to_irq[i] < 0) return i; } @@ -602,6 +605,13 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) spin_lock(&irq_mapping_update_lock); + if ((pirq > nr_pirqs) || (gsi > nr_irqs)) { + printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n", + pirq > nr_pirqs ? "nr_pirqs" :"", + gsi > nr_irqs ? "nr_irqs" : ""); + goto out; + } + irq = find_irq_by_gsi(gsi); if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", @@ -1349,14 +1359,26 @@ void xen_callback_vector(void) {} void __init xen_init_IRQ(void) { - int i; + int i, rc; + struct physdev_nr_pirqs op_nr_pirqs; cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); - pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); - for (i = 0; i < nr_irqs; i++) + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs); + if (rc < 0) { + nr_pirqs = nr_irqs; + if (rc != -ENOSYS) + printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc); + } else { + if (xen_pv_domain() && !xen_initial_domain()) + nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs); + else + nr_pirqs = op_nr_pirqs.nr_pirqs; + } + pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_pirqs; i++) pirq_to_irq[i] = -1; evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index cd6939147cb6..fbb58833f13e 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -121,6 +121,12 @@ struct physdev_op { } u; }; +#define PHYSDEVOP_get_nr_pirqs 22 +struct physdev_nr_pirqs { + /* OUT */ + uint32_t nr_pirqs; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** -- cgit v1.2.3 From 42a1de56f35a9c87932f45439dc1b09c8da0cc95 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 24 Jun 2010 16:42:04 +0100 Subject: xen: implement xen_hvm_register_pirq xen_hvm_register_pirq allows the kernel to map a GSI into a Xen pirq and receive the interrupt as an event channel from that point on. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/xen/events.c | 4 +++- include/xen/interface/physdev.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 4e371065ce41..08e3cdccdfa8 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -17,6 +17,44 @@ #include #include +#ifdef CONFIG_ACPI +static int xen_hvm_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_hvm_domain()) + return -1; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = -1; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); + + return irq; +} +#endif + #if defined(CONFIG_PCI_MSI) #include diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 07e56e5a5d2d..239b011ef56f 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -75,7 +76,8 @@ enum xen_irq_type { * event channel - irq->event channel mapping * cpu - cpu this event channel is bound to * index - type-specific information: - * PIRQ - vector, with MSB being "needs EIO" + * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM + * guest, or GSI (real passthrough IRQ) of the device. * VIRQ - virq number * IPI - IPI vector * EVTCHN - diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index fbb58833f13e..69a72b96a6cb 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -106,6 +106,36 @@ struct physdev_irq { uint32_t vector; }; +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN */ + int index; + /* IN or OUT */ + int pirq; + /* IN */ + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v1.2.3 From 3942b740e5183caad47a4a3fcb37a4509ce7af83 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 24 Jun 2010 17:50:18 +0100 Subject: xen: support GSI -> pirq remapping in PV on HVM guests Disable pcifront when running on HVM: it is meant to be used with pv guests that don't have PCI bus. Use acpi_register_gsi_xen_hvm to remap GSIs into pirqs. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/pci.h | 5 +++++ arch/x86/pci/xen.c | 16 ++++++++++++++++ drivers/xen/events.c | 6 +++++- include/xen/interface/features.h | 3 +++ 4 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 449c82f71677..f89a42aff284 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -3,10 +3,15 @@ #if defined(CONFIG_PCI_XEN) extern int __init pci_xen_init(void); +extern int __init pci_xen_hvm_init(void); #define pci_xen 1 #else #define pci_xen 0 #define pci_xen_init (0) +static inline int pci_xen_hvm_init(void) +{ + return -1; +} #endif #if defined(CONFIG_PCI_MSI) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 3a4ab0b4dcca..d5284c491aef 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -14,6 +14,7 @@ #include +#include #include #include @@ -184,3 +185,18 @@ int __init pci_xen_init(void) #endif return 0; } + +int __init pci_xen_hvm_init(void) +{ + if (!xen_feature(XENFEAT_hvm_pirqs)) + return 0; + +#ifdef CONFIG_ACPI + /* + * We don't want to change the actual ACPI delivery model, + * just how GSIs get registered. + */ + __acpi_register_gsi = acpi_register_gsi_xen_hvm; +#endif + return 0; +} diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 239b011ef56f..32269bcbd88c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -623,7 +623,8 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore * we are using the !xen_initial_domain() to drop in the function.*/ - if (identity_mapped_irq(gsi) || !xen_initial_domain()) { + if (identity_mapped_irq(gsi) || (!xen_initial_domain() && + xen_pv_domain())) { irq = gsi; irq_alloc_desc_at(irq, 0); } else @@ -1397,6 +1398,9 @@ void __init xen_init_IRQ(void) if (xen_hvm_domain()) { xen_callback_vector(); native_init_IRQ(); + /* pci_xen_hvm_init must be called after native_init_IRQ so that + * __acpi_register_gsi can point at the right function */ + pci_xen_hvm_init(); } else { irq_ctx_init(smp_processor_id()); } diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 70d2563ab166..b6ca39a069d8 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -47,6 +47,9 @@ /* x86: pvclock algorithm is safe to use on HVM */ #define XENFEAT_hvm_safe_pvclock 9 +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ -- cgit v1.2.3 From 809f9267bbaba7765cdb86a47f2e6e4bf4951b69 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 1 Jul 2010 17:10:39 +0100 Subject: xen: map MSIs into pirqs Map MSIs into pirqs, writing 0 in the MSI vector data field and the pirq number in the MSI destination id field. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/events.c | 22 ++++++++++++++++++++ include/xen/events.h | 2 ++ 3 files changed, 81 insertions(+) (limited to 'include/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d5284c491aef..b5bd6420851e 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -64,10 +64,62 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, #if defined(CONFIG_PCI_MSI) #include +#include struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, + struct msi_msg *msg) +{ + /* We set vector == 0 to tell the hypervisor we don't care about it, + * but we want a pirq setup instead. + * We use the dest_id field to pass the pirq that we want. */ + msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq); + msg->address_lo = + MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_MODE_PHYSICAL | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID(pirq); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + /* delivery mode reserved */ + (3 << 8) | + MSI_DATA_VECTOR(0); +} + +static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, pirq, ret = 0; + struct msi_desc *msidesc; + struct msi_msg msg; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq); + if (irq < 0 || pirq < 0) + goto error; + printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); + xen_msi_compose_msg(dev, pirq, &msg); + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + write_msi_msg(irq, &msg); + } + return 0; + +error_while: + unbind_from_irqhandler(irq, NULL); +error: + if (ret == -ENODEV) + dev_err(&dev->dev, "Xen PCI frontend has not registered" \ + " MSI/MSI-X support!\n"); + + return ret; +} + /* * For MSI interrupts we have to use drivers/xen/event.s functions to * allocate an irq_desc and setup the right */ @@ -198,5 +250,10 @@ int __init pci_xen_hvm_init(void) */ __acpi_register_gsi = acpi_register_gsi_xen_hvm; #endif + +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif return 0; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 32269bcbd88c..efa683ee8840 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -656,6 +656,28 @@ out: return irq; } +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) +{ + spin_lock(&irq_mapping_update_lock); + + *irq = find_unbound_irq(); + if (*irq == -1) + goto out; + + *pirq = find_unbound_pirq(); + if (*pirq == -1) + goto out; + + set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, + handle_level_irq, name); + + irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); + pirq_to_irq[*pirq] = *irq; + +out: + spin_unlock(&irq_mapping_update_lock); +} + int xen_destroy_irq(int irq) { struct irq_desc *desc; diff --git a/include/xen/events.h b/include/xen/events.h index deec8faace22..0c58db6ea3f4 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,6 +72,8 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); +/* Allocate an irq and a pirq to be used with MSIs. */ +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v1.2.3 From 38aa66fcb79e0a46c24bba96b6f2b851a6ec2037 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Sep 2010 14:51:39 +0100 Subject: xen: remap GSIs as pirqs when running as initial domain Implement xen_register_gsi to setup the correct triggering and polarity properties of a gsi. Implement xen_register_pirq to register a particular gsi as pirq and receive interrupts as events. Call xen_setup_pirqs to register all the legacy ISA irqs as pirqs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/pci.h | 7 +++ arch/x86/pci/xen.c | 135 ++++++++++++++++++++++++++++++++++++++++ drivers/xen/events.c | 13 ++++ include/xen/interface/physdev.h | 10 +++ 4 files changed, 165 insertions(+) (limited to 'include/xen') diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index f89a42aff284..2329b3eaf8d3 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -13,6 +13,13 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif +#if defined(CONFIG_XEN_DOM0) +void __init xen_setup_pirqs(void); +#else +static inline void __init xen_setup_pirqs(void) +{ +} +#endif #if defined(CONFIG_PCI_MSI) #if defined(CONFIG_PCI_XEN) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index b5bd6420851e..dd0b5fdb27b9 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -257,3 +257,138 @@ int __init pci_xen_hvm_init(void) #endif return 0; } + +#ifdef CONFIG_XEN_DOM0 +static int xen_register_pirq(u32 gsi, int triggering) +{ + int rc, irq; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + if (!xen_pv_domain()) + return -1; + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + irq = xen_allocate_pirq(gsi, shareable, name); + + printk(KERN_DEBUG "xen: --> irq=%d\n", irq); + + if (irq < 0) + goto out; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = irq; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + +out: + return irq; +} + +static int xen_register_gsi(u32 gsi, int triggering, int polarity) +{ + int rc, irq; + struct physdev_setup_gsi setup_gsi; + + if (!xen_pv_domain()) + return -1; + + printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", + gsi, triggering, polarity); + + irq = xen_register_pirq(gsi, triggering); + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (rc == -EEXIST) + printk(KERN_INFO "Already setup the GSI :%d\n", gsi); + else if (rc) { + printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", + gsi, rc); + } + + return irq; +} + +static __init void xen_setup_acpi_sci(void) +{ + int rc; + int trigger, polarity; + int gsi = acpi_sci_override_gsi; + + if (!gsi) + return; + + rc = acpi_get_override_irq(gsi, &trigger, &polarity); + if (rc) { + printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" + " sci, rc=%d\n", rc); + return; + } + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + + printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " + "polarity=%d\n", gsi, trigger, polarity); + + gsi = xen_register_gsi(gsi, trigger, polarity); + printk(KERN_INFO "xen: acpi sci %d\n", gsi); + + return; +} + +static int acpi_register_gsi_xen(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_register_gsi(gsi, trigger, polarity); +} + +static int __init pci_xen_initial_domain(void) +{ + xen_setup_acpi_sci(); + __acpi_register_gsi = acpi_register_gsi_xen; + + return 0; +} + +void __init xen_setup_pirqs(void) +{ + int irq; + + pci_xen_initial_domain(); + + if (0 == nr_ioapics) { + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) + xen_allocate_pirq(irq, 0, "xt-pic"); + return; + } + + /* Pre-allocate legacy irqs */ + for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { + int trigger, polarity; + + if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) + continue; + + xen_register_pirq(irq, + trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); + } +} +#endif diff --git a/drivers/xen/events.c b/drivers/xen/events.c index efa683ee8840..c649ac0aaeef 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -681,6 +681,8 @@ out: int xen_destroy_irq(int irq) { struct irq_desc *desc; + struct physdev_unmap_pirq unmap_irq; + struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; spin_lock(&irq_mapping_update_lock); @@ -689,6 +691,15 @@ int xen_destroy_irq(int irq) if (!desc) goto out; + if (xen_initial_domain()) { + unmap_irq.pirq = info->u.pirq.gsi; + unmap_irq.domid = DOMID_SELF; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); + if (rc) { + printk(KERN_WARNING "unmap irq failed %d\n", rc); + goto out; + } + } irq_info[irq] = mk_unbound_info(); irq_free_desc(irq); @@ -1425,5 +1436,7 @@ void __init xen_init_IRQ(void) pci_xen_hvm_init(); } else { irq_ctx_init(smp_processor_id()); + if (xen_initial_domain()) + xen_setup_pirqs(); } } diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 69a72b96a6cb..a85d76c2e360 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -151,6 +151,16 @@ struct physdev_op { } u; }; +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + #define PHYSDEVOP_get_nr_pirqs 22 struct physdev_nr_pirqs { /* OUT */ -- cgit v1.2.3 From f731e3ef02b4744f4d7ca2f63539b900e47db31f Mon Sep 17 00:00:00 2001 From: Qing He Date: Mon, 11 Oct 2010 15:30:09 +0100 Subject: xen: remap MSIs into pirqs when running as initial domain Implement xen_create_msi_irq to create an msi and remap it as pirq. Use xen_create_msi_irq to implement an initial domain specific version of setup_msi_irqs. Signed-off-by: Qing He Signed-off-by: Yunhong Jiang Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 55 +++++++++++++++++++++++++++++++---------------- drivers/xen/events.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/xen/events.h | 4 ++++ 3 files changed, 101 insertions(+), 18 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index dd0b5fdb27b9..b3f4b30222fa 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -135,14 +135,12 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (!v) return -ENOMEM; - if (!xen_initial_domain()) { - if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); - else - ret = xen_pci_frontend_enable_msi(dev, &v); - if (ret) - goto error; - } + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, &v); + if (ret) + goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = xen_allocate_pirq(v[i], 0, /* not sharable */ @@ -172,23 +170,40 @@ error: static void xen_teardown_msi_irqs(struct pci_dev *dev) { - /* Only do this when were are in non-privileged mode.*/ - if (!xen_initial_domain()) { - struct msi_desc *msidesc; - - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - if (msidesc->msi_attrib.is_msix) - xen_pci_frontend_disable_msix(dev); - else - xen_pci_frontend_disable_msi(dev); - } + struct msi_desc *msidesc; + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + if (msidesc->msi_attrib.is_msix) + xen_pci_frontend_disable_msix(dev); + else + xen_pci_frontend_disable_msi(dev); } static void xen_teardown_msi_irq(unsigned int irq) { xen_destroy_irq(irq); } + +static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret; + struct msi_desc *msidesc; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_create_msi_irq(dev, msidesc, type); + if (irq < 0) + return -1; + + ret = set_irq_msi(irq, msidesc); + if (ret) + goto error; + } + return 0; + +error: + xen_destroy_irq(irq); + return ret; +} #endif static int xen_pcifront_enable_irq(struct pci_dev *dev) @@ -362,6 +377,10 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi, static int __init pci_xen_initial_domain(void) { +#ifdef CONFIG_PCI_MSI + x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; + x86_msi.teardown_msi_irq = xen_teardown_msi_irq; +#endif xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index c649ac0aaeef..a7d9555e664d 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -656,6 +657,10 @@ out: return irq; } +#ifdef CONFIG_PCI_MSI +#include +#include "../pci/msi.h" + void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) { spin_lock(&irq_mapping_update_lock); @@ -678,6 +683,61 @@ out: spin_unlock(&irq_mapping_update_lock); } +int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) +{ + int irq = -1; + struct physdev_map_pirq map_irq; + int rc; + int pos; + u32 table_offset, bir; + + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; + + if (type == PCI_CAP_ID_MSIX) { + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + pci_read_config_dword(dev, msix_table_offset_reg(pos), + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + spin_lock(&irq_mapping_update_lock); + + irq = find_unbound_irq(); + + if (irq == -1) + goto out; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + + irq_free_desc(irq); + + irq = -1; + goto out; + } + irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index); + + set_irq_chip_and_handler_name(irq, &xen_pirq_chip, + handle_level_irq, + (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi"); + +out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} +#endif + int xen_destroy_irq(int irq) { struct irq_desc *desc; diff --git a/include/xen/events.h b/include/xen/events.h index 0c58db6ea3f4..8fa27dc7358b 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -72,8 +72,12 @@ void xen_hvm_evtchn_do_upcall(void); * usual. */ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); + +#ifdef CONFIG_PCI_MSI /* Allocate an irq and a pirq to be used with MSIs. */ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); +int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); +#endif /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -- cgit v1.2.3 From 4fe7d5a708a955b35e3fdc4dea3e0b7a6ae2eb06 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Sep 2010 16:17:06 +0100 Subject: xen: make hvc_xen console work for dom0. Use the console hypercalls for dom0 console. [ Impact: Add Xen dom0 console ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Juan Quintela Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- drivers/char/hvc_xen.c | 98 +++++++++++++++++++++++++++++++++----------------- drivers/xen/events.c | 2 +- include/xen/events.h | 1 + 3 files changed, 67 insertions(+), 34 deletions(-) (limited to 'include/xen') diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index 60446f82a3fc..1f7e13a43a88 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -78,7 +78,7 @@ static int __write_console(const char *data, int len) return sent; } -static int write_console(uint32_t vtermno, const char *data, int len) +static int domU_write_console(uint32_t vtermno, const char *data, int len) { int ret = len; @@ -101,7 +101,7 @@ static int write_console(uint32_t vtermno, const char *data, int len) return ret; } -static int read_console(uint32_t vtermno, char *buf, int len) +static int domU_read_console(uint32_t vtermno, char *buf, int len) { struct xencons_interface *intf = xencons_interface(); XENCONS_RING_IDX cons, prod; @@ -122,28 +122,62 @@ static int read_console(uint32_t vtermno, char *buf, int len) return recv; } -static const struct hv_ops hvc_ops = { - .get_chars = read_console, - .put_chars = write_console, +static struct hv_ops domU_hvc_ops = { + .get_chars = domU_read_console, + .put_chars = domU_write_console, .notifier_add = notifier_add_irq, .notifier_del = notifier_del_irq, .notifier_hangup = notifier_hangup_irq, }; -static int __init xen_init(void) +static int dom0_read_console(uint32_t vtermno, char *buf, int len) +{ + return HYPERVISOR_console_io(CONSOLEIO_read, len, buf); +} + +/* + * Either for a dom0 to write to the system console, or a domU with a + * debug version of Xen + */ +static int dom0_write_console(uint32_t vtermno, const char *str, int len) +{ + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc < 0) + return 0; + + return len; +} + +static struct hv_ops dom0_hvc_ops = { + .get_chars = dom0_read_console, + .put_chars = dom0_write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, + .notifier_hangup = notifier_hangup_irq, +}; + +static int __init xen_hvc_init(void) { struct hvc_struct *hp; + struct hv_ops *ops; - if (!xen_pv_domain() || - xen_initial_domain() || - !xen_start_info->console.domU.evtchn) + if (!xen_pv_domain()) return -ENODEV; - xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + if (xen_initial_domain()) { + ops = &dom0_hvc_ops; + xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); + } else { + if (!xen_start_info->console.domU.evtchn) + return -ENODEV; + + ops = &domU_hvc_ops; + xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + } if (xencons_irq < 0) xencons_irq = 0; /* NO_IRQ */ - hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); + hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); @@ -160,7 +194,7 @@ void xen_console_resume(void) rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); } -static void __exit xen_fini(void) +static void __exit xen_hvc_fini(void) { if (hvc) hvc_remove(hvc); @@ -168,29 +202,24 @@ static void __exit xen_fini(void) static int xen_cons_init(void) { + struct hv_ops *ops; + if (!xen_pv_domain()) return 0; - hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); + if (xen_initial_domain()) + ops = &dom0_hvc_ops; + else + ops = &domU_hvc_ops; + + hvc_instantiate(HVC_COOKIE, 0, ops); return 0; } -module_init(xen_init); -module_exit(xen_fini); +module_init(xen_hvc_init); +module_exit(xen_hvc_fini); console_initcall(xen_cons_init); -static void raw_console_write(const char *str, int len) -{ - while(len > 0) { - int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); - if (rc <= 0) - break; - - str += rc; - len -= rc; - } -} - #ifdef CONFIG_EARLY_PRINTK static void xenboot_write_console(struct console *console, const char *string, unsigned len) @@ -198,19 +227,22 @@ static void xenboot_write_console(struct console *console, const char *string, unsigned int linelen, off = 0; const char *pos; - raw_console_write(string, len); + dom0_write_console(0, string, len); + + if (xen_initial_domain()) + return; - write_console(0, "(early) ", 8); + domU_write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) break; - write_console(0, string+off, linelen); - write_console(0, "\r\n", 2); + domU_write_console(0, string+off, linelen); + domU_write_console(0, "\r\n", 2); off += linelen + 1; } if (off < len) - write_console(0, string+off, len-off); + domU_write_console(0, string+off, len-off); } struct console xenboot_console = { @@ -222,7 +254,7 @@ struct console xenboot_console = { void xen_raw_console_write(const char *str) { - raw_console_write(str, strlen(str)); + dom0_write_console(0, str, strlen(str)); } void xen_raw_printk(const char *fmt, ...) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index a7d9555e664d..93e98ffe71ae 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -839,7 +839,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) } -static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; int evtchn, irq; diff --git a/include/xen/events.h b/include/xen/events.h index 8fa27dc7358b..646dd17d3aa4 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -12,6 +12,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_virq_to_irq(unsigned int virq, unsigned int cpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, -- cgit v1.2.3 From e28c31a96b1570f17731b18e8efabb7308d0c22c Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Wed, 27 Oct 2010 17:55:04 +0100 Subject: xen: register xen pci notifier Register a pci notifier to add (or remove) pci devices to Xen via hypercalls. Xen needs to know the pci devices present in the system to handle pci passthrough and even MSI remapping in the initial domain. Signed-off-by: Weidong Han Signed-off-by: Qing He Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- drivers/xen/Makefile | 1 + drivers/xen/pci.c | 117 ++++++++++++++++++++++++++++++++++++++++ include/xen/interface/physdev.h | 21 ++++++++ 3 files changed, 139 insertions(+) create mode 100644 drivers/xen/pci.c (limited to 'include/xen') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index b97864551718..eb8a78d77d9d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pci.o diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c new file mode 100644 index 000000000000..cef4bafc07dc --- /dev/null +++ b/drivers/xen/pci.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Weidong Han + */ + +#include +#include +#include +#include + +#include +#include +#include "../pci/pci.h" + +static int xen_add_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_virtfn = 1, + .physfn.bus = pci_dev->physfn->bus->number, + .physfn.devfn = pci_dev->physfn->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_extfn = 1, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, + &manage_pci); + } + + return r; +} + +static int xen_remove_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + + return r; +} + +static int xen_pci_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_add_device(dev); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_remove_device(dev); + break; + default: + break; + } + + return r; +} + +struct notifier_block device_nb = { + .notifier_call = xen_pci_notifier, +}; + +static int __init register_xen_pci_notifier(void) +{ + if (!xen_initial_domain()) + return 0; + + return bus_register_notifier(&pci_bus_type, &device_nb); +} + +arch_initcall(register_xen_pci_notifier); diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a85d76c2e360..2b2c66c3df00 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -136,6 +136,27 @@ struct physdev_unmap_pirq { int pirq; }; +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v1.2.3 From 7e77506a5918d82cafa2ffa783ab57c23f9e9817 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 30 Sep 2010 12:37:26 +0100 Subject: xen: implement XENMEM_machphys_mapping This hypercall allows Xen to specify a non-default location for the machine to physical mapping. This capability is used when running a 32 bit domain 0 on a 64 bit hypervisor to shrink the hypervisor hole to exactly the size required. [ Impact: add Xen hypercall definitions ] Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- arch/x86/include/asm/xen/interface.h | 6 +++--- arch/x86/include/asm/xen/interface_32.h | 5 +++++ arch/x86/include/asm/xen/interface_64.h | 13 +------------ arch/x86/include/asm/xen/page.h | 7 ++++--- arch/x86/xen/enlighten.c | 7 +++++++ arch/x86/xen/mmu.c | 14 ++++++++++++++ include/xen/interface/memory.h | 13 +++++++++++++ 7 files changed, 47 insertions(+), 18 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c55..1c10c88ee4e1 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5c..8413688b2571 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h @@ -32,6 +32,11 @@ /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 + +#define __MACH2PHYS_SHIFT 2 + /* * Virtual addresses beyond this are not modifiable by guest OSes. The * machine->physical mapping table starts at this address, read-only. diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97c..839a4811cf98 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h @@ -39,18 +39,7 @@ #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) -#endif - -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif +#define __MACH2PHYS_SHIFT 3 /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d5..8760cc60a21c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ typedef struct xpaddr { #define MAX_DOMAIN_PAGES \ ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; -#if 0 if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif + return ~0; pfn = 0; /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..bd3554934613 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); +unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -1097,6 +1102,8 @@ asmlinkage void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; + xen_setup_machphys_mapping(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..bd2713a82571 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } +void __init xen_setup_machphys_mapping(void) +{ + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; + + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } else { + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + } + machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +} + #ifdef CONFIG_X86_64 static void convert_pfn_mfn(void *v) { diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13bde69..eac3ce153719 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -140,6 +140,19 @@ struct xen_machphys_mfn_list { }; DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + /* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. -- cgit v1.2.3 From 744f9f104ea262de1dc3e29265870c649f0d9473 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Nov 2010 11:44:39 -0500 Subject: xen: fix header export to userspace scripts/headers_install.pl prevents "__user" from being exported to userspace headers, so just use compiler.h to make sure that __user is defined and avoid the error. unifdef: linux-next-20101112/xx64/usr/include/xen/privcmd.h.tmp: 79: Premature EOF (#if line 33 depth 1) Signed-off-by: Randy Dunlap Cc: Jeremy Fitzhardinge Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xensource.com (moderated for non-subscribers) Cc: virtualization@lists.osdl.org Cc: Tony Finch Signed-off-by: Jeremy Fitzhardinge --- include/xen/privcmd.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/xen') diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h index b42cdfd92fee..17857fb4d550 100644 --- a/include/xen/privcmd.h +++ b/include/xen/privcmd.h @@ -34,13 +34,10 @@ #define __LINUX_PUBLIC_PRIVCMD_H__ #include +#include typedef unsigned long xen_pfn_t; -#ifndef __user -#define __user -#endif - struct privcmd_hypercall { __u64 op; __u64 arg[5]; -- cgit v1.2.3 From 9be4d4575906af9698de660e477f949a076c87e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 31 Aug 2010 15:01:16 -0700 Subject: xen: add extra pages to balloon Add extra pages in the pseudo-physical address space to the balloon so we can extend into them later. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/balloon.c | 15 +++++++++++---- include/xen/page.h | 7 +++++++ 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 500290b150bb..df26ee9caa68 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -119,7 +119,7 @@ static void scrub_page(struct page *page) } /* balloon_append: add the given page to the balloon. */ -static void balloon_append(struct page *page) +static void __balloon_append(struct page *page) { /* Lowmem is re-populated first, so highmem pages go at list tail. */ if (PageHighMem(page)) { @@ -130,7 +130,11 @@ static void balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } +} +static void balloon_append(struct page *page) +{ + __balloon_append(page); totalram_pages--; } @@ -416,10 +420,13 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + for (pfn = PFN_UP(xen_extra_mem_start); + pfn < PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size); + pfn++) { page = pfn_to_page(pfn); - if (!PageReserved(page)) - balloon_append(page); + /* totalram_pages doesn't include the boot-time + balloon extension, so don't subtract from it. */ + __balloon_append(page); } target_watch.callback = watch_target; diff --git a/include/xen/page.h b/include/xen/page.h index eaf85fab1263..0be36b976f4b 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1 +1,8 @@ +#ifndef _XEN_PAGE_H +#define _XEN_PAGE_H + #include + +extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; + +#endif /* _XEN_PAGE_H */ -- cgit v1.2.3 From e5fc7345412d5e4758fcef55a74354c5cbefd61e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 1 Dec 2010 14:51:44 +0000 Subject: xen: use PHYSDEVOP_get_free_pirq to implement find_unbound_pirq Use the new hypercall PHYSDEVOP_get_free_pirq to ask Xen to allocate a pirq. Remove the unsupported PHYSDEVOP_get_nr_pirqs hypercall to get the amount of pirq available. This fixes find_unbound_pirq that otherwise would return a number starting from nr_irqs that might very well be out of range in Xen. The symptom of this bug is that when you passthrough an MSI capable pci device to a PV on HVM guest, Linux would fail to enable MSIs on the device. Signed-off-by: Stefano Stabellini --- drivers/xen/events.c | 45 ++++++++++++++++++----------------------- include/xen/interface/physdev.h | 10 +++++++++ 2 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 2811bb988ea0..7ab43c33f746 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -105,7 +105,6 @@ struct irq_info static struct irq_info *irq_info; static int *pirq_to_irq; -static int nr_pirqs; static int *evtchn_to_irq; struct cpu_evtchn_s { @@ -385,12 +384,17 @@ static int get_nr_hw_irqs(void) return ret; } -/* callers of this function should make sure that PHYSDEVOP_get_nr_pirqs - * succeeded otherwise nr_pirqs won't hold the right value */ -static int find_unbound_pirq(void) +static int find_unbound_pirq(int type) { - int i; - for (i = nr_pirqs-1; i >= 0; i--) { + int rc, i; + struct physdev_get_free_pirq op_get_free_pirq; + op_get_free_pirq.type = type; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); + if (!rc) + return op_get_free_pirq.pirq; + + for (i = 0; i < nr_irqs; i++) { if (pirq_to_irq[i] < 0) return i; } @@ -611,10 +615,10 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) spin_lock(&irq_mapping_update_lock); - if ((pirq > nr_pirqs) || (gsi > nr_irqs)) { + if ((pirq > nr_irqs) || (gsi > nr_irqs)) { printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n", - pirq > nr_pirqs ? "nr_pirqs" :"", - gsi > nr_irqs ? "nr_irqs" : ""); + pirq > nr_irqs ? "pirq" :"", + gsi > nr_irqs ? "gsi" : ""); goto out; } @@ -672,7 +676,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) if (*irq == -1) goto out; - *pirq = find_unbound_pirq(); + *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); if (*pirq == -1) goto out; @@ -1506,26 +1510,17 @@ void xen_callback_vector(void) {} void __init xen_init_IRQ(void) { - int i, rc; - struct physdev_nr_pirqs op_nr_pirqs; + int i; cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), GFP_KERNEL); irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); - rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_nr_pirqs, &op_nr_pirqs); - if (rc < 0) { - nr_pirqs = nr_irqs; - if (rc != -ENOSYS) - printk(KERN_WARNING "PHYSDEVOP_get_nr_pirqs returned rc=%d\n", rc); - } else { - if (xen_pv_domain() && !xen_initial_domain()) - nr_pirqs = max((int)op_nr_pirqs.nr_pirqs, nr_irqs); - else - nr_pirqs = op_nr_pirqs.nr_pirqs; - } - pirq_to_irq = kcalloc(nr_pirqs, sizeof(*pirq_to_irq), GFP_KERNEL); - for (i = 0; i < nr_pirqs; i++) + /* We are using nr_irqs as the maximum number of pirq available but + * that number is actually chosen by Xen and we don't know exactly + * what it is. Be careful choosing high pirq numbers. */ + pirq_to_irq = kcalloc(nr_irqs, sizeof(*pirq_to_irq), GFP_KERNEL); + for (i = 0; i < nr_irqs; i++) pirq_to_irq[i] = -1; evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 2b2c66c3df00..534cac89a77d 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -188,6 +188,16 @@ struct physdev_nr_pirqs { uint32_t nr_pirqs; }; +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** -- cgit v1.2.3 From af42b8d12f8adec6711cb824549a0edac6a4ae8f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 1 Dec 2010 14:51:44 +0000 Subject: xen: fix MSI setup and teardown for PV on HVM guests When remapping MSIs into pirqs for PV on HVM guests, qemu is responsible for doing the actual mapping and unmapping. We only give qemu the desired pirq number when we ask to do the mapping the first time, after that we should be reading back the pirq number from qemu every time we want to re-enable the MSI. This fixes a bug in xen_hvm_setup_msi_irqs that manifests itself when trying to enable the same MSI for the second time: the old MSI to pirq mapping is still valid at this point but xen_hvm_setup_msi_irqs would try to assign a new pirq anyway. A simple way to reproduce this bug is to assign an MSI capable network card to a PV on HVM guest, if the user brings down the corresponding ethernet interface and up again, Linux would fail to enable MSIs on the device. Signed-off-by: Stefano Stabellini --- arch/x86/pci/xen.c | 27 ++++++++++++++++++++------- drivers/xen/events.c | 24 +++++++++++++++++------- include/xen/events.h | 7 ++++++- 3 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9c..25cd4a07d09f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, struct xen_pci_frontend_ops *xen_pci_frontend; EXPORT_SYMBOL_GPL(xen_pci_frontend); +#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ + MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) + static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, struct msi_msg *msg) { @@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(pirq); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - /* delivery mode reserved */ - (3 << 8) | - MSI_DATA_VECTOR(0); + msg->data = XEN_PIRQ_MSI_DATA; } static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_msg msg; list_for_each_entry(msidesc, &dev->msi_list, list) { + __read_msi_msg(msidesc, &msg); + pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | + ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); + if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { + xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); + if (irq < 0) + goto error; + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_while; + printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" + " pirq=%d\n", irq, pirq); + return 0; + } xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq); + "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); if (irq < 0 || pirq < 0) goto error; printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7ab43c33f746..f78945ce8aeb 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -668,17 +668,21 @@ out: #include #include "../pci/msi.h" -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) { spin_lock(&irq_mapping_update_lock); - *irq = find_unbound_irq(); - if (*irq == -1) - goto out; + if (alloc & XEN_ALLOC_IRQ) { + *irq = find_unbound_irq(); + if (*irq == -1) + goto out; + } - *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); - if (*pirq == -1) - goto out; + if (alloc & XEN_ALLOC_PIRQ) { + *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); + if (*pirq == -1) + goto out; + } set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, handle_level_irq, name); @@ -766,6 +770,7 @@ int xen_destroy_irq(int irq) printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } + pirq_to_irq[info->u.pirq.pirq] = -1; } irq_info[irq] = mk_unbound_info(); @@ -786,6 +791,11 @@ int xen_gsi_from_irq(unsigned irq) return gsi_from_irq(irq); } +int xen_irq_from_pirq(unsigned pirq) +{ + return pirq_to_irq[pirq]; +} + int bind_evtchn_to_irq(unsigned int evtchn) { int irq; diff --git a/include/xen/events.h b/include/xen/events.h index 646dd17d3aa4..00f53ddcc062 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -76,7 +76,9 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); #ifdef CONFIG_PCI_MSI /* Allocate an irq and a pirq to be used with MSIs. */ -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); +#define XEN_ALLOC_PIRQ (1 << 0) +#define XEN_ALLOC_IRQ (1 << 1) +void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask); int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); #endif @@ -89,4 +91,7 @@ int xen_vector_from_irq(unsigned pirq); /* Return gsi allocated to pirq */ int xen_gsi_from_irq(unsigned pirq); +/* Return irq from pirq */ +int xen_irq_from_pirq(unsigned pirq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From 667c78afaec0ac500908e191e8f236e9578d7b1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 8 Dec 2010 12:39:12 -0800 Subject: xen: Provide a variant of __RING_SIZE() that is an integer constant expression Without this, gcc 4.5 won't compile xen-netfront and xen-blkfront, where this is being used to specify array sizes. Signed-off-by: Jan Beulich Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: David Miller Cc: Stable Kernel Signed-off-by: Linus Torvalds --- drivers/block/xen-blkfront.c | 2 +- drivers/net/xen-netfront.c | 4 ++-- include/xen/interface/io/ring.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/xen') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4f9e22f29138..657873e4328d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -72,7 +72,7 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) /* * We have one of these per vbd, whether ide, scsi or 'other'. They diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 458bb57914a3..cdbeec9f83ea 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,8 +66,8 @@ struct netfront_cb { #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) +#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) struct netfront_info { diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index e8cbf431c8cc..75271b9a8f61 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -24,8 +24,15 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) + +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. -- cgit v1.2.3