summaryrefslogtreecommitdiff
path: root/arch/tile/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile/include/asm')
-rw-r--r--arch/tile/include/asm/Kbuild2
-rw-r--r--arch/tile/include/asm/cache.h12
-rw-r--r--arch/tile/include/asm/checksum.h18
-rw-r--r--arch/tile/include/asm/device.h33
-rw-r--r--arch/tile/include/asm/dma-mapping.h146
-rw-r--r--arch/tile/include/asm/fixmap.h14
-rw-r--r--arch/tile/include/asm/homecache.h19
-rw-r--r--arch/tile/include/asm/io.h144
-rw-r--r--arch/tile/include/asm/kmap_types.h31
-rw-r--r--arch/tile/include/asm/memprof.h33
-rw-r--r--arch/tile/include/asm/page.h7
-rw-r--r--arch/tile/include/asm/pci.h151
-rw-r--r--arch/tile/include/asm/uaccess.h2
13 files changed, 441 insertions, 171 deletions
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 143473e3a0bb..5bd71994452d 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -9,7 +9,6 @@ header-y += hardwall.h
generic-y += bug.h
generic-y += bugs.h
generic-y += cputime.h
-generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
@@ -17,7 +16,6 @@ generic-y += fb.h
generic-y += fcntl.h
generic-y += ioctl.h
generic-y += ioctls.h
-generic-y += ipc.h
generic-y += ipcbuf.h
generic-y += irq_regs.h
generic-y += kdebug.h
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 392e5333dd8b..a9a529964e07 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,17 @@
#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
/*
- * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ * TILEPro I/O is not always coherent (networking typically uses coherent
+ * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the
+ * L2 cacheline size helps ensure that kernel heap allocations are aligned.
+ * TILE-Gx I/O is always coherent when used on hash-for-home pages.
+ *
+ * However, it's possible at runtime to request not to use hash-for-home
+ * for the kernel heap, in which case the kernel will use flush-and-inval
+ * to manage coherence. As a result, we use L2_CACHE_BYTES for the
+ * DMA minimum alignment to avoid false sharing in the kernel heap.
*/
-#ifndef __tilegx__
#define ARCH_DMA_MINALIGN L2_CACHE_BYTES
-#endif
/* use the cache line size for the L2, which is where it counts */
#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
index a120766c7264..b21a2fdec9f7 100644
--- a/arch/tile/include/asm/checksum.h
+++ b/arch/tile/include/asm/checksum.h
@@ -21,4 +21,22 @@
__wsum do_csum(const unsigned char *buff, int len);
#define do_csum do_csum
+/*
+ * Return the sum of all the 16-bit subwords in a long.
+ * This sums two subwords on a 32-bit machine, and four on 64 bits.
+ * The implementation does two vector adds to capture any overflow.
+ */
+static inline unsigned int csum_long(unsigned long x)
+{
+ unsigned long ret;
+#ifdef __tilegx__
+ ret = __insn_v2sadu(x, 0);
+ ret = __insn_v2sadu(ret, 0);
+#else
+ ret = __insn_sadh_u(x, 0);
+ ret = __insn_sadh_u(ret, 0);
+#endif
+ return ret;
+}
+
#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 000000000000..5182705bd056
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ * Arch specific extensions to struct device
+ */
+
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
+
+struct dev_archdata {
+ /* DMA operations on that device */
+ struct dma_map_ops *dma_ops;
+
+ /* Offset of the DMA address from the PA. */
+ dma_addr_t dma_offset;
+
+ /* Highest DMA address that can be generated by this device. */
+ dma_addr_t max_direct_dma_addr;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_TILE_DEVICE_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b39..4b6247d1a315 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,80 @@
#include <linux/cache.h>
#include <linux/io.h>
-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations. For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ else
+ return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+ return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+ dev->archdata.dma_offset = off;
+}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
- size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
- size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
- enum dma_data_direction);
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr + get_dma_offset(dev);
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr - get_dma_offset(dev);
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size - 1 <= *dev->dma_mask;
+}
static inline int
dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- return 0;
+ return get_dma_ops(dev)->mapping_error(dev, dma_addr);
}
static inline int
dma_supported(struct device *dev, u64 mask)
{
- return 1;
+ return get_dma_ops(dev)->dma_supported(dev, mask);
}
static inline int
dma_set_mask(struct device *dev, u64 mask)
{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /* Handle legacy PCI devices with limited memory addressability. */
+ if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
+ set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
+ set_dma_offset(dev, 0);
+ if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
@@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
return 0;
}
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+ return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+ dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+
#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index c66f7933beaa..e16dbf929cb5 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -45,15 +45,23 @@
*
* TLB entries of such buffers will not be flushed across
* task switches.
- *
- * We don't bother with a FIX_HOLE since above the fixmaps
- * is unmapped memory in any case.
*/
enum fixed_addresses {
+#ifdef __tilegx__
+ /*
+ * TILEPro has unmapped memory above so the hole isn't needed,
+ * and in any case the hole pushes us over a single 16MB pmd.
+ */
+ FIX_HOLE,
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef __tilegx__ /* see homecache.c */
+ FIX_HOMECACHE_BEGIN,
+ FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1,
+#endif
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index a8243865d49e..7b7771328642 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -79,10 +79,17 @@ extern void homecache_change_page_home(struct page *, int order, int home);
/*
* Flush a page out of whatever cache(s) it is in.
* This is more than just finv, since it properly handles waiting
- * for the data to reach memory on tilepro, but it can be quite
- * heavyweight, particularly on hash-for-home memory.
+ * for the data to reach memory, but it can be quite
+ * heavyweight, particularly on incoherent or immutable memory.
*/
-extern void homecache_flush_cache(struct page *, int order);
+extern void homecache_finv_page(struct page *);
+
+/*
+ * Flush a page out of the specified home cache.
+ * Note that the specified home need not be the actual home of the page,
+ * as for example might be the case when coordinating with I/O devices.
+ */
+extern void homecache_finv_map_page(struct page *, int home);
/*
* Allocate a page with the given GFP flags, home, and optionally
@@ -104,10 +111,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
* routines use homecache_change_page_home() to reset the home
* back to the default before returning the page to the allocator.
*/
+void __homecache_free_pages(struct page *, unsigned int order);
void homecache_free_pages(unsigned long addr, unsigned int order);
-#define homecache_free_page(page) \
- homecache_free_pages((page), 0)
-
+#define __homecache_free_page(page) __homecache_free_pages((page), 0)
+#define homecache_free_page(page) homecache_free_pages((page), 0)
/*
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index d2152deb1f3c..2a9b293fece6 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -62,6 +62,92 @@ extern void iounmap(volatile void __iomem *addr);
#define mm_ptov(addr) ((void *)phys_to_virt(addr))
#define mm_vtop(addr) ((unsigned long)virt_to_phys(addr))
+#if CHIP_HAS_MMIO()
+
+/*
+ * We use inline assembly to guarantee that the compiler does not
+ * split an access into multiple byte-sized accesses as it might
+ * sometimes do if a register data structure is marked "packed".
+ * Obviously on tile we can't tolerate such an access being
+ * actually unaligned, but we want to avoid the case where the
+ * compiler conservatively would generate multiple accesses even
+ * for an aligned read or write.
+ */
+
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ u16 ret;
+ asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le16_to_cpu(ret);
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ u32 ret;
+ /* Sign-extend to conform to u32 ABI sign-extension convention. */
+ asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le32_to_cpu(ret);
+}
+
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ u64 ret;
+ asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le64_to_cpu(ret);
+}
+
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = val;
+}
+
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+ asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val)));
+}
+
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+ asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val)));
+}
+
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+ asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val)));
+}
+
+/*
+ * The on-chip I/O hardware on tilegx is configured with VA=PA for the
+ * kernel's PA range. The low-level APIs and field names use "va" and
+ * "void *" nomenclature, to be consistent with the general notion
+ * that the addresses in question are virtualizable, but in the kernel
+ * context we are actually manipulating PA values. (In other contexts,
+ * e.g. access from user space, we do in fact use real virtual addresses
+ * in the va fields.) To allow readers of the code to understand what's
+ * happening, we direct their attention to this comment by using the
+ * following two functions that just duplicate __va() and __pa().
+ */
+typedef unsigned long tile_io_addr_t;
+static inline tile_io_addr_t va_to_tile_io_addr(void *va)
+{
+ BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t));
+ return __pa(va);
+}
+static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr)
+{
+ return __va(tile_io_addr);
+}
+
+#else /* CHIP_HAS_MMIO() */
+
#ifdef CONFIG_PCI
extern u8 _tile_readb(unsigned long addr);
@@ -73,10 +159,19 @@ extern void _tile_writew(u16 val, unsigned long addr);
extern void _tile_writel(u32 val, unsigned long addr);
extern void _tile_writeq(u64 val, unsigned long addr);
-#else
+#define __raw_readb(addr) _tile_readb((unsigned long)addr)
+#define __raw_readw(addr) _tile_readw((unsigned long)addr)
+#define __raw_readl(addr) _tile_readl((unsigned long)addr)
+#define __raw_readq(addr) _tile_readq((unsigned long)addr)
+#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
+#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr)
+#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr)
+#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
+
+#else /* CONFIG_PCI */
/*
- * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * The tilepro architecture does not support IOMEM unless PCI is enabled.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -88,65 +183,58 @@ static inline int iomem_panic(void)
return 0;
}
-static inline u8 _tile_readb(unsigned long addr)
+static inline u8 readb(unsigned long addr)
{
return iomem_panic();
}
-static inline u16 _tile_readw(unsigned long addr)
+static inline u16 _readw(unsigned long addr)
{
return iomem_panic();
}
-static inline u32 _tile_readl(unsigned long addr)
+static inline u32 readl(unsigned long addr)
{
return iomem_panic();
}
-static inline u64 _tile_readq(unsigned long addr)
+static inline u64 readq(unsigned long addr)
{
return iomem_panic();
}
-static inline void _tile_writeb(u8 val, unsigned long addr)
+static inline void writeb(u8 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writew(u16 val, unsigned long addr)
+static inline void writew(u16 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writel(u32 val, unsigned long addr)
+static inline void writel(u32 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writeq(u64 val, unsigned long addr)
+static inline void writeq(u64 val, unsigned long addr)
{
iomem_panic();
}
-#endif
+#endif /* CONFIG_PCI */
+
+#endif /* CHIP_HAS_MMIO() */
-#define readb(addr) _tile_readb((unsigned long)addr)
-#define readw(addr) _tile_readw((unsigned long)addr)
-#define readl(addr) _tile_readl((unsigned long)addr)
-#define readq(addr) _tile_readq((unsigned long)addr)
-#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
-#define writew(val, addr) _tile_writew(val, (unsigned long)addr)
-#define writel(val, addr) _tile_writel(val, (unsigned long)addr)
-#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_readq readq
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-#define __raw_writeq writeq
+#define readb __raw_readb
+#define readw __raw_readw
+#define readl __raw_readl
+#define readq __raw_readq
+#define writeb __raw_writeb
+#define writew __raw_writew
+#define writel __raw_writel
+#define writeq __raw_writeq
#define readb_relaxed readb
#define readw_relaxed readw
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
index 3d0f20246260..92b28e3e9972 100644
--- a/arch/tile/include/asm/kmap_types.h
+++ b/arch/tile/include/asm/kmap_types.h
@@ -23,35 +23,6 @@
* adds 4MB of required address-space. For now we leave KM_TYPE_NR
* set to depth 8.
*/
-enum km_type {
- KM_TYPE_NR = 8
-};
-
-/*
- * We provide dummy definitions of all the stray values that used to be
- * required for kmap_atomic() and no longer are.
- */
-enum {
- KM_BOUNCE_READ,
- KM_SKB_SUNRPC_DATA,
- KM_SKB_DATA_SOFTIRQ,
- KM_USER0,
- KM_USER1,
- KM_BIO_SRC_IRQ,
- KM_BIO_DST_IRQ,
- KM_PTE0,
- KM_PTE1,
- KM_IRQ0,
- KM_IRQ1,
- KM_SOFTIRQ0,
- KM_SOFTIRQ1,
- KM_SYNC_ICACHE,
- KM_SYNC_DCACHE,
- KM_UML_USERCOPY,
- KM_IRQ_PTE,
- KM_NMI,
- KM_NMI_PTE,
- KM_KDB
-};
+#define KM_TYPE_NR 8
#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
deleted file mode 100644
index 359949be28c1..000000000000
--- a/arch/tile/include/asm/memprof.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * The hypervisor's memory controller profiling infrastructure allows
- * the programmer to find out what fraction of the available memory
- * bandwidth is being consumed at each memory controller. The
- * profiler provides start, stop, and clear operations to allows
- * profiling over a specific time window, as well as an interface for
- * reading the most recent profile values.
- *
- * This header declares IOCTL codes necessary to control memprof.
- */
-#ifndef _ASM_TILE_MEMPROF_H
-#define _ASM_TILE_MEMPROF_H
-
-#include <linux/ioctl.h>
-
-#define MEMPROF_IOCTL_TYPE 0xB4
-#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
-#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
-#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
-
-#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 9d9131e5c552..dd033a4fd627 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -174,7 +174,9 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
#define PAGE_OFFSET MEM_HIGH_START
-#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */
+#define FIXADDR_BASE _AC(0xfffffff400000000, UL) /* 4 GB */
+#define FIXADDR_TOP _AC(0xfffffff500000000, UL) /* 4 GB */
+#define _VMALLOC_START FIXADDR_TOP
#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
#define MEM_SV_INTRPT MEM_SV_START
@@ -185,9 +187,6 @@ static inline __attribute_const__ int get_order(unsigned long size)
/* Highest DTLB address we will use */
#define KERNEL_HIGH_VADDR MEM_SV_START
-/* Since we don't currently provide any fixmaps, we use an impossible VA. */
-#define FIXADDR_TOP MEM_HV_START
-
#else /* !__tilegx__ */
/*
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 32e6cbe8dff3..302cdf71ceed 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,9 +15,13 @@
#ifndef _ASM_TILE_PCI_H
#define _ASM_TILE_PCI_H
+#include <linux/dma-mapping.h>
#include <linux/pci.h>
+#include <linux/numa.h>
#include <asm-generic/pci_iomap.h>
+#ifndef __tilegx__
+
/*
* Structure of a PCI controller (host bridge)
*/
@@ -41,21 +45,151 @@ struct pci_controller {
};
/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int tile_plx_gen1;
+
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
+#define TILE_NUM_PCIE 2
+
+/*
* The hypervisor maps the entirety of CPA-space as bus addresses, so
* bus addresses are physical addresses. The networking and block
* device layers use this boolean for bounce buffer decisions.
*/
#define PCI_DMA_BUS_IS_PHYS 1
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+#else
+
+#include <asm/page.h>
+#include <gxio/trio.h>
+
+/**
+ * We reserve the hugepage-size address range at the top of the 64-bit address
+ * space to serve as the PCI window, emulating the BAR0 space of an endpoint
+ * device. This window is used by the chip-to-chip applications running on
+ * the RC node. The reason for carving out this window is that Mem-Maps that
+ * back up this window will not overlap with those that map the real physical
+ * memory.
+ */
+#define PCIE_HOST_BAR0_SIZE HPAGE_SIZE
+#define PCIE_HOST_BAR0_START HPAGE_MASK
+
+/**
+ * The first PAGE_SIZE of the above "BAR" window is mapped to the
+ * gxpci_host_regs structure.
+ */
+#define PCIE_HOST_REGS_SIZE PAGE_SIZE
+
+/*
+ * This is the PCI address where the Mem-Map interrupt regions start.
+ * We use the 2nd to the last huge page of the 64-bit address space.
+ * The last huge page is used for the rootcomplex "bar", for C2C purpose.
+ */
+#define MEM_MAP_INTR_REGIONS_BASE (HPAGE_MASK - HPAGE_SIZE)
+
+/*
+ * Each Mem-Map interrupt region occupies 4KB.
+ */
+#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+ (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ */
+#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Start of the PCI memory resource, which starts at the end of the
+ * maximum system physical RAM address.
+ */
+#define TILE_PCI_MEM_START (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Structure of a PCI controller (host bridge) on Gx.
+ */
+struct pci_controller {
+
+ /* Pointer back to the TRIO that this PCIe port is connected to. */
+ gxio_trio_context_t *trio;
+ int mac; /* PCIe mac index on the TRIO shim */
+ int trio_index; /* Index of TRIO shim that contains the MAC. */
+
+ int pio_mem_index; /* PIO region index for memory access */
+
+ /*
+ * Mem-Map regions for all the memory controllers so that Linux can
+ * map all of its physical memory space to the PCI bus.
+ */
+ int mem_maps[MAX_NUMNODES];
+
+ int index; /* PCI domain number */
+ struct pci_bus *root_bus;
+
+ /* PCI memory space resource for this controller. */
+ struct resource mem_space;
+ char mem_space_name[32];
+
+ uint64_t mem_offset; /* cpu->bus memory mapping offset. */
+
+ int first_busno;
+
+ struct pci_ops *ops;
+
+ /* Table that maps the INTx numbers to Linux irq numbers. */
+ int irq_intx_table[4];
+
+ /* Address ranges that are routed to this controller/bridge. */
+ struct resource mem_resources[3];
+};
+
+extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
+extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
+
+extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
+
+/*
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU). The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 0
+
+#endif /* __tilegx__ */
+
int __init tile_pci_init(void);
int __init pcibios_init(void);
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
-
void __devinit pcibios_fixup_bus(struct pci_bus *bus);
-#define TILE_NUM_PCIE 2
-
#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
/*
@@ -79,19 +213,10 @@ static inline int pcibios_assign_all_busses(void)
#define PCIBIOS_MIN_MEM 0
#define PCIBIOS_MIN_IO 0
-/*
- * This flag tells if the platform is TILEmpower that needs
- * special configuration for the PLX switch chip.
- */
-extern int tile_plx_gen1;
-
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index c3dd275f25e2..9ab078a4605d 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -146,7 +146,7 @@ extern int fixup_exception(struct pt_regs *regs);
#ifdef __tilegx__
#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret)
#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret)
-#define __get_user_4(x, ptr, ret) __get_user_asm(ld4u, x, ptr, ret)
+#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret)
#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret)
#else
#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret)