From 10083072bfabc40bc47306e512c158c57cf55c2e Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Fri, 14 Apr 2006 16:03:49 -0500 Subject: [PATCH] PCI: per-platform IA64_{FIRST,LAST}_DEVICE_VECTOR definitions Abstract IA64_FIRST_DEVICE_VECTOR/IA64_LAST_DEVICE_VECTOR since SN platforms use a subset of the IA64 range. Implement this by making the above macros global variables which the platform can override in it setup code. Also add a reserve_irq_vector() routine used by SN to mark a vector's as in-use when that weren't allocated through assign_irq_vector(). Signed-off-by: Mark Maule Signed-off-by: Greg Kroah-Hartman --- arch/ia64/sn/kernel/irq.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/ia64/sn') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index c265e02f5036..db187f5cdae1 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -202,6 +202,9 @@ void sn_irq_init(void) int i; irq_desc_t *base_desc = irq_desc; + ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR; + ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR; + for (i = 0; i < NR_IRQS; i++) { if (base_desc[i].handler == &no_irq_type) { base_desc[i].handler = &irq_type_sn; @@ -285,6 +288,7 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) /* link it into the sn_irq[irq] list */ spin_lock(&sn_irq_info_lock); list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + reserve_irq_vector(sn_irq_info->irq_irq); spin_unlock(&sn_irq_info_lock); register_intr_pda(sn_irq_info); @@ -310,8 +314,11 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) spin_lock(&sn_irq_info_lock); list_del_rcu(&sn_irq_info->list); spin_unlock(&sn_irq_info_lock); + if (list_empty(sn_irq_lh[sn_irq_info->irq_irq])) + free_irq_vector(sn_irq_info->irq_irq); call_rcu(&sn_irq_info->rcu, sn_irq_info_free); pci_dev_put(pci_dev); + } static inline void -- cgit v1.2.3 From 83821d3f558dc651e555d62182ed0c95651f41a6 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Fri, 14 Apr 2006 16:03:54 -0500 Subject: [PATCH] PCI: altix: msi support MSI callouts for altix. Involves a fair amount of code reorg in sn irq.c code as well as adding some extensions to the altix PCI provider abstaction. Signed-off-by: Mark Maule Signed-off-by: Greg Kroah-Hartman --- arch/ia64/sn/kernel/io_init.c | 9 +- arch/ia64/sn/kernel/irq.c | 135 ++++++++++--------- arch/ia64/sn/pci/pci_dma.c | 10 +- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 62 ++++++--- arch/ia64/sn/pci/tioca_provider.c | 8 +- arch/ia64/sn/pci/tioce_provider.c | 65 ++++++---- drivers/pci/msi-altix.c | 200 ++++++++++++++++++++++++++++- include/asm-ia64/sn/intr.h | 8 ++ include/asm-ia64/sn/pcibr_provider.h | 5 +- include/asm-ia64/sn/pcibus_provider_defs.h | 17 ++- include/asm-ia64/sn/tiocp.h | 3 +- 11 files changed, 401 insertions(+), 121 deletions(-) (limited to 'arch/ia64/sn') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 5101ac462643..dc09a6a28a37 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -58,7 +58,7 @@ static int max_pcibus_number = 255; /* Default highest pci bus number */ */ static dma_addr_t -sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size) +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) { return 0; } @@ -457,13 +457,6 @@ void sn_pci_fixup_slot(struct pci_dev *dev) pcidev_info->pdi_sn_irq_info = NULL; kfree(sn_irq_info); } - - /* - * MSI currently not supported on altix. Remove this when - * the MSI abstraction patches are integrated into the kernel - * (sometime after 2.6.16 releases) - */ - dev->no_msi = 1; } /* diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index db187f5cdae1..dc8e2b696713 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -26,11 +26,11 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); int sn_force_interrupt_flag = 1; extern int sn_ioif_inited; -static struct list_head **sn_irq_lh; +struct list_head **sn_irq_lh; static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */ -static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, - u64 sn_irq_info, +u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, int req_irq, nasid_t req_nasid, int req_slice) { @@ -40,12 +40,13 @@ static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget, SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, (u64) SAL_INTR_ALLOC, (u64) local_nasid, - (u64) local_widget, (u64) sn_irq_info, (u64) req_irq, + (u64) local_widget, __pa(sn_irq_info), (u64) req_irq, (u64) req_nasid, (u64) req_slice); + return ret_stuff.status; } -static inline void sn_intr_free(nasid_t local_nasid, int local_widget, +void sn_intr_free(nasid_t local_nasid, int local_widget, struct sn_irq_info *sn_irq_info) { struct ia64_sal_retval ret_stuff; @@ -112,73 +113,91 @@ static void sn_end_irq(unsigned int irq) static void sn_irq_info_free(struct rcu_head *head); -static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, + nasid_t nasid, int slice) { - struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; - int cpuid, cpuphys; + int vector; + int cpuphys; + int64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; - cpuid = first_cpu(mask); - cpuphys = cpu_physical_id(cpuid); + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; - list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, - sn_irq_lh[irq], list) { - u64 bridge; - int local_widget, status; - nasid_t local_nasid; - struct sn_irq_info *new_irq_info; - struct sn_pcibus_provider *pci_provider; - - new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); - if (new_irq_info == NULL) - break; - memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - - bridge = (u64) new_irq_info->irq_bridge; - if (!bridge) { - kfree(new_irq_info); - break; /* irq is not a device interrupt */ - } + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - local_nasid = NASID_GET(bridge); + bridge = (u64) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + return NULL; /* irq is not a device interrupt */ + } - if (local_nasid & 1) - local_widget = TIO_SWIN_WIDGETNUM(bridge); - else - local_widget = SWIN_WIDGETNUM(bridge); + local_nasid = NASID_GET(bridge); - /* Free the old PROM new_irq_info structure */ - sn_intr_free(local_nasid, local_widget, new_irq_info); - /* Update kernels new_irq_info with new target info */ - unregister_intr_pda(new_irq_info); + if (local_nasid & 1) + local_widget = TIO_SWIN_WIDGETNUM(bridge); + else + local_widget = SWIN_WIDGETNUM(bridge); - /* allocate a new PROM new_irq_info struct */ - status = sn_intr_alloc(local_nasid, local_widget, - __pa(new_irq_info), irq, - cpuid_to_nasid(cpuid), - cpuid_to_slice(cpuid)); + vector = sn_irq_info->irq_irq; + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); - /* SAL call failed */ - if (status) { - kfree(new_irq_info); - break; - } + /* allocate a new PROM new_irq_info struct */ + status = sn_intr_alloc(local_nasid, local_widget, + new_irq_info, vector, + nasid, slice); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + return NULL; + } - new_irq_info->irq_cpuid = cpuid; - register_intr_pda(new_irq_info); + cpuphys = nasid_slice_to_cpuid(nasid, slice); + new_irq_info->irq_cpuid = cpuphys; + register_intr_pda(new_irq_info); - pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; - if (pci_provider && pci_provider->target_interrupt) - (pci_provider->target_interrupt)(new_irq_info); + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + + /* + * If this represents a line interrupt, target it. If it's + * an msi (irq_int_bit < 0), it's already targeted. + */ + if (new_irq_info->irq_int_bit >= 0 && + pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); - spin_lock(&sn_irq_info_lock); - list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); - spin_unlock(&sn_irq_info_lock); - call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP - set_irq_affinity_info((irq & 0xff), cpuphys, 0); + set_irq_affinity_info((vector & 0xff), cpuphys, 0); #endif - } + + return new_irq_info; +} + +static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; + nasid_t nasid; + int slice; + + nasid = cpuid_to_nasid(first_cpu(mask)); + slice = cpuid_to_slice(first_cpu(mask)); + + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) + (void)sn_retarget_vector(sn_irq_info, nasid, slice); } struct hw_interrupt_type irq_type_sn = { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index b4b84c269210..7a291a271511 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -113,7 +113,8 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size, * resources. */ - *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size); + *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); if (!*dma_handle) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); free_pages((unsigned long)cpuaddr, get_order(size)); @@ -176,7 +177,7 @@ dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size, BUG_ON(dev->bus != &pci_bus_type); phys_addr = __pa(cpu_addr); - dma_addr = provider->dma_map(pdev, phys_addr, size); + dma_addr = provider->dma_map(pdev, phys_addr, size, SN_DMA_ADDR_PHYS); if (!dma_addr) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); return 0; @@ -260,7 +261,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, for (i = 0; i < nhwentries; i++, sg++) { phys_addr = SG_ENT_PHYS_ADDRESS(sg); sg->dma_address = provider->dma_map(pdev, - phys_addr, sg->length); + phys_addr, sg->length, + SN_DMA_ADDR_PHYS); if (!sg->dma_address) { printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 9f86bb6519aa..a86c7b945962 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -41,7 +41,7 @@ extern int sn_ioif_inited; static dma_addr_t pcibr_dmamap_ate32(struct pcidev_info *info, - u64 paddr, size_t req_size, u64 flags) + u64 paddr, size_t req_size, u64 flags, int dma_flags) { struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; @@ -81,9 +81,12 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (IS_PCIX(pcibus_info)) ate_flags &= ~(PCI32_ATE_PREF); - xio_addr = - IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr); + if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS)) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + offset = IOPGOFF(xio_addr); ate = ate_flags | (xio_addr - offset); @@ -91,6 +94,13 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (IS_PIC_SOFT(pcibus_info)) { ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT); } + + /* + * If we're mapping for MSI, set the MSI bit in the ATE + */ + if (dma_flags & SN_DMA_MSI) + ate |= PCI32_ATE_MSI; + ate_write(pcibus_info, ate_index, ate_count, ate); /* @@ -105,20 +115,27 @@ pcibr_dmamap_ate32(struct pcidev_info *info, if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR) ATE_SWAP_ON(pci_addr); + return pci_addr; } static dma_addr_t pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, - u64 dma_attributes) + u64 dma_attributes, int dma_flags) { struct pcibus_info *pcibus_info = (struct pcibus_info *) ((info->pdi_host_pcidev_info)->pdi_pcibus_info); u64 pci_addr; /* Translate to Crosstalk View of Physical Address */ - pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr)) | dma_attributes; + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + pci_addr = IS_PIC_SOFT(pcibus_info) ? + PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr) | dma_attributes; + else + pci_addr = IS_PIC_SOFT(pcibus_info) ? + paddr : + paddr | dma_attributes; /* Handle Bus mode */ if (IS_PCIX(pcibus_info)) @@ -130,7 +147,9 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, ((u64) pcibus_info-> pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT); } else - pci_addr |= TIOCP_PCI64_CMDTYPE_MEM; + pci_addr |= (dma_flags & SN_DMA_MSI) ? + TIOCP_PCI64_CMDTYPE_MSI : + TIOCP_PCI64_CMDTYPE_MEM; /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */ if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn)) @@ -141,7 +160,7 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, - u64 paddr, size_t req_size, u64 flags) + u64 paddr, size_t req_size, u64 flags, int dma_flags) { struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> @@ -156,8 +175,14 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, return 0; } - xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : - PHYS_TO_TIODMA(paddr); + if (dma_flags & SN_DMA_MSI) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; xio_base = pcibus_info->pbi_dir_xbase; offset = xio_addr - xio_base; @@ -327,7 +352,7 @@ void sn_dma_flush(u64 addr) */ dma_addr_t -pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) +pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags) { dma_addr_t dma_handle; struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); @@ -344,11 +369,11 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) */ dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_PREF); + PCI64_ATTR_PREF, dma_flags); } else { /* Handle 32-63 bit cards via direct mapping */ dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, - size, 0); + size, 0, dma_flags); if (!dma_handle) { /* * It is a 32 bit card and we cannot do direct mapping, @@ -356,7 +381,8 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) */ dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr, - size, PCI32_ATE_PREF); + size, PCI32_ATE_PREF, + dma_flags); } } @@ -365,18 +391,18 @@ pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size) dma_addr_t pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr, - size_t size) + size_t size, int dma_flags) { dma_addr_t dma_handle; struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); if (hwdev->dev.coherent_dma_mask == ~0UL) { dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, - PCI64_ATTR_BAR); + PCI64_ATTR_BAR, dma_flags); } else { dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, phys_addr, size, - PCI32_ATE_BAR); + PCI32_ATE_BAR, dma_flags); } return dma_handle; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index be0176912968..20de72791b97 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -515,10 +515,16 @@ tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) * use the GART mapped mode. */ static u64 -tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { u64 mapaddr; + /* + * Not supported for now ... + */ + if (dma_flags & SN_DMA_MSI) + return 0; + /* * If card is 64 or 48 bit addresable, use a direct mapping. 32 * bit direct is so restrictive w.r.t. where the memory resides that diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 833295624e5d..4cac7bdc7c7f 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -170,7 +170,8 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) #define ATE_VALID(ate) ((ate) & (1UL << 63)) -#define ATE_MAKE(addr, ps) (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63)) +#define ATE_MAKE(addr, ps, msi) \ + (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0)) /* * Flavors of ate-based mapping supported by tioce_alloc_map() @@ -196,15 +197,17 @@ tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr) * * 63 - must be 1 to indicate d64 mode to CE hardware * 62 - barrier bit ... controlled with tioce_dma_barrier() - * 61 - 0 since this is not an MSI transaction + * 61 - msi bit ... specified through dma_flags * 60:54 - reserved, MBZ */ static u64 -tioce_dma_d64(unsigned long ct_addr) +tioce_dma_d64(unsigned long ct_addr, int dma_flags) { u64 bus_addr; bus_addr = ct_addr | (1UL << 63); + if (dma_flags & SN_DMA_MSI) + bus_addr |= (1UL << 61); return bus_addr; } @@ -261,7 +264,7 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, */ static u64 tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, - u64 ct_addr, int len) + u64 ct_addr, int len, int dma_flags) { int i; int j; @@ -270,6 +273,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, int entries; int nates; u64 pagesize; + int msi_capable, msi_wanted; u64 *ate_shadow; u64 *ate_reg; u64 addr; @@ -291,6 +295,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate3240; pagesize = ce_kern->ce_ate3240_pagesize; bus_base = TIOCE_M32_MIN; + msi_capable = 1; break; case TIOCE_ATE_M40: first = 0; @@ -299,6 +304,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate40; pagesize = MB(64); bus_base = TIOCE_M40_MIN; + msi_capable = 0; break; case TIOCE_ATE_M40S: /* @@ -311,11 +317,16 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, ate_reg = ce_mmr->ce_ure_ate3240; pagesize = GB(16); bus_base = TIOCE_M40S_MIN; + msi_capable = 0; break; default: return 0; } + msi_wanted = dma_flags & SN_DMA_MSI; + if (msi_wanted && !msi_capable) + return 0; + nates = ATE_NPAGES(ct_addr, len, pagesize); if (nates > entries) return 0; @@ -344,7 +355,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, for (j = 0; j < nates; j++) { u64 ate; - ate = ATE_MAKE(addr, pagesize); + ate = ATE_MAKE(addr, pagesize, msi_wanted); ate_shadow[i + j] = ate; tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); addr += pagesize; @@ -371,7 +382,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info. */ static u64 -tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) +tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags) { int dma_ok; int port; @@ -381,6 +392,9 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr) u64 ct_lower; dma_addr_t bus_addr; + if (dma_flags & SN_DMA_MSI) + return 0; + ct_upper = ct_addr & ~0x3fffffffUL; ct_lower = ct_addr & 0x3fffffffUL; @@ -507,7 +521,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) */ static u64 tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, - int barrier) + int barrier, int dma_flags) { unsigned long flags; u64 ct_addr; @@ -523,15 +537,18 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (dma_mask < 0x7fffffffUL) return 0; - ct_addr = PHYS_TO_TIODMA(paddr); + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + ct_addr = PHYS_TO_TIODMA(paddr); + else + ct_addr = paddr; /* * If the device can generate 64 bit addresses, create a D64 map. - * Since this should never fail, bypass the rest of the checks. */ if (dma_mask == ~0UL) { - mapaddr = tioce_dma_d64(ct_addr); - goto dma_map_done; + mapaddr = tioce_dma_d64(ct_addr, dma_flags); + if (mapaddr) + goto dma_map_done; } pcidev_to_tioce(pdev, NULL, &ce_kern, &port); @@ -574,18 +591,22 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (byte_count > MB(64)) { mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, - port, ct_addr, byte_count); + port, ct_addr, byte_count, + dma_flags); if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, - ct_addr, byte_count); + ct_addr, byte_count, + dma_flags); } else { mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, - ct_addr, byte_count); + ct_addr, byte_count, + dma_flags); if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, - port, ct_addr, byte_count); + port, ct_addr, byte_count, + dma_flags); } } @@ -593,7 +614,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, * 32-bit direct is the next mode to try */ if (!mapaddr && dma_mask >= 0xffffffffUL) - mapaddr = tioce_dma_d32(pdev, ct_addr); + mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags); /* * Last resort, try 32-bit ATE-based map. @@ -601,7 +622,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, if (!mapaddr) mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, - byte_count); + byte_count, dma_flags); spin_unlock_irqrestore(&ce_kern->ce_lock, flags); @@ -622,9 +643,9 @@ dma_map_done: * in the address. */ static u64 -tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { - return tioce_do_dma_map(pdev, paddr, byte_count, 0); + return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags); } /** @@ -636,9 +657,9 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count) * Simply call tioce_do_dma_map() to create a map with the barrier bit set * in the address. */ static u64 -tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count) +tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) { - return tioce_do_dma_map(pdev, paddr, byte_count, 1); + return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); } /** @@ -696,7 +717,7 @@ tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) while (ate_index <= last_ate) { u64 ate; - ate = ATE_MAKE(0xdeadbeef, ps); + ate = ATE_MAKE(0xdeadbeef, ps, 0); ce_kern->ce_ate3240_shadow[ate_index] = ate; tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], ate); diff --git a/drivers/pci/msi-altix.c b/drivers/pci/msi-altix.c index 9bd240602c1e..bed4183a5e39 100644 --- a/drivers/pci/msi-altix.c +++ b/drivers/pci/msi-altix.c @@ -6,13 +6,205 @@ * Copyright (C) 2006 Silicon Graphics, Inc. All Rights Reserved. */ -#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "msi.h" + +struct sn_msi_info { + u64 pci_addr; + struct sn_irq_info *sn_irq_info; +}; + +static struct sn_msi_info *sn_msi_info; + +static void +sn_msi_teardown(unsigned int vector) +{ + nasid_t nasid; + int widget; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft; + struct sn_pcibus_provider *provider; + + sn_irq_info = sn_msi_info[vector].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return; + + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + (*provider->dma_unmap)(pdev, + sn_msi_info[vector].pci_addr, + PCI_DMA_FROMDEVICE); + sn_msi_info[vector].pci_addr = 0; + + bussoft = SN_PCIDEV_BUSSOFT(pdev); + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_intr_free(nasid, widget, sn_irq_info); + sn_msi_info[vector].sn_irq_info = NULL; + + return; +} int -sn_msi_init(void) +sn_msi_setup(struct pci_dev *pdev, unsigned int vector, + u32 *addr_hi, u32 *addr_lo, u32 *data) { + int widget; + int status; + nasid_t nasid; + u64 bus_addr; + struct sn_irq_info *sn_irq_info; + struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + if (bussoft == NULL) + return -EINVAL; + + if (provider == NULL || provider->dma_map_consistent == NULL) + return -EINVAL; + + /* + * Set up the vector plumbing. Let the prom (via sn_intr_alloc) + * decide which cpu to direct this msi at by default. + */ + + nasid = NASID_GET(bussoft->bs_base); + widget = (nasid & 1) ? + TIO_SWIN_WIDGETNUM(bussoft->bs_base) : + SWIN_WIDGETNUM(bussoft->bs_base); + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (! sn_irq_info) + return -ENOMEM; + + status = sn_intr_alloc(nasid, widget, sn_irq_info, vector, -1, -1); + if (status) { + kfree(sn_irq_info); + return -ENOMEM; + } + + sn_irq_info->irq_int_bit = -1; /* mark this as an MSI irq */ + sn_irq_fixup(pdev, sn_irq_info); + + /* Prom probably should fill these in, but doesn't ... */ + sn_irq_info->irq_bridge_type = bussoft->bs_asic_type; + sn_irq_info->irq_bridge = (void *)bussoft->bs_base; + /* - * return error until MSI is supported on altix platforms + * Map the xio address into bus space */ - return -EINVAL; + bus_addr = (*provider->dma_map_consistent)(pdev, + sn_irq_info->irq_xtalkaddr, + sizeof(sn_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + if (! bus_addr) { + sn_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + return -ENOMEM; + } + + sn_msi_info[vector].sn_irq_info = sn_irq_info; + sn_msi_info[vector].pci_addr = bus_addr; + + *addr_hi = (u32)(bus_addr >> 32); + *addr_lo = (u32)(bus_addr & 0x00000000ffffffff); + + /* + * In the SN platform, bit 16 is a "send vector" bit which + * must be present in order to move the vector through the system. + */ + *data = 0x100 + (unsigned int)vector; + +#ifdef CONFIG_SMP + set_irq_affinity_info((vector & 0xff), sn_irq_info->irq_cpuid, 0); +#endif + + return 0; +} + +static void +sn_msi_target(unsigned int vector, unsigned int cpu, + u32 *addr_hi, u32 *addr_lo) +{ + int slice; + nasid_t nasid; + u64 bus_addr; + struct pci_dev *pdev; + struct pcidev_info *sn_pdev; + struct sn_irq_info *sn_irq_info; + struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *provider; + + sn_irq_info = sn_msi_info[vector].sn_irq_info; + if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) + return; + + /* + * Release XIO resources for the old MSI PCI address + */ + + sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + pdev = sn_pdev->pdi_linux_pcidev; + provider = SN_PCIDEV_BUSPROVIDER(pdev); + + bus_addr = (u64)(*addr_hi) << 32 | (u64)(*addr_lo); + (*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE); + sn_msi_info[vector].pci_addr = 0; + + nasid = cpuid_to_nasid(cpu); + slice = cpuid_to_slice(cpu); + + new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); + sn_msi_info[vector].sn_irq_info = new_irq_info; + if (new_irq_info == NULL) + return; + + /* + * Map the xio address into bus space + */ + + bus_addr = (*provider->dma_map_consistent)(pdev, + new_irq_info->irq_xtalkaddr, + sizeof(new_irq_info->irq_xtalkaddr), + SN_DMA_MSI|SN_DMA_ADDR_XIO); + + sn_msi_info[vector].pci_addr = bus_addr; + *addr_hi = (u32)(bus_addr >> 32); + *addr_lo = (u32)(bus_addr & 0x00000000ffffffff); +} + +struct msi_ops sn_msi_ops = { + .setup = sn_msi_setup, + .teardown = sn_msi_teardown, +#ifdef CONFIG_SMP + .target = sn_msi_target, +#endif +}; + +int +sn_msi_init(void) +{ + sn_msi_info = + kzalloc(sizeof(struct sn_msi_info) * NR_VECTORS, GFP_KERNEL); + if (! sn_msi_info) + return -ENOMEM; + + msi_register(&sn_msi_ops); + return 0; } diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h index 60a51a406eec..12b54ddb06be 100644 --- a/include/asm-ia64/sn/intr.h +++ b/include/asm-ia64/sn/intr.h @@ -10,6 +10,7 @@ #define _ASM_IA64_SN_INTR_H #include +#include #define SGI_UART_VECTOR 0xe9 @@ -40,6 +41,7 @@ struct sn_irq_info { int irq_cpuid; /* kernel logical cpuid */ int irq_irq; /* the IRQ number */ int irq_int_bit; /* Bridge interrupt pin */ + /* <0 means MSI */ u64 irq_xtalkaddr; /* xtalkaddr IRQ is sent to */ int irq_bridge_type;/* pciio asic type (pciio.h) */ void *irq_bridge; /* bridge generating irq */ @@ -53,6 +55,12 @@ struct sn_irq_info { }; extern void sn_send_IPI_phys(int, long, int, int); +extern u64 sn_intr_alloc(nasid_t, int, + struct sn_irq_info *, + int, nasid_t, int); +extern void sn_intr_free(nasid_t, int, struct sn_irq_info *); +extern struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *, nasid_t, int); +extern struct list_head **sn_irq_lh; #define CPU_VECTOR_TO_IRQ(cpuid,vector) (vector) diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 51260ab70d91..e3b0c3fe5eed 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -55,6 +55,7 @@ #define PCI32_ATE_V (0x1 << 0) #define PCI32_ATE_CO (0x1 << 1) #define PCI32_ATE_PREC (0x1 << 2) +#define PCI32_ATE_MSI (0x1 << 2) #define PCI32_ATE_PREF (0x1 << 3) #define PCI32_ATE_BAR (0x1 << 4) #define PCI32_ATE_ADDR_SHFT 12 @@ -117,8 +118,8 @@ struct pcibus_info { extern int pcibr_init_provider(void); extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *); -extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); -extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t); +extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t, int type); +extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t, int type); extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int); /* diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h index ce3f6c328241..8f7c83d0f6d3 100644 --- a/include/asm-ia64/sn/pcibus_provider_defs.h +++ b/include/asm-ia64/sn/pcibus_provider_defs.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H @@ -45,13 +45,24 @@ struct pci_controller; */ struct sn_pcibus_provider { - dma_addr_t (*dma_map)(struct pci_dev *, unsigned long, size_t); - dma_addr_t (*dma_map_consistent)(struct pci_dev *, unsigned long, size_t); + dma_addr_t (*dma_map)(struct pci_dev *, unsigned long, size_t, int flags); + dma_addr_t (*dma_map_consistent)(struct pci_dev *, unsigned long, size_t, int flags); void (*dma_unmap)(struct pci_dev *, dma_addr_t, int); void * (*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *); void (*force_interrupt)(struct sn_irq_info *); void (*target_interrupt)(struct sn_irq_info *); }; +/* + * Flags used by the map interfaces + * bits 3:0 specifies format of passed in address + * bit 4 specifies that address is to be used for MSI + */ + +#define SN_DMA_ADDRTYPE(x) ((x) & 0xf) +#define SN_DMA_ADDR_PHYS 1 /* address is an xio address. */ +#define SN_DMA_ADDR_XIO 2 /* address is phys memory */ +#define SN_DMA_MSI 0x10 /* Bus address is to be used for MSI */ + extern struct sn_pcibus_provider *sn_pci_provider[]; #endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */ diff --git a/include/asm-ia64/sn/tiocp.h b/include/asm-ia64/sn/tiocp.h index f47c08ab483c..e8ad0bb5b6c5 100644 --- a/include/asm-ia64/sn/tiocp.h +++ b/include/asm-ia64/sn/tiocp.h @@ -3,13 +3,14 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2003-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_PCI_TIOCP_H #define _ASM_IA64_SN_PCI_TIOCP_H #define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL #define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60) +#define TIOCP_PCI64_CMDTYPE_MSI (0x3ull << 60) /***************************************************************************** -- cgit v1.2.3 From a1d7057727bc9e20a0a417812f538fe6b8a02c03 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 31 May 2006 08:58:08 -0500 Subject: [IA64-SGI] SN topology fix for large systems There is an SN bug in sn_hwperf.c that affects systems with 1024n or 1024p. The bug manifests itself 2 ways: IO interrupts are not always targeted to the nearest node, and 2) the "cat /proc/sgi_sn/sn_topology" commands fails with "cannot allocate memory". The code is using the wrong macros for validating node numbers. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 50 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 23 deletions(-) (limited to 'arch/ia64/sn') diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 739c948dc504..9a8a29339d2d 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -51,6 +51,8 @@ static nasid_t sn_hwperf_master_nasid = INVALID_NASID; static int sn_hwperf_init(void); static DECLARE_MUTEX(sn_hwperf_init_mutex); +#define cnode_possible(n) ((n) < num_cnodes) + static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) { int e; @@ -127,14 +129,14 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return node_possible(cnode) ? cnode : -1; + return cnode_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) BUG(); - if (!obj->sn_hwp_this_part) + if (SN_HWPERF_FOREIGN(obj)) return -1; return sn_hwperf_geoid_to_cnode(obj->location); } @@ -199,12 +201,12 @@ static void print_pci_topology(struct seq_file *s) static inline int sn_hwperf_has_cpus(cnodeid_t node) { - return node_online(node) && nr_cpus_node(node); + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); } static inline int sn_hwperf_has_mem(cnodeid_t node) { - return node_online(node) && NODE_DATA(node)->node_present_pages; + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; } static struct sn_hwperf_object_info * @@ -237,7 +239,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb int found_mem = 0; int found_cpu = 0; - if (!node_possible(node)) + if (!cnode_possible(node)) return -EINVAL; if (sn_hwperf_has_cpus(node)) { @@ -442,7 +444,7 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, obj->sn_hwp_this_part ? "local" : "shared", obj->name); - if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) seq_putc(s, '\n'); else { cnodeid_t near_mem = -1; @@ -468,22 +470,24 @@ static int sn_topology_show(struct seq_file *s, void *d) /* * CPUs on this node, if any */ - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( - cpu_to_node(i), - cpu_to_node(j))); + if (!SN_HWPERF_IS_IONODE(obj)) { + cpumask = node_to_cpumask(ordinal); + for_each_online_cpu(i) { + if (cpu_isset(i, cpumask)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); } - seq_putc(s, '\n'); } } } @@ -523,7 +527,7 @@ static int sn_topology_show(struct seq_file *s, void *d) if (obj->sn_hwp_this_part && p->sn_hwp_this_part) /* both ends local to this partition */ seq_puts(s, " local"); - else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part) + else if (SN_HWPERF_FOREIGN(p)) /* both ends of the link in foreign partiton */ seq_puts(s, " foreign"); else @@ -776,7 +780,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || !node_possible(node)) { + (node = a.arg) < 0 || !cnode_possible(node)) { r = -EINVAL; goto error; } -- cgit v1.2.3 From f640f94ec4f39e8a4d91d58354d3e09b28769edf Mon Sep 17 00:00:00 2001 From: Mike Habeck Date: Mon, 19 Jun 2006 15:38:10 -0500 Subject: [IA64-SGI] fix SGI Altix tioce_bus_fixup() bug The following patch fixes a bug in the SGI Altix tioce_bus_fixup() code. ce_dre_comp_err_addr needs to be zero'd out not ~0ULL. As a result completion errors weren't being captured. Signed-off-by: Mike Habeck Signed-off-by: Tony Luck --- arch/ia64/sn/pci/tioce_provider.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64/sn') diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 833295624e5d..85f3b3d4c606 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 2003-2006 Silicon Graphics, Inc. All Rights Reserved. */ #include @@ -1002,7 +1002,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, ~0ULL); - tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL); if (request_irq(SGI_PCIASIC_ERROR, tioce_error_intr_handler, -- cgit v1.2.3 From 762834e8bf46bf41ce9034d062a7c1f8563175f3 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Fri, 23 Jun 2006 02:03:19 -0700 Subject: [PATCH] Unify pxm_to_node() and node_to_pxm() Consolidate the various arch-specific implementations of pxm_to_node() and node_to_pxm() into a single generic version. Signed-off-by: Yasunori Goto Cc: "Luck, Tony" Cc: Andi Kleen Cc: Dave Hansen Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 6 ++++++ arch/i386/kernel/srat.c | 19 ++-------------- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/kernel/acpi.c | 24 +++++++-------------- arch/ia64/pci/pci.c | 2 +- arch/ia64/sn/kernel/setup.c | 4 ++-- arch/x86_64/mm/srat.c | 33 +--------------------------- drivers/acpi/Kconfig | 2 +- drivers/acpi/numa.c | 48 +++++++++++++++++++++++++++++++++++++++++ include/acpi/acpi_numa.h | 23 ++++++++++++++++++++ include/asm-x86_64/numa.h | 1 - include/linux/acpi.h | 9 ++++++++ 12 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 include/acpi/acpi_numa.h (limited to 'arch/ia64/sn') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 8dfa3054f10f..15d23da2455f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -173,6 +173,12 @@ config ACPI_SRAT bool default y depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) + select ACPI_NUMA + +config HAVE_ARCH_PARSE_SRAT + bool + default y + depends on ACPI_SRAT config X86_SUMMIT_NUMA bool diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 52b3ed5d2cb5..989c85255dbe 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -39,7 +39,6 @@ #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) -#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */ /* bitmap length; _PXM is at most 255 */ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ @@ -213,19 +212,11 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } -static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ - -int pxm_to_node(int pxm) -{ - return pxm_to_nid_map[pxm]; -} - /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ p = start; @@ -235,10 +226,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); memset(zholes_size, 0, sizeof(zholes_size)); - /* -1 in these maps means not available */ - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); - num_memory_chunks = 0; while (p < end) { switch (*p) { @@ -278,9 +265,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) nodes_clear(node_online_map); for (i = 0; i < MAX_PXM_DOMAINS; i++) { if (BMAP_TEST(pxm_bitmap, i)) { - nid = num_online_nodes(); - pxm_to_nid_map[i] = nid; - nid_to_pxm_map[nid] = i; + int nid = acpi_map_pxm_to_node(i); node_set_online(nid); } } @@ -288,7 +273,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) /* set cnode id in memory chunk structure */ for (i = 0; i < num_memory_chunks; i++) - node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm]; + node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); printk("pxm bitmap: "); for (i = 0; i < sizeof(pxm_bitmap); i++) { diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index bdccd0b1eb60..3ce443e6c016 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1958,7 +1958,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) if (pxm < 0) return; - node = pxm_to_nid_map[pxm]; + node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node)) return; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 58c93a30348c..d1c52cf67882 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -415,9 +415,6 @@ static int __initdata srat_num_cpus; /* number of cpus */ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; #define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) #define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) -/* maps to convert between proximity domain and logical node ID */ -int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; -int __initdata nid_to_pxm_map[MAX_NUMNODES]; static struct acpi_table_slit __initdata *slit_table; static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa) @@ -533,22 +530,17 @@ void __init acpi_numa_arch_fixup(void) * MCD - This can probably be dropped now. No need for pxm ID to node ID * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. */ - /* calculate total number of nodes in system from PXM bitmap */ - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); nodes_clear(node_online_map); for (i = 0; i < MAX_PXM_DOMAINS; i++) { if (pxm_bit_test(i)) { - int nid = num_online_nodes(); - pxm_to_nid_map[i] = nid; - nid_to_pxm_map[nid] = i; + int nid = acpi_map_pxm_to_node(i); node_set_online(nid); } } /* set logical node id in memory chunk structure */ for (i = 0; i < num_node_memblks; i++) - node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; + node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); /* assign memory bank numbers for each chunk on each node */ for_each_online_node(i) { @@ -562,7 +554,7 @@ void __init acpi_numa_arch_fixup(void) /* set logical node id in cpu structure */ for (i = 0; i < srat_num_cpus; i++) - node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; + node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes()); @@ -575,11 +567,11 @@ void __init acpi_numa_arch_fixup(void) for (i = 0; i < slit_table->localities; i++) { if (!pxm_bit_test(i)) continue; - node_from = pxm_to_nid_map[i]; + node_from = pxm_to_node(i); for (j = 0; j < slit_table->localities; j++) { if (!pxm_bit_test(j)) continue; - node_to = pxm_to_nid_map[j]; + node_to = pxm_to_node(j); node_distance(node_from, node_to) = slit_table->entry[i * slit_table->localities + j]; } @@ -785,9 +777,9 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) /* * Assuming that the container driver would have set the proximity - * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag + * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag */ - node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_nid_map[pxm_id]; + node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id); node_cpuid[cpu].phys_id = physid; #endif @@ -966,7 +958,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) if (pxm < 0) return AE_OK; - node = pxm_to_nid_map[pxm]; + node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || cpus_empty(node_to_cpumask(node))) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index ab829a22f8a4..cf7751b99d1c 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -352,7 +352,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) pxm = acpi_get_pxm(controller->acpi_handle); #ifdef CONFIG_NUMA if (pxm >= 0) - controller->node = pxm_to_nid_map[pxm]; + controller->node = pxm_to_node(pxm); #endif acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 30988dfbddff..93577abae36d 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -139,7 +139,7 @@ static int __init pxm_to_nasid(int pxm) int i; int nid; - nid = pxm_to_nid_map[pxm]; + nid = pxm_to_node(pxm); for (i = 0; i < num_node_memblks; i++) { if (node_memblk[i].nid == nid) { return NASID_GET(node_memblk[i].start_paddr); @@ -704,7 +704,7 @@ void __init build_cnode_tables(void) * cnode == node for all C & M bricks. */ for_each_online_node(node) { - nasid = pxm_to_nasid(nid_to_pxm_map[node]); + nasid = pxm_to_nasid(node_to_pxm(node)); sn_cnodeid_to_nasid[node] = nasid; physical_node_map[nasid] = node; } diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 474df22c6ed2..502fce65e96a 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -30,7 +30,6 @@ static struct acpi_table_slit *acpi_slit; static nodemask_t nodes_parsed __initdata; -static nodemask_t nodes_found __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES] __initdata; static int found_add_area __initdata; @@ -38,33 +37,14 @@ int hotadd_percent __initdata = 0; #ifndef RESERVE_HOTADD #define hotadd_percent 0 /* Ignore all settings */ #endif -static u8 pxm2node[256] = { [0 ... 255] = 0xff }; /* Too small nodes confuse the VM badly. Usually they result from BIOS bugs. */ #define NODE_MIN_SIZE (4*1024*1024) -static int node_to_pxm(int n); - -int pxm_to_node(int pxm) -{ - if ((unsigned)pxm >= 256) - return -1; - /* Extend 0xff to (int)-1 */ - return (signed char)pxm2node[pxm]; -} - static __init int setup_node(int pxm) { - unsigned node = pxm2node[pxm]; - if (node == 0xff) { - if (nodes_weight(nodes_found) >= MAX_NUMNODES) - return -1; - node = first_unset_node(nodes_found); - node_set(node, nodes_found); - pxm2node[pxm] = node; - } - return pxm2node[pxm]; + return acpi_map_pxm_to_node(pxm); } static __init int conflicting_nodes(unsigned long start, unsigned long end) @@ -440,17 +420,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return 0; } -static int node_to_pxm(int n) -{ - int i; - if (pxm2node[n] == n) - return n; - for (i = 0; i < 256; i++) - if (pxm2node[i] == n) - return i; - return 0; -} - void __init srat_reserve_add_area(int nodeid) { if (found_add_area && nodes_add[nodeid].end) { diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index c24652d31bf9..230c53852231 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -162,7 +162,7 @@ config ACPI_THERMAL config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (IA64 || X86_64) + depends on (X86 || IA64) default y if IA64_GENERIC || IA64_SGI_SN2 config ACPI_ASUS diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 64b98e82feb7..e2c1a16078c9 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -36,12 +36,60 @@ #define _COMPONENT ACPI_NUMA ACPI_MODULE_NAME("numa") +static nodemask_t nodes_found_map = NODE_MASK_NONE; +#define PXM_INVAL -1 +#define NID_INVAL -1 + +/* maps to convert between proximity domain and logical node ID */ +int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS] + = { [0 ... MAX_PXM_DOMAINS - 1] = NID_INVAL }; +int __cpuinitdata node_to_pxm_map[MAX_NUMNODES] + = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; + extern int __init acpi_table_parse_madt_family(enum acpi_table_id id, unsigned long madt_size, int entry_id, acpi_madt_entry_handler handler, unsigned int max_entries); +int __cpuinit pxm_to_node(int pxm) +{ + if (pxm < 0) + return NID_INVAL; + return pxm_to_node_map[pxm]; +} + +int __cpuinit node_to_pxm(int node) +{ + if (node < 0) + return PXM_INVAL; + return node_to_pxm_map[node]; +} + +int __cpuinit acpi_map_pxm_to_node(int pxm) +{ + int node = pxm_to_node_map[pxm]; + + if (node < 0){ + if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) + return NID_INVAL; + node = first_unset_node(nodes_found_map); + pxm_to_node_map[pxm] = node; + node_to_pxm_map[node] = pxm; + node_set(node, nodes_found_map); + } + + return node; +} + +void __cpuinit acpi_unmap_pxm_to_node(int node) +{ + int pxm = node_to_pxm_map[node]; + pxm_to_node_map[pxm] = NID_INVAL; + node_to_pxm_map[node] = PXM_INVAL; + node_clear(node, nodes_found_map); +} + void __init acpi_table_print_srat_entry(acpi_table_entry_header * header) { diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h new file mode 100644 index 000000000000..1049f2a0a6db --- /dev/null +++ b/include/acpi/acpi_numa.h @@ -0,0 +1,23 @@ +#ifndef __ACPI_NUMA_H +#define __ACPI_NUMA_H + +#ifdef CONFIG_ACPI_NUMA +#include + +/* Proximity bitmap length */ +#if MAX_NUMNODES > 256 +#define MAX_PXM_DOMAINS MAX_NUMNODES +#else +#define MAX_PXM_DOMAINS (256) /* Old pxm spec is defined 8 bit */ +#endif + +extern int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]; +extern int __cpuinitdata node_to_pxm_map[MAX_NUMNODES]; + +extern int __cpuinit pxm_to_node(int); +extern int __cpuinit node_to_pxm(int); +extern int __cpuinit acpi_map_pxm_to_node(int); +extern void __cpuinit acpi_unmap_pxm_to_node(int); + +#endif /* CONFIG_ACPI_NUMA */ +#endif /* __ACP_NUMA_H */ diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h index 1cc92fe02503..933ff11ece15 100644 --- a/include/asm-x86_64/numa.h +++ b/include/asm-x86_64/numa.h @@ -8,7 +8,6 @@ struct bootnode { }; extern int compute_hash_shift(struct bootnode *nodes, int numnodes); -extern int pxm_to_node(int nid); #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1cf0b91d05bd..90d6df1551ed 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -407,10 +408,18 @@ void acpi_table_print_madt_entry (acpi_table_entry_header *madt); void acpi_table_print_srat_entry (acpi_table_entry_header *srat); /* the following four functions are architecture-dependent */ +#ifdef CONFIG_HAVE_ARCH_PARSE_SRAT +#define NR_NODE_MEMBLKS MAX_NUMNODES +#define acpi_numa_slit_init(slit) do {} while (0) +#define acpi_numa_processor_affinity_init(pa) do {} while (0) +#define acpi_numa_memory_affinity_init(ma) do {} while (0) +#define acpi_numa_arch_fixup() do {} while (0) +#else void acpi_numa_slit_init (struct acpi_table_slit *slit); void acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa); void acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma); void acpi_numa_arch_fixup(void); +#endif #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -- cgit v1.2.3 From 929f97276bcf7f4a95272ed08a85339b98ba210d Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Fri, 23 Jun 2006 02:03:21 -0700 Subject: [PATCH] change gen_pool allocator to not touch managed memory Modify the gen_pool allocator (lib/genalloc.c) to utilize a bitmap scheme instead of the buddy scheme. The purpose of this change is to eliminate the touching of the actual memory being allocated. Since the change modifies the interface, a change to the uncached allocator (arch/ia64/kernel/uncached.c) is also required. Both Andrey Volkov and Jes Sorenson have expressed a desire that the gen_pool allocator not write to the memory being managed. See the following: http://marc.theaimsgroup.com/?l=linux-kernel&m=113518602713125&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113533568827916&w=2 Signed-off-by: Dean Nelson Cc: Andrey Volkov Acked-by: Jes Sorensen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/uncached.c | 200 +++++++++++++++--------------- arch/ia64/sn/kernel/sn2/cache.c | 15 ++- include/linux/genalloc.h | 35 +++--- lib/genalloc.c | 263 ++++++++++++++++++---------------------- 4 files changed, 252 insertions(+), 261 deletions(-) (limited to 'arch/ia64/sn') diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index fcd2bad0286f..5f03b9e524dd 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -29,15 +29,8 @@ #include #include -#define DEBUG 0 -#if DEBUG -#define dprintk printk -#else -#define dprintk(x...) do { } while (0) -#endif - -void __init efi_memmap_walk_uc (efi_freemem_callback_t callback); +extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *); #define MAX_UNCACHED_GRANULES 5 static int allocated_granules; @@ -60,6 +53,7 @@ static void uncached_ipi_visibility(void *data) static void uncached_ipi_mc_drain(void *data) { int status; + status = ia64_pal_mc_drain(); if (status) printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on " @@ -67,30 +61,35 @@ static void uncached_ipi_mc_drain(void *data) } -static unsigned long -uncached_get_new_chunk(struct gen_pool *poolp) +/* + * Add a new chunk of uncached memory pages to the specified pool. + * + * @pool: pool to add new chunk of uncached memory to + * @nid: node id of node to allocate memory from, or -1 + * + * This is accomplished by first allocating a granule of cached memory pages + * and then converting them to uncached memory pages. + */ +static int uncached_add_chunk(struct gen_pool *pool, int nid) { struct page *page; - void *tmp; int status, i; - unsigned long addr, node; + unsigned long c_addr, uc_addr; if (allocated_granules >= MAX_UNCACHED_GRANULES) - return 0; + return -1; + + /* attempt to allocate a granule's worth of cached memory pages */ - node = poolp->private; - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, + page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO, IA64_GRANULE_SHIFT-PAGE_SHIFT); + if (!page) + return -1; - dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n", - page, (unsigned long)(page-vmem_map) << PAGE_SHIFT); + /* convert the memory pages from cached to uncached */ - /* - * Do magic if no mem on local node! XXX - */ - if (!page) - return 0; - tmp = page_address(page); + c_addr = (unsigned long)page_address(page); + uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; /* * There's a small race here where it's possible for someone to @@ -100,76 +99,90 @@ uncached_get_new_chunk(struct gen_pool *poolp) for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) SetPageUncached(&page[i]); - flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE); + flush_tlb_kernel_range(uc_addr, uc_adddr + IA64_GRANULE_SIZE); status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); - - dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n", - status, raw_smp_processor_id()); - if (!status) { status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1); if (status) - printk(KERN_WARNING "smp_call_function failed for " - "uncached_ipi_visibility! (%i)\n", status); + goto failed; } + preempt_disable(); + if (ia64_platform_is("sn2")) - sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE); + sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE); else - flush_icache_range((unsigned long)tmp, - (unsigned long)tmp+IA64_GRANULE_SIZE); + flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); + + /* flush the just introduced uncached translation from the TLB */ + local_flush_tlb_all(); + + preempt_enable(); ia64_pal_mc_drain(); status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1); if (status) - printk(KERN_WARNING "smp_call_function failed for " - "uncached_ipi_mc_drain! (%i)\n", status); + goto failed; - addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; + /* + * The chunk of memory pages has been converted to uncached so now we + * can add it to the pool. + */ + status = gen_pool_add(pool, uc_addr, IA64_GRANULE_SIZE, nid); + if (status) + goto failed; allocated_granules++; - return addr; + return 0; + + /* failed to convert or add the chunk so give it back to the kernel */ +failed: + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + ClearPageUncached(&page[i]); + + free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); + return -1; } /* * uncached_alloc_page * + * @starting_nid: node id of node to start with, or -1 + * * Allocate 1 uncached page. Allocates on the requested node. If no * uncached pages are available on the requested node, roundrobin starting - * with higher nodes. + * with the next higher node. */ -unsigned long -uncached_alloc_page(int nid) +unsigned long uncached_alloc_page(int starting_nid) { - unsigned long maddr; + unsigned long uc_addr; + struct gen_pool *pool; + int nid; - maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE); + if (unlikely(starting_nid >= MAX_NUMNODES)) + return 0; - dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n", - maddr, nid); + if (starting_nid < 0) + starting_nid = numa_node_id(); + nid = starting_nid; - /* - * If no memory is availble on our local node, try the - * remaining nodes in the system. - */ - if (!maddr) { - int i; - - for (i = MAX_NUMNODES - 1; i >= 0; i--) { - if (i == nid || !node_online(i)) - continue; - maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE); - dprintk(KERN_DEBUG "uncached_alloc_page alternate search " - "returns %lx on node %i\n", maddr, i); - if (maddr) { - break; - } - } - } + do { + if (!node_online(nid)) + continue; + pool = uncached_pool[nid]; + if (pool == NULL) + continue; + do { + uc_addr = gen_pool_alloc(pool, PAGE_SIZE); + if (uc_addr != 0) + return uc_addr; + } while (uncached_add_chunk(pool, nid) == 0); + + } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); - return maddr; + return 0; } EXPORT_SYMBOL(uncached_alloc_page); @@ -177,21 +190,22 @@ EXPORT_SYMBOL(uncached_alloc_page); /* * uncached_free_page * + * @uc_addr: uncached address of page to free + * * Free a single uncached page. */ -void -uncached_free_page(unsigned long maddr) +void uncached_free_page(unsigned long uc_addr) { - int node; - - node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET); + int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pool[nid]; - dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); + if (unlikely(pool == NULL)) + return; - if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) - panic("uncached_free_page invalid address %lx\n", maddr); + if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", uc_addr); - gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE); + gen_pool_free(pool, uc_addr, PAGE_SIZE); } EXPORT_SYMBOL(uncached_free_page); @@ -199,43 +213,39 @@ EXPORT_SYMBOL(uncached_free_page); /* * uncached_build_memmap, * + * @uc_start: uncached starting address of a chunk of uncached memory + * @uc_end: uncached ending address of a chunk of uncached memory + * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc()) + * * Called at boot time to build a map of pages that can be used for * memory special operations. */ -static int __init -uncached_build_memmap(unsigned long start, unsigned long end, void *arg) +static int __init uncached_build_memmap(unsigned long uc_start, + unsigned long uc_end, void *arg) { - long length = end - start; - int node; - - dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); + int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pool[nid]; + size_t size = uc_end - uc_start; touch_softlockup_watchdog(); - memset((char *)start, 0, length); - node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET); - - for (; start < end ; start += PAGE_SIZE) { - dprintk(KERN_INFO "sticking %lx into the pool!\n", start); - gen_pool_free(uncached_pool[node], start, PAGE_SIZE); + if (pool != NULL) { + memset((char *)uc_start, 0, size); + (void) gen_pool_add(pool, uc_start, size, nid); } - return 0; } -static int __init uncached_init(void) { - int i; +static int __init uncached_init(void) +{ + int nid; - for (i = 0; i < MAX_NUMNODES; i++) { - if (!node_online(i)) - continue; - uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT, - &uncached_get_new_chunk, i); + for_each_online_node(nid) { + uncached_pool[nid] = gen_pool_create(PAGE_SHIFT, nid); } - efi_memmap_walk_uc(uncached_build_memmap); - + efi_memmap_walk_uc(uncached_build_memmap, NULL); return 0; } diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c index bc3cfa17cd0f..2862cb33026d 100644 --- a/arch/ia64/sn/kernel/sn2/cache.c +++ b/arch/ia64/sn/kernel/sn2/cache.c @@ -3,11 +3,12 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved. * */ #include #include +#include /** * sn_flush_all_caches - flush a range of address from all caches (incl. L4) @@ -17,18 +18,24 @@ * Flush a range of addresses from all caches including L4. * All addresses fully or partially contained within * @flush_addr to @flush_addr + @bytes are flushed - * from the all caches. + * from all caches. */ void sn_flush_all_caches(long flush_addr, long bytes) { - flush_icache_range(flush_addr, flush_addr+bytes); + unsigned long addr = flush_addr; + + /* SHub1 requires a cached address */ + if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED)) + addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL); + + flush_icache_range(addr, addr + bytes); /* * The last call may have returned before the caches * were actually flushed, so we call it again to make * sure. */ - flush_icache_range(flush_addr, flush_addr+bytes); + flush_icache_range(addr, addr + bytes); mb(); } EXPORT_SYMBOL(sn_flush_all_caches); diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 7fd0576a4454..690c42803d2e 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -4,37 +4,32 @@ * Uses for this includes on-device special memory, uncached memory * etc. * - * This code is based on the buddy allocator found in the sym53c8xx_2 - * driver, adapted for general purpose use. - * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ -#include -#define ALLOC_MIN_SHIFT 5 /* 32 bytes minimum */ /* - * Link between free memory chunks of a given size. + * General purpose special memory pool descriptor. */ -struct gen_pool_link { - struct gen_pool_link *next; +struct gen_pool { + rwlock_t lock; + struct list_head chunks; /* list of chunks in this pool */ + int min_alloc_order; /* minimum allocation order */ }; /* - * Memory pool descriptor. + * General purpose special memory pool chunk descriptor. */ -struct gen_pool { +struct gen_pool_chunk { spinlock_t lock; - unsigned long (*get_new_chunk)(struct gen_pool *); - struct gen_pool *next; - struct gen_pool_link *h; - unsigned long private; - int max_chunk_shift; + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start_addr; /* starting address of memory chunk */ + unsigned long end_addr; /* ending address of memory chunk */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ }; -unsigned long gen_pool_alloc(struct gen_pool *poolp, int size); -void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size); -struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift, - unsigned long (*fp)(struct gen_pool *), - unsigned long data); +extern struct gen_pool *gen_pool_create(int, int); +extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int); +extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); +extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); diff --git a/lib/genalloc.c b/lib/genalloc.c index 9ce0a6a3b85a..71338b48e889 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -4,10 +4,6 @@ * Uses for this includes on-device special memory, uncached memory * etc. * - * This code is based on the buddy allocator found in the sym53c8xx_2 - * driver Copyright (C) 1999-2001 Gerard Roudier , - * and adapted for general purpose use. - * * Copyright 2005 (C) Jes Sorensen * * This source code is licensed under the GNU General Public License, @@ -15,172 +11,155 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include - -struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift, - unsigned long (*fp)(struct gen_pool *), - unsigned long data) +/* + * Create a new special memory pool. + * + * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents + * @nid: node id of the node the pool structure should be allocated on, or -1 + */ +struct gen_pool *gen_pool_create(int min_alloc_order, int nid) { - struct gen_pool *poolp; - unsigned long tmp; - int i; - - /* - * This is really an arbitrary limit, +10 is enough for - * IA64_GRANULE_SHIFT, aka 16MB. If anyone needs a large limit - * this can be increased without problems. - */ - if ((max_chunk_shift > (PAGE_SHIFT + 10)) || - ((max_chunk_shift < ALLOC_MIN_SHIFT) && max_chunk_shift)) - return NULL; - - if (!max_chunk_shift) - max_chunk_shift = PAGE_SHIFT; - - poolp = kmalloc(sizeof(struct gen_pool), GFP_KERNEL); - if (!poolp) - return NULL; - memset(poolp, 0, sizeof(struct gen_pool)); - poolp->h = kmalloc(sizeof(struct gen_pool_link) * - (max_chunk_shift - ALLOC_MIN_SHIFT + 1), - GFP_KERNEL); - if (!poolp->h) { - printk(KERN_WARNING "gen_pool_alloc() failed to allocate\n"); - kfree(poolp); - return NULL; - } - memset(poolp->h, 0, sizeof(struct gen_pool_link) * - (max_chunk_shift - ALLOC_MIN_SHIFT + 1)); - - spin_lock_init(&poolp->lock); - poolp->get_new_chunk = fp; - poolp->max_chunk_shift = max_chunk_shift; - poolp->private = data; - - for (i = 0; i < nr_chunks; i++) { - tmp = poolp->get_new_chunk(poolp); - printk(KERN_INFO "allocated %lx\n", tmp); - if (!tmp) - break; - gen_pool_free(poolp, tmp, (1 << poolp->max_chunk_shift)); - } + struct gen_pool *pool; - return poolp; + pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); + if (pool != NULL) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->min_alloc_order = min_alloc_order; + } + return pool; } EXPORT_SYMBOL(gen_pool_create); /* - * Simple power of two buddy-like generic allocator. - * Provides naturally aligned memory chunks. + * Add a new chunk of memory to the specified pool. + * + * @pool: pool to add new memory chunk to + * @addr: starting address of memory chunk to add to pool + * @size: size in bytes of the memory chunk to add to pool + * @nid: node id of the node the chunk structure and bitmap should be + * allocated on, or -1 */ -unsigned long gen_pool_alloc(struct gen_pool *poolp, int size) +int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, + int nid) { - int j, i, s, max_chunk_size; - unsigned long a, flags; - struct gen_pool_link *h = poolp->h; + struct gen_pool_chunk *chunk; + int nbits = size >> pool->min_alloc_order; + int nbytes = sizeof(struct gen_pool_chunk) + + (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; - max_chunk_size = 1 << poolp->max_chunk_shift; + chunk = kmalloc_node(nbytes, GFP_KERNEL, nid); + if (unlikely(chunk == NULL)) + return -1; - if (size > max_chunk_size) - return 0; + memset(chunk, 0, nbytes); + spin_lock_init(&chunk->lock); + chunk->start_addr = addr; + chunk->end_addr = addr + size; - size = max(size, 1 << ALLOC_MIN_SHIFT); - i = fls(size - 1); - s = 1 << i; - j = i -= ALLOC_MIN_SHIFT; - - spin_lock_irqsave(&poolp->lock, flags); - while (!h[j].next) { - if (s == max_chunk_size) { - struct gen_pool_link *ptr; - spin_unlock_irqrestore(&poolp->lock, flags); - ptr = (struct gen_pool_link *)poolp->get_new_chunk(poolp); - spin_lock_irqsave(&poolp->lock, flags); - h[j].next = ptr; - if (h[j].next) - h[j].next->next = NULL; - break; - } - j++; - s <<= 1; - } - a = (unsigned long) h[j].next; - if (a) { - h[j].next = h[j].next->next; - /* - * This should be split into a seperate function doing - * the chunk split in order to support custom - * handling memory not physically accessible by host - */ - while (j > i) { - j -= 1; - s >>= 1; - h[j].next = (struct gen_pool_link *) (a + s); - h[j].next->next = NULL; - } - } - spin_unlock_irqrestore(&poolp->lock, flags); - return a; + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; } -EXPORT_SYMBOL(gen_pool_alloc); +EXPORT_SYMBOL(gen_pool_add); /* - * Counter-part of the generic allocator. + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + * + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool */ -void gen_pool_free(struct gen_pool *poolp, unsigned long ptr, int size) +unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { - struct gen_pool_link *q; - struct gen_pool_link *h = poolp->h; - unsigned long a, b, flags; - int i, s, max_chunk_size; - - max_chunk_size = 1 << poolp->max_chunk_shift; + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long addr, flags; + int order = pool->min_alloc_order; + int nbits, bit, start_bit, end_bit; - if (size > max_chunk_size) - return; - - size = max(size, 1 << ALLOC_MIN_SHIFT); - i = fls(size - 1); - s = 1 << i; - i -= ALLOC_MIN_SHIFT; - - a = ptr; + if (size == 0) + return 0; - spin_lock_irqsave(&poolp->lock, flags); - while (1) { - if (s == max_chunk_size) { - ((struct gen_pool_link *)a)->next = h[i].next; - h[i].next = (struct gen_pool_link *)a; - break; + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit -= nbits + 1; + + spin_lock_irqsave(&chunk->lock, flags); + bit = -1; + while (bit + 1 < end_bit) { + bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); + if (bit >= end_bit) + break; + + start_bit = bit; + if (nbits > 1) { + bit = find_next_bit(chunk->bits, bit + nbits, + bit + 1); + if (bit - start_bit < nbits) + continue; + } + + addr = chunk->start_addr + + ((unsigned long)start_bit << order); + while (nbits--) + __set_bit(start_bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); + read_unlock(&pool->lock); + return addr; } - b = a ^ s; - q = &h[i]; + spin_unlock_irqrestore(&chunk->lock, flags); + } + read_unlock(&pool->lock); + return 0; +} +EXPORT_SYMBOL(gen_pool_alloc); - while (q->next && q->next != (struct gen_pool_link *)b) - q = q->next; - if (!q->next) { - ((struct gen_pool_link *)a)->next = h[i].next; - h[i].next = (struct gen_pool_link *)a; +/* + * Free the specified memory back to the specified pool. + * + * @pool: pool to free to + * @addr: starting address of memory to free back to pool + * @size: size in bytes of memory to free + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct list_head *_chunk; + struct gen_pool_chunk *chunk; + unsigned long flags; + int order = pool->min_alloc_order; + int bit, nbits; + + nbits = (size + (1UL << order) - 1) >> order; + + read_lock(&pool->lock); + list_for_each(_chunk, &pool->chunks) { + chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + BUG_ON(addr + size > chunk->end_addr); + spin_lock_irqsave(&chunk->lock, flags); + bit = (addr - chunk->start_addr) >> order; + while (nbits--) + __clear_bit(bit++, &chunk->bits); + spin_unlock_irqrestore(&chunk->lock, flags); break; } - q->next = q->next->next; - a = a & b; - s <<= 1; - i++; } - spin_unlock_irqrestore(&poolp->lock, flags); + BUG_ON(nbits > 0); + read_unlock(&pool->lock); } EXPORT_SYMBOL(gen_pool_free); -- cgit v1.2.3