diff options
Diffstat (limited to 'arch/powerpc/platforms')
50 files changed, 1178 insertions, 1029 deletions
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c index 81b2cbce7df8..7c324eff2f22 100644 --- a/arch/powerpc/platforms/4xx/msi.c +++ b/arch/powerpc/platforms/4xx/msi.c @@ -146,13 +146,19 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, const u32 *sdr_addr; dma_addr_t msi_phys; void *msi_virt; + int err; sdr_addr = of_get_property(dev->dev.of_node, "sdr-base", NULL); if (!sdr_addr) - return -1; + return -EINVAL; - mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */ - mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */ + msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); + if (!msi_data) + return -EINVAL; + + msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); + if (!msi_mask) + return -EINVAL; msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi"); if (!msi->msi_dev) @@ -160,30 +166,30 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, msi->msi_regs = of_iomap(msi->msi_dev, 0); if (!msi->msi_regs) { - dev_err(&dev->dev, "of_iomap problem failed\n"); - return -ENOMEM; + dev_err(&dev->dev, "of_iomap failed\n"); + err = -ENOMEM; + goto node_put; } dev_dbg(&dev->dev, "PCIE-MSI: msi register mapped 0x%x 0x%x\n", (u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs)); msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL); - if (!msi_virt) - return -ENOMEM; + if (!msi_virt) { + err = -ENOMEM; + goto iounmap; + } msi->msi_addr_hi = upper_32_bits(msi_phys); msi->msi_addr_lo = lower_32_bits(msi_phys & 0xffffffff); dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n", msi->msi_addr_hi, msi->msi_addr_lo); + mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */ + mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */ + /* Progam the Interrupt handler Termination addr registers */ out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi); out_be32(msi->msi_regs + PEIH_TERMADL, msi->msi_addr_lo); - msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); - if (!msi_data) - return -1; - msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); - if (!msi_mask) - return -1; /* Program MSI Expected data and Mask bits */ out_be32(msi->msi_regs + PEIH_MSIED, *msi_data); out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask); @@ -191,6 +197,12 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys); return 0; + +iounmap: + iounmap(msi->msi_regs); +node_put: + of_node_put(msi->msi_dev); + return err; } static int ppc4xx_of_msi_remove(struct platform_device *dev) @@ -209,7 +221,6 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev) msi_bitmap_free(&msi->bitmap); iounmap(msi->msi_regs); of_node_put(msi->msi_dev); - kfree(msi); return 0; } @@ -223,18 +234,16 @@ static int ppc4xx_msi_probe(struct platform_device *dev) dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); - msi = kzalloc(sizeof(*msi), GFP_KERNEL); - if (!msi) { - dev_err(&dev->dev, "No memory for MSI structure\n"); + msi = devm_kzalloc(&dev->dev, sizeof(*msi), GFP_KERNEL); + if (!msi) return -ENOMEM; - } dev->dev.platform_data = msi; /* Get MSI ranges */ err = of_address_to_resource(dev->dev.of_node, 0, &res); if (err) { dev_err(&dev->dev, "%pOF resource error!\n", dev->dev.of_node); - goto error_out; + return err; } msi_irqs = of_irq_count(dev->dev.of_node); @@ -243,7 +252,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) err = ppc4xx_setup_pcieh_hw(dev, res, msi); if (err) - goto error_out; + return err; err = ppc4xx_msi_init_allocator(dev, msi); if (err) { @@ -256,7 +265,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs; phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; } - return err; + return 0; error_out: ppc4xx_of_msi_remove(dev); diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile index ff2f86fe5429..f40d48eab779 100644 --- a/arch/powerpc/platforms/52xx/Makefile +++ b/arch/powerpc/platforms/52xx/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_PPC_LITE5200) += lite5200.o obj-$(CONFIG_PPC_MEDIA5200) += media5200.o obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o -ifeq ($(CONFIG_PPC_LITE5200),y) +ifdef CONFIG_PPC_LITE5200 obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o endif diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c index 31d3515672f3..b1d208ded981 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c @@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state) u32 intr_main_mask; void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500; unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size; - char saved_0x500[mpc52xx_ds_cached_size]; + char saved_0x500[0x600-0x500]; + + if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500))) + return -ENOMEM; /* disable all interrupts in PIC */ intr_main_mask = in_be32(&intr->main_mask); diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c index 58fa3d319f1c..dac36ba82fea 100644 --- a/arch/powerpc/platforms/85xx/t1042rdb_diu.c +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -9,8 +9,10 @@ * option) any later version. */ +#include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> @@ -150,3 +152,5 @@ static int __init t1042rdb_diu_init(void) } early_initcall(t1042rdb_diu_init); + +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e6a1de521319..6c6a7c72cae4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -74,7 +74,6 @@ config PPC_BOOK3S_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK - select HAVE_KERNEL_XZ config PPC_BOOK3E_64 bool "Embedded processors" @@ -86,7 +85,6 @@ endchoice choice prompt "CPU selection" - depends on PPC64 default GENERIC_CPU help This will create a kernel which is optimised for a particular CPU. @@ -96,13 +94,17 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" - depends on !CPU_LITTLE_ENDIAN + depends on PPC64 && !CPU_LITTLE_ENDIAN config GENERIC_CPU bool "Generic (POWER8 and above)" - depends on CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER +config GENERIC_CPU + bool "Generic 32 bits powerpc" + depends on PPC32 && !PPC_8xx + config CELL_CPU bool "Cell Broadband Engine" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN @@ -138,8 +140,37 @@ config E6500_CPU bool "Freescale e6500" depends on E500 +config 860_CPU + bool "8xx family" + depends on PPC_8xx + +config E300C2_CPU + bool "e300c2 (832x)" + depends on PPC_BOOK3S_32 + +config E300C3_CPU + bool "e300c3 (831x)" + depends on PPC_BOOK3S_32 + endchoice +config TARGET_CPU_BOOL + bool + default !GENERIC_CPU + +config TARGET_CPU + string + depends on TARGET_CPU_BOOL + default "cell" if CELL_CPU + default "power5" if POWER5_CPU + default "power6" if POWER6_CPU + default "power7" if POWER7_CPU + default "power8" if POWER8_CPU + default "power9" if POWER9_CPU + default "860" if 860_CPU + default "e300c2" if E300C2_CPU + default "e300c3" if E300C3_CPU + config PPC_BOOK3S def_bool y depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index d5f808e8a5f3..10064a33ca96 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o -ifeq ($(CONFIG_SMP),y) +ifdef CONFIG_SMP obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o endif diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2c15ff094483..55aac74e1cb9 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -49,6 +49,7 @@ #include <linux/device.h> #include <linux/kernel.h> #include <linux/cpu.h> +#include <linux/stringify.h> #include <asm/spu.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 1e002e94d0f6..83cf58daaa79 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -111,7 +111,7 @@ int spufs_handle_class1(struct spu_context *ctx) { u64 ea, dsisr, access; unsigned long flags; - unsigned flt = 0; + vm_fault_t flt = 0; int ret; /* diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h index d557e999b662..1def11e911ac 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.h +++ b/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -3,6 +3,7 @@ #define _TRACE_SPUFS_H #include <linux/tracepoint.h> +#include <linux/stringify.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM spufs diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index c3ede2c365c3..791b86398e1d 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -11,6 +11,7 @@ */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/uaccess.h> @@ -89,3 +90,5 @@ void __init chrp_nvram_init(void) return; } + +MODULE_LICENSE("GPL v2"); diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 8bb46dcbebd8..403523c061ba 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -67,16 +67,6 @@ void __init wii_memory_fixups(void) { struct memblock_region *p = memblock.memory.regions; - /* - * This is part of a workaround to allow the use of two - * discontinuous RAM ranges on the Wii, even if this is - * currently unsupported on 32-bit PowerPC Linux. - * - * We coalesce the two memory ranges of the Wii into a - * single range, then create a reservation for the "hole" - * between both ranges. - */ - BUG_ON(memblock.memory.cnt != 2); BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index 2c72263ad6ab..c80f72c370ae 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -531,7 +531,7 @@ int pasemi_dma_init(void) iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL); if (!iob_pdev) { BUG(); - printk(KERN_WARNING "Can't find I/O Bridge\n"); + pr_warn("Can't find I/O Bridge\n"); err = -ENODEV; goto out; } @@ -540,7 +540,7 @@ int pasemi_dma_init(void) dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); if (!dma_pdev) { BUG(); - printk(KERN_WARNING "Can't find DMA controller\n"); + pr_warn("Can't find DMA controller\n"); err = -ENODEV; goto out; } @@ -623,7 +623,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff); pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff); - printk(KERN_INFO "PA Semi PWRficient DMA library initialized " + pr_info("PA Semi PWRficient DMA library initialized " "(%d tx, %d rx channels)\n", num_txch, num_rxch); out: diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index c23e60959aa8..6f35a2afe522 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -256,7 +256,7 @@ static int gpio_mdio_probe(struct platform_device *ofdev) err = of_mdiobus_register(new_bus, np); if (err != 0) { - printk(KERN_ERR "%s: Cannot register as MDIO bus, err %d\n", + pr_err("%s: Cannot register as MDIO bus, err %d\n", new_bus->name, err); goto out_free_irq; } diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 44e0d9226f0a..8bb4e8082441 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -78,13 +78,13 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) static int __init pasemi_idle_init(void) { #ifndef CONFIG_PPC_PASEMI_CPUFREQ - printk(KERN_WARNING "No cpufreq driver, powersavings modes disabled\n"); + pr_warn("No cpufreq driver, powersavings modes disabled\n"); current_mode = 0; #endif ppc_md.system_reset_exception = pasemi_system_reset_exception; ppc_md.power_save = modes[current_mode].entry; - printk(KERN_INFO "Using PA6T idle loop (%s)\n", modes[current_mode].name); + pr_info("Using PA6T idle loop (%s)\n", modes[current_mode].name); return 0; } diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 78b80cbd9768..f06c83f321e6 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -210,7 +210,7 @@ static int __init iob_init(struct device_node *dn) /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); - printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); + pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); /* Allocate a spare page to map all invalid IOTLB pages. */ tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index 8571e7bf78b6..aa958a46957f 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -69,9 +69,7 @@ static int __init pasemi_register_i2c_devices(void) addr = of_get_property(node, "reg", &len); if (!addr || len < sizeof(int) || *addr > (1 << 10) - 1) { - printk(KERN_WARNING - "pasemi_register_i2c_devices: " - "invalid i2c device entry\n"); + pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n"); continue; } diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index aea9ff2c8e6d..c3c64172482d 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -205,7 +205,7 @@ static int __init pas_add_bridge(struct device_node *dev) setup_pa_pxp(hose); - printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); + pr_info("Found PA-PXP PCI host bridge.\n"); /* Interpret the "ranges" property */ pci_process_bridge_OF_ranges(hose, dev, 1); @@ -216,21 +216,21 @@ static int __init pas_add_bridge(struct device_node *dev) void __init pas_pci_init(void) { struct device_node *np, *root; + int res; root = of_find_node_by_path("/"); if (!root) { - printk(KERN_CRIT "pas_pci_init: can't find root " - "of device tree\n"); + pr_crit("pas_pci_init: can't find root of device tree\n"); return; } pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); - for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) - if (np->name && !strcmp(np->name, "pxp") && !pas_add_bridge(np)) - of_node_get(np); - - of_node_put(root); + np = of_find_compatible_node(root, NULL, "pasemi,rootbus"); + if (np) { + res = pas_add_bridge(np); + of_node_put(np); + } } void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index d0b8ae53660d..9a6eb04cca83 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -207,8 +207,7 @@ static __init void pas_init_IRQ(void) break; } if (!mpic_node) { - printk(KERN_ERR - "Failed to locate the MPIC interrupt controller\n"); + pr_err("Failed to locate the MPIC interrupt controller\n"); return; } @@ -217,12 +216,12 @@ static __init void pas_init_IRQ(void) naddr = of_n_addr_cells(root); opprop = of_get_property(root, "platform-open-pic", &opplen); if (!opprop) { - printk(KERN_ERR "No platform-open-pic property.\n"); + pr_err("No platform-open-pic property.\n"); of_node_put(root); return; } openpic_addr = of_read_number(opprop, naddr); - printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + pr_debug("OpenPIC addr: %lx\n", openpic_addr); mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET; @@ -265,72 +264,72 @@ static int pas_machine_check_handler(struct pt_regs *regs) srr1 = regs->msr; if (nmi_virq && mpic_get_mcirq() == nmi_virq) { - printk(KERN_ERR "NMI delivered\n"); + pr_err("NMI delivered\n"); debugger(regs); mpic_end_irq(irq_get_irq_data(nmi_virq)); goto out; } dsisr = mfspr(SPRN_DSISR); - printk(KERN_ERR "Machine Check on CPU %d\n", cpu); - printk(KERN_ERR "SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); - printk(KERN_ERR "DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); - printk(KERN_ERR "BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), + pr_err("Machine Check on CPU %d\n", cpu); + pr_err("SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); + pr_err("DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); + pr_err("BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), mfspr(SPRN_PA6T_MER)); - printk(KERN_ERR "IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), + pr_err("IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), mfspr(SPRN_PA6T_DER)); - printk(KERN_ERR "Cause:\n"); + pr_err("Cause:\n"); if (srr1 & 0x200000) - printk(KERN_ERR "Signalled by SDC\n"); + pr_err("Signalled by SDC\n"); if (srr1 & 0x100000) { - printk(KERN_ERR "Load/Store detected error:\n"); + pr_err("Load/Store detected error:\n"); if (dsisr & 0x8000) - printk(KERN_ERR "D-cache ECC double-bit error or bus error\n"); + pr_err("D-cache ECC double-bit error or bus error\n"); if (dsisr & 0x4000) - printk(KERN_ERR "LSU snoop response error\n"); + pr_err("LSU snoop response error\n"); if (dsisr & 0x2000) { - printk(KERN_ERR "MMU SLB multi-hit or invalid B field\n"); + pr_err("MMU SLB multi-hit or invalid B field\n"); dump_slb = 1; } if (dsisr & 0x1000) - printk(KERN_ERR "Recoverable Duptags\n"); + pr_err("Recoverable Duptags\n"); if (dsisr & 0x800) - printk(KERN_ERR "Recoverable D-cache parity error count overflow\n"); + pr_err("Recoverable D-cache parity error count overflow\n"); if (dsisr & 0x400) - printk(KERN_ERR "TLB parity error count overflow\n"); + pr_err("TLB parity error count overflow\n"); } if (srr1 & 0x80000) - printk(KERN_ERR "Bus Error\n"); + pr_err("Bus Error\n"); if (srr1 & 0x40000) { - printk(KERN_ERR "I-side SLB multiple hit\n"); + pr_err("I-side SLB multiple hit\n"); dump_slb = 1; } if (srr1 & 0x20000) - printk(KERN_ERR "I-cache parity error hit\n"); + pr_err("I-cache parity error hit\n"); if (num_mce_regs == 0) - printk(KERN_ERR "No MCE registers mapped yet, can't dump\n"); + pr_err("No MCE registers mapped yet, can't dump\n"); else - printk(KERN_ERR "SoC debug registers:\n"); + pr_err("SoC debug registers:\n"); for (i = 0; i < num_mce_regs; i++) - printk(KERN_ERR "%s: 0x%08x\n", mce_regs[i].name, + pr_err("%s: 0x%08x\n", mce_regs[i].name, in_le32(mce_regs[i].addr)); if (dump_slb) { unsigned long e, v; int i; - printk(KERN_ERR "slb contents:\n"); + pr_err("slb contents:\n"); for (i = 0; i < mmu_slb_size; i++) { asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i)); asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i)); - printk(KERN_ERR "%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx\n", i, e, v); } } diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index cc5347eb1662..27862feee4a5 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/cputable.h> +#include <asm/feature-fixups.h> /* * Flush and disable all data caches (dL1, L2, L3). This is used diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 3f82cb24eb2b..4eb8cb38fc69 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2889,10 +2889,8 @@ set_initial_features(void) /* On all machines, switch modem & serial ports off */ for_each_node_by_name(np, "ch-a") initial_serial_shutdown(np); - of_node_put(np); for_each_node_by_name(np, "ch-b") initial_serial_shutdown(np); - of_node_put(np); } void __init diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index df762bb3c735..04527d13d5a4 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -781,12 +781,12 @@ static int __init pmac_add_bridge(struct device_node *dev) struct resource rsrc; char *disp_name; const int *bus_range; - int primary = 1, has_address = 0; + int primary = 1; DBG("Adding PCI host bridge %pOF\n", dev); /* Fetch host bridge registers address */ - has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); + of_address_to_resource(dev, 0, &rsrc); /* Get bus range if any */ bus_range = of_get_property(dev, "bus-range", &len); @@ -904,7 +904,7 @@ static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge) void __init pmac_pci_init(void) { struct device_node *np, *root; - struct device_node *ht = NULL; + struct device_node *ht __maybe_unused = NULL; pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -1019,7 +1019,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev) return true; } -void pmac_pci_fixup_ohci(struct pci_dev *dev) +static void pmac_pci_fixup_ohci(struct pci_dev *dev) { struct device_node *node = pci_device_to_OF_node(dev); @@ -1054,7 +1054,7 @@ void __init pmac_pcibios_after_init(void) } } -void pmac_pci_fixup_cardbus(struct pci_dev* dev) +static void pmac_pci_fixup_cardbus(struct pci_dev *dev) { if (!machine_is(powermac)) return; @@ -1091,7 +1091,7 @@ void pmac_pci_fixup_cardbus(struct pci_dev* dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); -void pmac_pci_fixup_pciata(struct pci_dev* dev) +static void pmac_pci_fixup_pciata(struct pci_dev *dev) { u8 progif = 0; diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 1c2802fabd57..f89808b9713d 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -18,6 +18,7 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/mmu.h> +#include <asm/feature-fixups.h> #define MAGIC 0x4c617273 /* 'Lars' */ diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 12e6e4d30602..f92c1918fb56 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -34,6 +34,8 @@ #include <asm/nvram.h> #include <asm/smu.h> +#include "pmac.h" + #undef DEBUG #ifdef DEBUG @@ -249,7 +251,7 @@ int pmac_set_rtc_time(struct rtc_time *tm) * Calibrate the decrementer register using VIA timer 1. * This is used both on powermacs and CHRP machines. */ -int __init via_calibrate_decr(void) +static int __init via_calibrate_decr(void) { struct device_node *vias; volatile unsigned char __iomem *via; diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index d83135a9830e..8901973ed683 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -73,7 +73,7 @@ void udbg_scc_init(int force_scc) struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; struct device_node *ch, *ch_def = NULL, *ch_a = NULL; const char *path; - int i, x; + int i; escc = of_find_node_by_name(NULL, "escc"); if (escc == NULL) @@ -120,7 +120,7 @@ void udbg_scc_init(int force_scc) mb(); for (i = 20000; i != 0; --i) - x = in_8(sccc); + in_8(sccc); out_8(sccc, 0x09); /* reset A or B side */ out_8(sccc, 0xc0); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 703a350a7f4e..b540ce8eec55 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ddfc3544d285..3c1beae29f2d 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -223,6 +223,14 @@ int pnv_eeh_post_init(void) eeh_probe_devices(); eeh_addr_cache_build(); + if (eeh_has_flag(EEH_POSTPONED_PROBE)) { + eeh_clear_flag(EEH_POSTPONED_PROBE); + if (eeh_enabled()) + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + pr_info("EEH: No capable adapters found\n"); + } + /* Register OPAL event notifier */ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); if (eeh_event_irq < 0) { @@ -384,8 +392,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) return NULL; /* Skip if we haven't probed yet */ - if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) + if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) { + eeh_add_flag(EEH_POSTPONED_PROBE); return NULL; + } /* Initialize eeh device */ edev->class_code = pdn->class_code; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1c5d0675b43c..35f699ebb662 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -36,6 +36,8 @@ #define P9_STOP_SPR_PSSCR 855 static u32 supported_cpuidle_states; +struct pnv_idle_states_t *pnv_idle_states; +int nr_pnv_idle_states; /* * The default stop state that will be used by ppc_md.power_save @@ -177,11 +179,6 @@ static void pnv_alloc_idle_core_states(void) paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; paca_ptrs[cpu]->thread_mask = 1 << j; - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) - continue; - paca_ptrs[cpu]->thread_sibling_pacas = - kmalloc_node(paca_ptr_array_size, - GFP_KERNEL, node); } } @@ -622,48 +619,10 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, - int dt_idle_states) +static int __init pnv_power9_idle_init(void) { - u64 *psscr_val = NULL; - u64 *psscr_mask = NULL; - u32 *residency_ns = NULL; u64 max_residency_ns = 0; - int rc = 0, i; - - psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); - psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); - residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns), - GFP_KERNEL); - - if (!psscr_val || !psscr_mask || !residency_ns) { - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr", - psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr-mask", - psscr_mask, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); - rc = -1; - goto out; - } - - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-residency-ns", - residency_ns, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); - rc = -1; - goto out; - } + int i; /* * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, @@ -679,33 +638,37 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. */ pnv_first_deep_stop_state = MAX_STOP_STATE; - for (i = 0; i < dt_idle_states; i++) { + for (i = 0; i < nr_pnv_idle_states; i++) { int err; - u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; + u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; - if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_deep_stop_state > psscr_rl)) + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && + pnv_first_deep_stop_state > psscr_rl) pnv_first_deep_stop_state = psscr_rl; - err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i], - flags[i]); + err = validate_psscr_val_mask(&state->psscr_val, + &state->psscr_mask, + state->flags); if (err) { - report_invalid_psscr_val(psscr_val[i], err); + report_invalid_psscr_val(state->psscr_val, err); continue; } - if (max_residency_ns < residency_ns[i]) { - max_residency_ns = residency_ns[i]; - pnv_deepest_stop_psscr_val = psscr_val[i]; - pnv_deepest_stop_psscr_mask = psscr_mask[i]; - pnv_deepest_stop_flag = flags[i]; + state->valid = true; + + if (max_residency_ns < state->residency_ns) { + max_residency_ns = state->residency_ns; + pnv_deepest_stop_psscr_val = state->psscr_val; + pnv_deepest_stop_psscr_mask = state->psscr_mask; + pnv_deepest_stop_flag = state->flags; deepest_stop_found = true; } if (!default_stop_found && - (flags[i] & OPAL_PM_STOP_INST_FAST)) { - pnv_default_stop_val = psscr_val[i]; - pnv_default_stop_mask = psscr_mask[i]; + (state->flags & OPAL_PM_STOP_INST_FAST)) { + pnv_default_stop_val = state->psscr_val; + pnv_default_stop_mask = state->psscr_mask; default_stop_found = true; } } @@ -728,11 +691,8 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", pnv_first_deep_stop_state); -out: - kfree(psscr_val); - kfree(psscr_mask); - kfree(residency_ns); - return rc; + + return 0; } /* @@ -740,50 +700,146 @@ out: */ static void __init pnv_probe_idle_states(void) { - struct device_node *np; - int dt_idle_states; - u32 *flags = NULL; int i; + if (nr_pnv_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + return; + } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (pnv_power9_idle_init()) + return; + } + + for (i = 0; i < nr_pnv_idle_states; i++) + supported_cpuidle_states |= pnv_idle_states[i].flags; +} + +/* + * This function parses device-tree and populates all the information + * into pnv_idle_states structure. It also sets up nr_pnv_idle_states + * which is the number of cpuidle states discovered through device-tree. + */ + +static int pnv_parse_cpuidle_dt(void) +{ + struct device_node *np; + int nr_idle_states, i; + int rc = 0; + u32 *temp_u32; + u64 *temp_u64; + const char **temp_string; + np = of_find_node_by_path("/ibm,opal/power-mgt"); if (!np) { pr_warn("opal: PowerMgmt Node not found\n"); - goto out; + return -ENODEV; } - dt_idle_states = of_property_count_u32_elems(np, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + nr_idle_states = of_property_count_u32_elems(np, + "ibm,cpu-idle-state-flags"); + + pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), + GFP_KERNEL); + temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); + temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); + temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); + + if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { + pr_err("Could not allocate memory for dt parsing\n"); + rc = -ENOMEM; goto out; } - flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL); - - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + /* Read flags */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", + temp_u32, nr_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].flags = temp_u32[i]; + + /* Read latencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].latency_ns = temp_u32[i]; + + /* Read residencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; goto out; } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].residency_ns = temp_u32[i]; + /* For power9 */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_power9_idle_init(np, flags, dt_idle_states)) + /* Read pm_crtl_val */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_val = temp_u64[i]; + + /* Read pm_crtl_mask */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); + rc = -EINVAL; goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_mask = temp_u64[i]; } - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; + /* + * power8 specific properties ibm,cpu-idle-state-pmicr-mask and + * ibm,cpu-idle-state-pmicr-val were never used and there is no + * plan to use it in near future. Hence, not parsing these properties + */ + if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", + temp_string, nr_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + strlcpy(pnv_idle_states[i].name, temp_string[i], + PNV_IDLE_NAME_LEN); + nr_pnv_idle_states = nr_idle_states; + rc = 0; out: - kfree(flags); + kfree(temp_u32); + kfree(temp_u64); + kfree(temp_string); + return rc; } + static int __init pnv_init_idle_states(void) { - + int rc = 0; supported_cpuidle_states = 0; + /* In case we error out nr_pnv_idle_states will be zero */ + nr_pnv_idle_states = 0; if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; - + rc = pnv_parse_cpuidle_dt(); + if (rc) + return rc; pnv_probe_idle_states(); if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { @@ -805,29 +861,6 @@ static int __init pnv_init_idle_states(void) pnv_alloc_idle_core_states(); - /* - * For each CPU, record its PACA address in each of it's - * sibling thread's PACA at the slot corresponding to this - * CPU's index in the core. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - int cpu; - - pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n"); - for_each_present_cpu(cpu) { - int base_cpu = cpu_first_thread_sibling(cpu); - int idx = cpu_thread_in_core(cpu); - int i; - - for (i = 0; i < threads_per_core; i++) { - int j = base_cpu + i; - - paca_ptrs[j]->thread_sibling_pacas[idx] = - paca_ptrs[cpu]; - } - } - } - if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) ppc_md.power_save = power7_idle; diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index b99283df8584..51dc398ae3f7 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -47,38 +47,9 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); } -static bool valid_memtrace_range(struct memtrace_entry *dev, - unsigned long start, unsigned long size) -{ - if ((start >= dev->start) && - ((start + size) <= (dev->start + dev->size))) - return true; - - return false; -} - -static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long size = vma->vm_end - vma->vm_start; - struct memtrace_entry *dev = filp->private_data; - - if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size)) - return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff + (dev->start >> PAGE_SHIFT), - size, vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} - static const struct file_operations memtrace_fops = { .llseek = default_llseek, .read = memtrace_read, - .mmap = memtrace_mmap, .open = simple_open, }; @@ -206,8 +177,11 @@ static int memtrace_init_debugfs(void) snprintf(ent->name, 16, "%08x", ent->nid); dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); - if (!dir) + if (!dir) { + pr_err("Failed to create debugfs directory for node %d\n", + ent->nid); return -1; + } ent->dir = dir; debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); @@ -218,18 +192,93 @@ static int memtrace_init_debugfs(void) return ret; } +static int online_mem_block(struct memory_block *mem, void *arg) +{ + return device_online(&mem->dev); +} + +/* + * Iterate through the chunks of memory we have removed from the kernel + * and attempt to add them back to the kernel. + */ +static int memtrace_online(void) +{ + int i, ret = 0; + struct memtrace_entry *ent; + + for (i = memtrace_array_nr - 1; i >= 0; i--) { + ent = &memtrace_array[i]; + + /* We have onlined this chunk previously */ + if (ent->nid == -1) + continue; + + /* Remove from io mappings */ + if (ent->mem) { + iounmap(ent->mem); + ent->mem = 0; + } + + if (add_memory(ent->nid, ent->start, ent->size)) { + pr_err("Failed to add trace memory to node %d\n", + ent->nid); + ret += 1; + continue; + } + + /* + * If kernel isn't compiled with the auto online option + * we need to online the memory ourselves. + */ + if (!memhp_auto_online) { + walk_memory_range(PFN_DOWN(ent->start), + PFN_UP(ent->start + ent->size - 1), + NULL, online_mem_block); + } + + /* + * Memory was added successfully so clean up references to it + * so on reentry we can tell that this chunk was added. + */ + debugfs_remove_recursive(ent->dir); + pr_info("Added trace memory back to node %d\n", ent->nid); + ent->size = ent->start = ent->nid = -1; + } + if (ret) + return ret; + + /* If all chunks of memory were added successfully, reset globals */ + kfree(memtrace_array); + memtrace_array = NULL; + memtrace_size = 0; + memtrace_array_nr = 0; + return 0; +} + static int memtrace_enable_set(void *data, u64 val) { - if (memtrace_size) + u64 bytes; + + /* + * Don't attempt to do anything if size isn't aligned to a memory + * block or equal to zero. + */ + bytes = memory_block_size_bytes(); + if (val & (bytes - 1)) { + pr_err("Value must be aligned with 0x%llx\n", bytes); return -EINVAL; + } - if (!val) - return -EINVAL; + /* Re-add/online previously removed/offlined memory */ + if (memtrace_size) { + if (memtrace_online()) + return -EAGAIN; + } - /* Make sure size is aligned to a memory block */ - if (val & (memory_block_size_bytes() - 1)) - return -EINVAL; + if (!val) + return 0; + /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) return -EINVAL; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 8cdf91f5d3a4..8006c54a91e3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -17,7 +17,9 @@ #include <linux/pci.h> #include <linux/memblock.h> #include <linux/iommu.h> +#include <linux/debugfs.h> +#include <asm/debugfs.h> #include <asm/tlb.h> #include <asm/powernv.h> #include <asm/reg.h> @@ -44,7 +46,8 @@ static DEFINE_SPINLOCK(npu_context_lock); * entire TLB on the GPU for the given PID rather than each specific address in * the range. */ -#define ATSD_THRESHOLD (2*1024*1024) +static uint64_t atsd_threshold = 2 * 1024 * 1024; +static struct dentry *atsd_threshold_dentry; /* * Other types of TCE cache invalidation are not functional in the @@ -437,8 +440,9 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; + if (!test_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) + return i; } return -ENOSPC; @@ -683,7 +687,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - if (end - start > ATSD_THRESHOLD) { + if (end - start > atsd_threshold) { /* * Just invalidate the entire PID if the address range is too * large. @@ -958,6 +962,11 @@ int pnv_npu2_init(struct pnv_phb *phb) static int npu_index; uint64_t rc = 0; + if (!atsd_threshold_dentry) { + atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", + 0600, powerpc_debugfs_root, &atsd_threshold); + } + phb->npu.nmmu_flush = of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); for_each_child_of_node(phb->hose->dn, dn) { diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 0dc8fa4e0af2..198143833f00 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -225,13 +225,16 @@ static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t * if (rc == OPAL_PARAMETER) rc = opal_dump_info(&id, &size); + if (rc) { + pr_warn("%s: Failed to get dump info (%d)\n", + __func__, rc); + return rc; + } + *dump_id = be32_to_cpu(id); *dump_size = be32_to_cpu(size); *dump_type = be32_to_cpu(type); - if (rc) - pr_warn("%s: Failed to get dump info (%d)\n", - __func__, rc); return rc; } @@ -368,13 +371,12 @@ static irqreturn_t process_dump(int irq, void *data) { int rc; uint32_t dump_id, dump_size, dump_type; - struct dump_obj *dump; char name[22]; struct kobject *kobj; rc = dump_read_info(&dump_id, &dump_size, &dump_type); if (rc != OPAL_SUCCESS) - return rc; + return IRQ_HANDLED; sprintf(name, "0x%x-0x%x", dump_type, dump_id); @@ -386,12 +388,10 @@ static irqreturn_t process_dump(int irq, void *data) if (kobj) { /* Drop reference added by kset_find_obj() */ kobject_put(kobj); - return 0; + return IRQ_HANDLED; } - dump = create_dump_obj(dump_id, dump_size, dump_type); - if (!dump) - return -1; + create_dump_obj(dump_id, dump_size, dump_type); return IRQ_HANDLED; } diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 605c7e5d52c2..bc97770a67db 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -22,6 +22,7 @@ #include <linux/kthread.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/of_irq.h> #include <asm/machdep.h> #include <asm/opal.h> @@ -38,8 +39,8 @@ struct opal_event_irqchip { }; static struct opal_event_irqchip opal_event_irqchip; static u64 last_outstanding_events; -static unsigned int opal_irq_count; -static unsigned int *opal_irqs; +static int opal_irq_count; +static struct resource *opal_irqs; void opal_handle_events(void) { @@ -165,24 +166,23 @@ void opal_event_shutdown(void) /* First free interrupts, which will also mask them */ for (i = 0; i < opal_irq_count; i++) { - if (!opal_irqs[i]) + if (!opal_irqs || !opal_irqs[i].start) continue; if (in_interrupt() || irqs_disabled()) - disable_irq_nosync(opal_irqs[i]); + disable_irq_nosync(opal_irqs[i].start); else - free_irq(opal_irqs[i], NULL); + free_irq(opal_irqs[i].start, NULL); - opal_irqs[i] = 0; + opal_irqs[i].start = 0; } } int __init opal_event_init(void) { struct device_node *dn, *opal_node; - const char **names; - u32 *irqs; - int i, rc; + bool old_style = false; + int i, rc = 0; opal_node = of_find_node_by_path("/ibm,opal"); if (!opal_node) { @@ -207,67 +207,91 @@ int __init opal_event_init(void) goto out; } - /* Get opal-interrupts property and names if present */ - rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); - if (rc < 0) - goto out; + /* Look for new-style (standard) "interrupts" property */ + opal_irq_count = of_irq_count(opal_node); - opal_irq_count = rc; - pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + /* Absent ? Look for the old one */ + if (opal_irq_count < 1) { + /* Get opal-interrupts property and names if present */ + rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); + if (rc > 0) + opal_irq_count = rc; + old_style = true; + } - irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL); - names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL); - opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + /* No interrupts ? Bail out */ + if (!opal_irq_count) + goto out; - if (WARN_ON(!irqs || !names || !opal_irqs)) - goto out_free; + pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n", + opal_irq_count, old_style ? "old" : "new"); - rc = of_property_read_u32_array(opal_node, "opal-interrupts", - irqs, opal_irq_count); - if (rc < 0) { - pr_err("Error %d reading opal-interrupts array\n", rc); - goto out_free; + /* Allocate an IRQ resources array */ + opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL); + if (WARN_ON(!opal_irqs)) { + rc = -ENOMEM; + goto out; } - /* It's not an error for the names to be missing */ - of_property_read_string_array(opal_node, "opal-interrupts-names", - names, opal_irq_count); + /* Build the resources array */ + if (old_style) { + /* Old style "opal-interrupts" property */ + for (i = 0; i < opal_irq_count; i++) { + struct resource *r = &opal_irqs[i]; + const char *name = NULL; + u32 hw_irq; + int virq; + + rc = of_property_read_u32_index(opal_node, "opal-interrupts", + i, &hw_irq); + if (WARN_ON(rc < 0)) { + opal_irq_count = i; + break; + } + of_property_read_string_index(opal_node, "opal-interrupts-names", + i, &name); + virq = irq_create_mapping(NULL, hw_irq); + if (!virq) { + pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq); + continue; + } + r->start = r->end = virq; + r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW; + r->name = name; + } + } else { + /* new style standard "interrupts" property */ + rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count); + if (WARN_ON(rc < 0)) { + opal_irq_count = 0; + kfree(opal_irqs); + goto out; + } + if (WARN_ON(rc < opal_irq_count)) + opal_irq_count = rc; + } /* Install interrupt handlers */ for (i = 0; i < opal_irq_count; i++) { - unsigned int virq; - char *name; - - /* Get hardware and virtual IRQ */ - virq = irq_create_mapping(NULL, irqs[i]); - if (!virq) { - pr_warn("Failed to map irq 0x%x\n", irqs[i]); - continue; - } + struct resource *r = &opal_irqs[i]; + const char *name; - if (names[i] && strlen(names[i])) - name = kasprintf(GFP_KERNEL, "opal-%s", names[i]); + /* Prefix name */ + if (r->name && strlen(r->name)) + name = kasprintf(GFP_KERNEL, "opal-%s", r->name); else name = kasprintf(GFP_KERNEL, "opal"); /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, + rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK, name, NULL); if (rc) { - irq_dispose_mapping(virq); - pr_warn("Error %d requesting irq %d (0x%x)\n", - rc, virq, irqs[i]); + pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); continue; } - - /* Cache IRQ */ - opal_irqs[i] = virq; } - -out_free: - kfree(irqs); - kfree(names); -out: + rc = 0; + out: of_node_put(opal_node); return rc; } diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c index 6f1214d4de92..55691950d981 100644 --- a/arch/powerpc/platforms/powernv/opal-kmsg.c +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -23,12 +23,9 @@ * may not be completely printed. This function does not actually dump the * message, it just ensures that OPAL completely flushes the console buffer. */ -static void force_opal_console_flush(struct kmsg_dumper *dumper, +static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { - int i; - int64_t ret; - /* * Outside of a panic context the pollers will continue to run, * so we don't need to do any special flushing. @@ -36,32 +33,11 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper, if (reason != KMSG_DUMP_PANIC) return; - if (opal_check_token(OPAL_CONSOLE_FLUSH)) { - ret = opal_console_flush(0); - - if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) - return; - - /* Incrementally flush until there's nothing left */ - while (opal_console_flush(0) != OPAL_SUCCESS); - } else { - /* - * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, - * the console can still be flushed by calling the polling - * function enough times to flush the buffer. We don't know - * how much output still needs to be flushed, but we can be - * generous since the kernel is in panic and doesn't need - * to do much else. - */ - printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n"); - for (i = 0; i < 1024; i++) { - opal_poll_events(NULL); - } - } + opal_flush_console(0); } static struct kmsg_dumper opal_kmsg_dumper = { - .dump = force_opal_console_flush + .dump = kmsg_dump_opal_console_flush }; void __init opal_kmsg_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 541c9ea04a32..f7d04b6a2d7a 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -32,6 +32,34 @@ static struct sensor_group { struct sg_attr *sgattrs; } *sgs; +int sensor_group_enable(u32 handle, bool enable) +{ + struct opal_msg msg; + int token, ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) + return token; + + ret = opal_sensor_group_enable(handle, token, enable); + if (ret == OPAL_ASYNC_COMPLETION) { + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + } else { + ret = opal_error_code(ret); + } + +out: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(sensor_group_enable); + static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a8d9b4089c31..251528231a9e 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -14,6 +14,8 @@ #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> +#include <asm/asm-compat.h> +#include <asm/feature-fixups.h> .section ".text" @@ -327,3 +329,5 @@ OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64); +OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE); +OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0d539c661748..404c379db168 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -344,70 +344,125 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count) return 0; } -int opal_put_chars(uint32_t vtermno, const char *data, int total_len) +static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic) { - int written = 0; + unsigned long flags = 0 /* shut up gcc */; + int written; __be64 olen; - s64 len, rc; - unsigned long flags; - __be64 evt; + s64 rc; if (!opal.entry) return -ENODEV; - /* We want put_chars to be atomic to avoid mangling of hvsi - * packets. To do that, we first test for room and return - * -EAGAIN if there isn't enough. - * - * Unfortunately, opal_console_write_buffer_space() doesn't - * appear to work on opal v1, so we just assume there is - * enough room and be done with it - */ - spin_lock_irqsave(&opal_write_lock, flags); + if (atomic) + spin_lock_irqsave(&opal_write_lock, flags); rc = opal_console_write_buffer_space(vtermno, &olen); - len = be64_to_cpu(olen); - if (rc || len < total_len) { - spin_unlock_irqrestore(&opal_write_lock, flags); + if (rc || be64_to_cpu(olen) < total_len) { /* Closed -> drop characters */ if (rc) - return total_len; - opal_poll_events(NULL); - return -EAGAIN; + written = total_len; + else + written = -EAGAIN; + goto out; } - /* We still try to handle partial completions, though they - * should no longer happen. - */ - rc = OPAL_BUSY; - while(total_len > 0 && (rc == OPAL_BUSY || - rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { - olen = cpu_to_be64(total_len); - rc = opal_console_write(vtermno, &olen, data); - len = be64_to_cpu(olen); - - /* Closed or other error drop */ - if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && - rc != OPAL_BUSY_EVENT) { - written = total_len; - break; + /* Should not get a partial write here because space is available. */ + olen = cpu_to_be64(total_len); + rc = opal_console_write(vtermno, &olen, data); + if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); } - if (rc == OPAL_SUCCESS) { - total_len -= len; - data += len; - written += len; + written = -EAGAIN; + goto out; + } + + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS) { + written = opal_error_code(rc); + goto out; + } + + written = be64_to_cpu(olen); + if (written < total_len) { + if (atomic) { + /* Should not happen */ + pr_warn("atomic console write returned partial " + "len=%d written=%d\n", total_len, written); } - /* This is a bit nasty but we need that for the console to - * flush when there aren't any interrupts. We will clean - * things a bit later to limit that to synchronous path - * such as the kernel console and xmon/udbg + if (!written) + written = -EAGAIN; + } + +out: + if (atomic) + spin_unlock_irqrestore(&opal_write_lock, flags); + + /* In the -EAGAIN case, callers loop, so we have to flush the console + * here in case they have interrupts off (and we don't want to wait + * for async flushing if we can make immediate progress here). If + * necessary the API could be made entirely non-flushing if the + * callers had a ->flush API to use. + */ + if (written == -EAGAIN) + opal_flush_console(vtermno); + + return written; +} + +int opal_put_chars(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, false); +} + +/* + * opal_put_chars_atomic will not perform partial-writes. Data will be + * atomically written to the terminal or not at all. This is not strictly + * true at the moment because console space can race with OPAL's console + * writes. + */ +int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, true); +} + +int opal_flush_console(uint32_t vtermno) +{ + s64 rc; + + if (!opal_check_token(OPAL_CONSOLE_FLUSH)) { + __be64 evt; + + WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. */ - do + do { opal_poll_events(&evt); - while(rc == OPAL_SUCCESS && - (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)); + } while (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT); + + return OPAL_SUCCESS; } - spin_unlock_irqrestore(&opal_write_lock, flags); - return written; + + do { + rc = OPAL_BUSY; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_console_flush(vtermno); + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } + } + } while (rc == OPAL_PARTIAL); /* More to flush */ + + return opal_error_code(rc); } static int opal_recover_mce(struct pt_regs *regs, @@ -922,6 +977,7 @@ EXPORT_SYMBOL_GPL(opal_flash_read); EXPORT_SYMBOL_GPL(opal_flash_write); EXPORT_SYMBOL_GPL(opal_flash_erase); EXPORT_SYMBOL_GPL(opal_prd_msg); +EXPORT_SYMBOL_GPL(opal_check_token); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, @@ -1034,3 +1090,5 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async); EXPORT_SYMBOL_GPL(opal_int_set_mfrr); EXPORT_SYMBOL_GPL(opal_int_eoi); EXPORT_SYMBOL_GPL(opal_error_code); +/* Export the below symbol for NX compression */ +EXPORT_SYMBOL(opal_nx_coproc_init); diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index cee003de63af..1b18111453d7 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -8,11 +8,8 @@ */ #include <linux/module.h> -#include <linux/msi.h> -#include <asm/pci-bridge.h> #include <asm/pnv-pci.h> #include <asm/opal.h> -#include <misc/cxl.h> #include "pci.h" @@ -179,199 +176,3 @@ static inline int get_cxl_module(void) #else static inline int get_cxl_module(void) { return 0; } #endif - -/* - * Sets flags and switches the controller ops to enable the cxl kernel api. - * Originally the cxl kernel API operated on a virtual PHB, but certain cards - * such as the Mellanox CX4 use a peer model instead and for these cards the - * cxl kernel api will operate on the real PHB. - */ -int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable) -{ - struct pnv_phb *phb = hose->private_data; - int rc; - - if (!enable) { - /* - * Once cxl mode is enabled on the PHB, there is currently no - * known safe method to disable it again, and trying risks a - * checkstop. If we can find a way to safely disable cxl mode - * in the future we can revisit this, but for now the only sane - * thing to do is to refuse to disable cxl mode: - */ - return -EPERM; - } - - /* - * Hold a reference to the cxl module since several PHB operations now - * depend on it, and it would be insane to allow it to be removed so - * long as we are in this mode (and since we can't safely disable this - * mode once enabled...). - */ - rc = get_cxl_module(); - if (rc) - return rc; - - phb->flags |= PNV_PHB_FLAG_CXL; - hose->controller_ops = pnv_cxl_cx4_ioda_controller_ops; - - return 0; -} -EXPORT_SYMBOL_GPL(pnv_cxl_enable_phb_kernel_api); - -bool pnv_pci_on_cxl_phb(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return !!(phb->flags & PNV_PHB_FLAG_CXL); -} -EXPORT_SYMBOL_GPL(pnv_pci_on_cxl_phb); - -struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - - return (struct cxl_afu *)phb->cxl_afu; -} -EXPORT_SYMBOL_GPL(pnv_cxl_phb_to_afu); - -void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - phb->cxl_afu = afu; -} -EXPORT_SYMBOL_GPL(pnv_cxl_phb_set_peer_afu); - -/* - * In the peer cxl model, the XSL/PSL is physical function 0, and will be used - * by other functions on the device for memory access and interrupts. When the - * other functions are enabled we explicitly take a reference on the cxl - * function since they will use it, and allocate a default context associated - * with that function just like the vPHB model of the cxl kernel API. - */ -bool pnv_cxl_enable_device_hook(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct cxl_afu *afu = phb->cxl_afu; - - if (!pnv_pci_enable_device_hook(dev)) - return false; - - - /* No special handling for the cxl function, which is always PF 0 */ - if (PCI_FUNC(dev->devfn) == 0) - return true; - - if (!afu) { - dev_WARN(&dev->dev, "Attempted to enable function > 0 on CXL PHB without a peer AFU\n"); - return false; - } - - dev_info(&dev->dev, "Enabling function on CXL enabled PHB with peer AFU\n"); - - /* Make sure the peer AFU can't go away while this device is active */ - cxl_afu_get(afu); - - return cxl_pci_associate_default_context(dev, afu); -} - -void pnv_cxl_disable_device(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct cxl_afu *afu = phb->cxl_afu; - - /* No special handling for cxl function: */ - if (PCI_FUNC(dev->devfn) == 0) - return; - - cxl_pci_disable_device(dev); - cxl_afu_put(afu); -} - -/* - * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This - * function handles setting up the IVTE entries for the XSL to use. - * - * We are currently not filling out the MSIX table, since the only currently - * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it - * is up to their driver to fill that out. In the future we may fill out the - * MSIX table (and change the IVTE entries to be an index to the MSIX table) - * for adapters implementing the Full MSI-X mode described in the CAIA. - */ -int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct msi_desc *entry; - struct cxl_context *ctx = NULL; - unsigned int virq; - int hwirq; - int afu_irq = 0; - int rc; - - if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) - return -ENODEV; - - if (pdev->no_64bit_msi && !phb->msi32_support) - return -ENODEV; - - rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type); - if (rc) - return rc; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->msi_attrib.is_64 && !phb->msi32_support) { - pr_warn("%s: Supports only 64-bit MSIs\n", - pci_name(pdev)); - return -ENXIO; - } - - hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq); - if (WARN_ON(hwirq <= 0)) - return (hwirq ? hwirq : -ENOMEM); - - virq = irq_create_mapping(NULL, hwirq); - if (!virq) { - pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", - pci_name(pdev)); - return -ENOMEM; - } - - rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq); - if (rc) { - pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev)); - irq_dispose_mapping(virq); - return rc; - } - - irq_set_msi_desc(virq, entry); - } - - return 0; -} - -void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct msi_desc *entry; - irq_hw_number_t hwirq; - - if (WARN_ON(!phb)) - return; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - hwirq = virq_to_hw(entry->irq); - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } - - cxl_cx4_teardown_msi_irqs(pdev); -} diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c new file mode 100644 index 000000000000..6c5db1acbe8d --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * TCE helpers for IODA PCI/PCIe on PowerNV platforms + * + * Copyright 2018 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/iommu.h> + +#include <asm/iommu.h> +#include <asm/tce.h> +#include "pci.h" + +void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift) +{ + tbl->it_blocksize = 16; + tbl->it_base = (unsigned long)tce_mem; + tbl->it_page_shift = page_shift; + tbl->it_offset = dma_offset >> tbl->it_page_shift; + tbl->it_index = 0; + tbl->it_size = tce_size >> 3; + tbl->it_busno = 0; + tbl->it_type = TCE_PCI; +} + +static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) +{ + struct page *tce_mem = NULL; + __be64 *addr; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, level shift=%d\n", + shift); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, 1UL << shift); + + return addr; +} + +static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) +{ + __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long tce; + + if (tmp[n] == 0) { + __be64 *tmp2; + + if (!alloc) + return NULL; + + tmp2 = pnv_alloc_tce_level(tbl->it_nid, + ilog2(tbl->it_level_size) + 3); + if (!tmp2) + return NULL; + + tmp[n] = cpu_to_be64(__pa(tmp2) | + TCE_PCI_READ | TCE_PCI_WRITE); + } + tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } + + return tmp + idx; +} + +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs) +{ + u64 proto_tce = iommu_direction_to_tce_perm(direction); + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; + + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; + + *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction, + bool alloc) +{ + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + __be64 *ptce = NULL; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + if (*direction == DMA_NONE) { + ptce = pnv_tce(tbl, false, idx, false); + if (!ptce) { + *hpa = 0; + return 0; + } + } + + if (!ptce) { + ptce = pnv_tce(tbl, false, idx, alloc); + if (!ptce) + return alloc ? H_HARDWARE : H_TOO_HARD; + } + + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + + oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); + *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; +} + +__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) +{ + if (WARN_ON_ONCE(!tbl->it_userspace)) + return NULL; + + return pnv_tce(tbl, true, index - tbl->it_offset, alloc); +} +#endif + +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) +{ + long i; + + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; + __be64 *ptce = pnv_tce(tbl, false, idx, false); + + if (ptce) + *ptce = cpu_to_be64(0); + } +} + +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); + + if (!ptce) + return 0; + + return be64_to_cpu(*ptce); +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned int levels) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (levels) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + levels - 1); + } + } + + free_pages(addr_ul, get_order(size << 3)); +} + +void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + + if (!tbl->it_size) + return; + + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); + if (tbl->it_userspace) { + pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, + tbl->it_indirect_levels); + } +} + +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, + unsigned int levels, unsigned long limit, + unsigned long *current_offset, unsigned long *total_allocated) +{ + __be64 *addr, *tmp; + unsigned long allocated = 1UL << shift; + unsigned int entries = 1UL << (shift - 3); + long i; + + addr = pnv_alloc_tce_level(nid, shift); + *total_allocated += allocated; + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset, total_allocated); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } + + return addr; +} + +long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + bool alloc_userspace_copy, struct iommu_table *tbl) +{ + void *addr, *uas = NULL; + unsigned long offset = 0, level_shift, total_allocated = 0; + unsigned long total_allocated_uas = 0; + const unsigned int window_shift = ilog2(window_size); + unsigned int entries_shift = window_shift - page_shift; + unsigned int table_shift = max_t(unsigned int, entries_shift + 3, + PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + unsigned int tmplevels = levels; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + + if (!is_power_of_2(window_size)) + return -EINVAL; + + if (alloc_userspace_copy && (window_size > (1ULL << 32))) + tmplevels = 1; + + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); + + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + tmplevels, tce_table_size, &offset, &total_allocated); + + /* addr==NULL means that the first level allocation failed */ + if (!addr) + return -ENOMEM; + + /* + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. + */ + if (tmplevels == levels && offset < tce_table_size) + goto free_tces_exit; + + /* Allocate userspace view of the TCE table */ + if (alloc_userspace_copy) { + offset = 0; + uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset, + &total_allocated_uas); + if (!uas) + goto free_tces_exit; + if (tmplevels == levels && (offset < tce_table_size || + total_allocated_uas != total_allocated)) + goto free_uas_exit; + } + + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; + tbl->it_allocated_size = total_allocated; + tbl->it_userspace = uas; + tbl->it_nid = nid; + + pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", + window_size, tce_table_size, bus_offset, tbl->it_base, + tbl->it_userspace, tmplevels, levels); + + return 0; + +free_uas_exit: + pnv_pci_ioda2_table_do_free_pages(uas, + 1ULL << (level_shift - 3), levels - 1); +free_tces_exit: + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + + return -ENOMEM; +} + +static void pnv_iommu_table_group_link_free(struct rcu_head *head) +{ + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bc..4e6302bf4073 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -46,17 +46,14 @@ #include "powernv.h" #include "pci.h" +#include "../../../../drivers/pci/pci.h" #define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ #define PNV_IODA1_DMA32_SEGSIZE 0x10000000 -#define POWERNV_IOMMU_DEFAULT_LEVELS 1 -#define POWERNV_IOMMU_MAX_LEVELS 5 - static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; -static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) @@ -2007,7 +2004,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); @@ -2018,7 +2015,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); @@ -2040,6 +2037,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda1_tce_xchg, .exchange_rm = pnv_ioda1_tce_xchg_rm, + .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, @@ -2171,7 +2169,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); @@ -2182,7 +2180,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); @@ -2199,20 +2197,16 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } -static void pnv_ioda2_table_free(struct iommu_table *tbl) -{ - pnv_pci_ioda2_table_free_pages(tbl); -} - static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda2_tce_xchg, .exchange_rm = pnv_ioda2_tce_xchg_rm, + .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, - .free = pnv_ioda2_table_free, + .free = pnv_pci_ioda2_table_free_pages, }; static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data) @@ -2462,13 +2456,9 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) pe->tce_bypass_enabled = enable; } -static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl); - static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, int num, __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table **ptbl) + bool alloc_userspace_copy, struct iommu_table **ptbl) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); @@ -2485,7 +2475,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, ret = pnv_pci_ioda2_table_alloc_pages(nid, bus_offset, page_shift, window_size, - levels, tbl); + levels, alloc_userspace_copy, tbl); if (ret) { iommu_tce_table_put(tbl); return ret; @@ -2518,7 +2508,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, window_size, - POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); + POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); @@ -2605,7 +2595,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, tce_table_size, direct_table_size); } - return bytes; + return bytes + bytes; /* one for HW table, one for userspace copy */ +} + +static long pnv_pci_ioda2_create_table_userspace( + struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + return pnv_pci_ioda2_create_table(table_group, + num, page_shift, window_size, levels, true, ptbl); } static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) @@ -2634,7 +2633,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_pci_ioda2_create_table, + .create_table = pnv_pci_ioda2_create_table_userspace, .set_window = pnv_pci_ioda2_set_window, .unset_window = pnv_pci_ioda2_unset_window, .take_ownership = pnv_ioda2_take_ownership, @@ -2739,7 +2738,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_pci_ioda2_create_table, + .create_table = pnv_pci_ioda2_create_table_userspace, .set_window = pnv_pci_ioda2_npu_set_window, .unset_window = pnv_pci_ioda2_npu_unset_window, .take_ownership = pnv_ioda2_npu_take_ownership, @@ -2773,144 +2772,6 @@ static void pnv_pci_ioda_setup_iommu_api(void) static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, - unsigned levels, unsigned long limit, - unsigned long *current_offset, unsigned long *total_allocated) -{ - struct page *tce_mem = NULL; - __be64 *addr, *tmp; - unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; - unsigned long allocated = 1UL << (order + PAGE_SHIFT); - unsigned entries = 1UL << (shift - 3); - long i; - - tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); - if (!tce_mem) { - pr_err("Failed to allocate a TCE memory, order=%d\n", order); - return NULL; - } - addr = page_address(tce_mem); - memset(addr, 0, allocated); - *total_allocated += allocated; - - --levels; - if (!levels) { - *current_offset += allocated; - return addr; - } - - for (i = 0; i < entries; ++i) { - tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, - levels, limit, current_offset, total_allocated); - if (!tmp) - break; - - addr[i] = cpu_to_be64(__pa(tmp) | - TCE_PCI_READ | TCE_PCI_WRITE); - - if (*current_offset >= limit) - break; - } - - return addr; -} - -static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, - unsigned long size, unsigned level); - -static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl) -{ - void *addr; - unsigned long offset = 0, level_shift, total_allocated = 0; - const unsigned window_shift = ilog2(window_size); - unsigned entries_shift = window_shift - page_shift; - unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); - const unsigned long tce_table_size = 1UL << table_shift; - - if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) - return -EINVAL; - - if (!is_power_of_2(window_size)) - return -EINVAL; - - /* Adjust direct table size from window_size and levels */ - entries_shift = (entries_shift + levels - 1) / levels; - level_shift = entries_shift + 3; - level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); - - if ((level_shift - 3) * levels + page_shift >= 60) - return -EINVAL; - - /* Allocate TCE table */ - addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset, &total_allocated); - - /* addr==NULL means that the first level allocation failed */ - if (!addr) - return -ENOMEM; - - /* - * First level was allocated but some lower level failed as - * we did not allocate as much as we wanted, - * release partially allocated table. - */ - if (offset < tce_table_size) { - pnv_pci_ioda2_table_do_free_pages(addr, - 1ULL << (level_shift - 3), levels - 1); - return -ENOMEM; - } - - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, - page_shift); - tbl->it_level_size = 1ULL << (level_shift - 3); - tbl->it_indirect_levels = levels - 1; - tbl->it_allocated_size = total_allocated; - - pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", - window_size, tce_table_size, bus_offset); - - return 0; -} - -static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, - unsigned long size, unsigned level) -{ - const unsigned long addr_ul = (unsigned long) addr & - ~(TCE_PCI_READ | TCE_PCI_WRITE); - - if (level) { - long i; - u64 *tmp = (u64 *) addr_ul; - - for (i = 0; i < size; ++i) { - unsigned long hpa = be64_to_cpu(tmp[i]); - - if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) - continue; - - pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, - level - 1); - } - } - - free_pages(addr_ul, get_order(size << 3)); -} - -static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) -{ - const unsigned long size = tbl->it_indirect_levels ? - tbl->it_level_size : tbl->it_size; - - if (!tbl->it_size) - return; - - pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, - tbl->it_indirect_levels); -} - static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) { struct pci_controller *hose = phb->hose; @@ -2925,7 +2786,7 @@ static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) /* Add 16M for POWER8 by default */ if (cpu_has_feature(CPU_FTR_ARCH_207S) && !cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= SZ_16M; + mask |= SZ_16M | SZ_256M; return mask; } @@ -3138,7 +2999,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; pdn = pci_get_pdn(pdev); @@ -3575,7 +3436,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, /* Prevent enabling devices for which we couldn't properly * assign a PE */ -bool pnv_pci_enable_device_hook(struct pci_dev *dev) +static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -3843,26 +3704,6 @@ static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; -#ifdef CONFIG_CXL_BASE -const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, - .dma_bus_setup = pnv_pci_dma_bus_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, - .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, -#endif - .enable_device_hook = pnv_cxl_enable_device_hook, - .disable_device = pnv_cxl_disable_device, - .release_device = pnv_pci_release_device, - .window_alignment = pnv_pci_window_alignment, - .setup_bridge = pnv_pci_setup_bridge, - .reset_secondary_bus = pnv_pci_reset_secondary_bus, - .dma_set_mask = pnv_pci_ioda_dma_set_mask, - .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, - .shutdown = pnv_pci_ioda_shutdown, -}; -#endif - static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b265ecc0836a..13aef2323bbc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -802,85 +802,6 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; -static __be64 *pnv_tce(struct iommu_table *tbl, long idx) -{ - __be64 *tmp = ((__be64 *)tbl->it_base); - int level = tbl->it_indirect_levels; - const long shift = ilog2(tbl->it_level_size); - unsigned long mask = (tbl->it_level_size - 1) << (level * shift); - - while (level) { - int n = (idx & mask) >> (level * shift); - unsigned long tce = be64_to_cpu(tmp[n]); - - tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); - idx &= ~mask; - mask >>= shift; - --level; - } - - return tmp + idx; -} - -int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - unsigned long attrs) -{ - u64 proto_tce = iommu_direction_to_tce_perm(direction); - u64 rpn = __pa(uaddr) >> tbl->it_page_shift; - long i; - - if (proto_tce & TCE_PCI_WRITE) - proto_tce |= TCE_PCI_READ; - - for (i = 0; i < npages; i++) { - unsigned long newtce = proto_tce | - ((rpn + i) << tbl->it_page_shift); - unsigned long idx = index - tbl->it_offset + i; - - *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); - } - - return 0; -} - -#ifdef CONFIG_IOMMU_API -int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - u64 proto_tce = iommu_direction_to_tce_perm(*direction); - unsigned long newtce = *hpa | proto_tce, oldtce; - unsigned long idx = index - tbl->it_offset; - - BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); - - if (newtce & TCE_PCI_WRITE) - newtce |= TCE_PCI_READ; - - oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce))); - *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); - *direction = iommu_tce_direction(oldtce); - - return 0; -} -#endif - -void pnv_tce_free(struct iommu_table *tbl, long index, long npages) -{ - long i; - - for (i = 0; i < npages; i++) { - unsigned long idx = index - tbl->it_offset + i; - - *(pnv_tce(tbl, idx)) = cpu_to_be64(0); - } -} - -unsigned long pnv_tce_get(struct iommu_table *tbl, long index) -{ - return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset))); -} - struct iommu_table *pnv_pci_table_alloc(int nid) { struct iommu_table *tbl; @@ -895,85 +816,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid) return tbl; } -long pnv_pci_link_table_and_group(int node, int num, - struct iommu_table *tbl, - struct iommu_table_group *table_group) -{ - struct iommu_table_group_link *tgl = NULL; - - if (WARN_ON(!tbl || !table_group)) - return -EINVAL; - - tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, - node); - if (!tgl) - return -ENOMEM; - - tgl->table_group = table_group; - list_add_rcu(&tgl->next, &tbl->it_group_list); - - table_group->tables[num] = tbl; - - return 0; -} - -static void pnv_iommu_table_group_link_free(struct rcu_head *head) -{ - struct iommu_table_group_link *tgl = container_of(head, - struct iommu_table_group_link, rcu); - - kfree(tgl); -} - -void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, - struct iommu_table_group *table_group) -{ - long i; - bool found; - struct iommu_table_group_link *tgl; - - if (!tbl || !table_group) - return; - - /* Remove link to a group from table's list of attached groups */ - found = false; - list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { - if (tgl->table_group == table_group) { - list_del_rcu(&tgl->next); - call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); - found = true; - break; - } - } - if (WARN_ON(!found)) - return; - - /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ - found = false; - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - if (table_group->tables[i] == tbl) { - table_group->tables[i] = NULL; - found = true; - break; - } - } - WARN_ON(!found); -} - -void pnv_pci_setup_iommu_table(struct iommu_table *tbl, - void *tce_mem, u64 tce_size, - u64 dma_offset, unsigned page_shift) -{ - tbl->it_blocksize = 16; - tbl->it_base = (unsigned long)tce_mem; - tbl->it_page_shift = page_shift; - tbl->it_offset = dma_offset >> tbl->it_page_shift; - tbl->it_index = 0; - tbl->it_size = tce_size >> 3; - tbl->it_busno = 0; - tbl->it_type = TCE_PCI; -} - void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index eada4b6068cb..8b37b28e3831 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -88,7 +88,6 @@ struct pnv_ioda_pe { }; #define PNV_PHB_FLAG_EEH (1 << 0) -#define PNV_PHB_FLAG_CXL (1 << 1) /* Real PHB supporting the cxl kernel API */ struct pnv_phb { struct pci_controller *hose; @@ -194,20 +193,10 @@ struct pnv_phb { bool nmmu_flush; } npu; -#ifdef CONFIG_CXL_BASE - struct cxl_afu *cxl_afu; -#endif int p2p_target_count; }; extern struct pci_ops pnv_pci_ops; -extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - unsigned long attrs); -extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); -extern int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction); -extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); @@ -217,14 +206,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); extern struct iommu_table *pnv_pci_table_alloc(int nid); -extern long pnv_pci_link_table_and_group(int node, int num, - struct iommu_table *tbl, - struct iommu_table_group *table_group); -extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, - struct iommu_table_group *table_group); -extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, - void *tce_mem, u64 tce_size, - u64 dma_offset, unsigned page_shift); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); @@ -238,7 +219,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern int pnv_eeh_post_init(void); @@ -262,14 +242,33 @@ extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); extern int pnv_npu2_init(struct pnv_phb *phb); -/* cxl functions */ -extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); -extern void pnv_cxl_disable_device(struct pci_dev *dev); -extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); +/* pci-ioda-tce.c */ +#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction, + bool alloc); +extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, + bool alloc); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); + +extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + bool alloc_userspace_copy, struct iommu_table *tbl); +extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); -/* phb ops (cxl switches these when enabling the kernel api on the phb) */ -extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops; +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift); #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index f96df0a25d05..adddde023622 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -78,6 +78,12 @@ static void init_fw_feat_flags(struct device_node *np) if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np)) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np)) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -124,7 +130,7 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); - setup_barrier_nospec(); + setup_count_cache_flush(); } static void __init pnv_setup_arch(void) @@ -314,7 +320,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) u64 reinit_flags; if (xive_enabled()) - xive_kexec_teardown_cpu(secondary); + xive_teardown_cpu(); else xics_kexec_teardown_cpu(secondary); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b80909957792..0d354e19ef92 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -283,23 +283,6 @@ static void pnv_cause_ipi(int cpu) ic_cause_ipi(cpu); } -static void pnv_p9_dd1_cause_ipi(int cpu) -{ - int this_cpu = get_cpu(); - - /* - * POWER9 DD1 has a global addressed msgsnd, but for now we restrict - * IPIs to same core, because it requires additional synchronization - * for inter-core doorbells which we do not implement. - */ - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) - doorbell_global_ipi(cpu); - else - ic_cause_ipi(cpu); - - put_cpu(); -} - static void __init pnv_smp_probe(void) { if (xive_enabled()) @@ -311,14 +294,10 @@ static void __init pnv_smp_probe(void) ic_cause_ipi = smp_ops->cause_ipi; WARN_ON(!ic_cause_ipi); - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - smp_ops->cause_ipi = pnv_p9_dd1_cause_ipi; - else - smp_ops->cause_ipi = doorbell_global_ipi; - } else { + if (cpu_has_feature(CPU_FTR_ARCH_300)) + smp_ops->cause_ipi = doorbell_global_ipi; + else smp_ops->cause_ipi = pnv_cause_ipi; - } } } diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index ae0100fd35bb..f5493dbdd7ff 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/dcache.h> #include <linux/mutex.h> +#include <linux/stringify.h> /* * Overview of Virtual Accelerator Switchboard (VAS). diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 13eede60c24d..7e89d5c47068 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -25,6 +25,6 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o -ifeq ($(CONFIG_PPC_PSERIES),y) +ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o endif diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c511a1743a44..d91412c591ef 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -13,6 +13,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/feature-fixups.h> .section ".text" diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 46fbaef69a59..23f54223ed56 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -58,7 +58,7 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } if (xive_enabled()) { - xive_kexec_teardown_cpu(secondary); + xive_teardown_cpu(); if (!secondary) xive_shutdown(); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5a392e40f3d2..d3992ced0782 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -21,6 +21,7 @@ /* Enables debugging of low-level hash table routines - careful! */ #undef DEBUG +#define pr_fmt(fmt) "lpar: " fmt #include <linux/kernel.h> #include <linux/dma-mapping.h> @@ -36,7 +37,6 @@ #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/iommu.h> -#include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/prom.h> #include <asm/cputable.h> @@ -165,8 +165,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { - if (!(vflags & HPTE_V_BOLTED)) - pr_devel(" full\n"); + pr_devel("Hash table group is full\n"); return -1; } @@ -176,8 +175,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, * or we will loop forever, so return -2 in this case. */ if (unlikely(lpar_rc != H_SUCCESS)) { - if (!(vflags & HPTE_V_BOLTED)) - pr_devel(" lpar err %ld\n", lpar_rc); + pr_err("Failed hash pte insert with error %ld\n", lpar_rc); return -2; } if (!(vflags & HPTE_V_BOLTED)) @@ -240,8 +238,11 @@ static void manual_hpte_clear_all(void) */ for (i = 0; i < hpte_count; i += 4) { lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + i, lpar_rc); continue; + } for (j = 0; j < 4; j++){ if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == HPTE_V_VRMA_MASK) @@ -340,8 +341,11 @@ static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_gr for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + hpte_group, lpar_rc); continue; + } for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].pteh, want_v) && @@ -612,8 +616,8 @@ static int __init disable_bulk_remove(char *str) { if (strcmp(str, "off") == 0 && firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { - printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); - powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; + pr_info("Disabling BULK_REMOVE firmware feature"); + powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; } return 1; } @@ -659,8 +663,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) return -ENODEV; - printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n", - shift); + pr_info("Attempting to resize HPT to shift %lu\n", shift); t0 = ktime_get(); @@ -672,8 +675,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) /* prepare with shift==0 cancels an in-progress resize */ rc = plpar_resize_hpt_prepare(0, 0); if (rc != H_SUCCESS) - printk(KERN_WARNING - "lpar: Unexpected error %d cancelling timed out HPT resize\n", + pr_warn("Unexpected error %d cancelling timed out HPT resize\n", rc); return -ETIMEDOUT; } @@ -691,9 +693,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) case H_RESOURCE: return -EPERM; default: - printk(KERN_WARNING - "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n", - rc); + pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); return -EIO; } @@ -706,22 +706,19 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (rc != 0) { switch (state.commit_rc) { case H_PTEG_FULL: - printk(KERN_WARNING - "lpar: Hash collision while resizing HPT\n"); + pr_warn("Hash collision while resizing HPT\n"); return -ENOSPC; default: - printk(KERN_WARNING - "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n", - state.commit_rc); + pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n", + state.commit_rc); return -EIO; }; } - printk(KERN_INFO - "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n", - shift, (long long) ktime_ms_delta(t1, t0), - (long long) ktime_ms_delta(t2, t1)); + pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n", + shift, (long long) ktime_ms_delta(t1, t0), + (long long) ktime_ms_delta(t2, t1)); return 0; } @@ -785,13 +782,13 @@ static int __init cmo_free_hint(char *str) parm = strstrip(str); if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { - printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); + pr_info("%s: CMO free page hinting is not active.\n", __func__); cmo_free_hint_flag = 0; return 1; } cmo_free_hint_flag = 1; - printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); + pr_info("%s: CMO free page hinting is active.\n", __func__); if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) return 1; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 8a8033a249c7..f0e30dc94988 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -17,6 +17,7 @@ #include <linux/device.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/stringify.h> #include <asm/machdep.h> #include <asm/rtas.h> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5e1ef9150182..851ce326874a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -22,6 +22,7 @@ #include <linux/of.h> #include <linux/fs.h> #include <linux/reboot.h> +#include <linux/irq_work.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -32,11 +33,13 @@ static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; static DEFINE_SPINLOCK(ras_log_buf_lock); -static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; -static DEFINE_PER_CPU(__u64, mce_data_buf); - static int ras_check_exception_token; +static void mce_process_errlog_event(struct irq_work *work); +static struct irq_work mce_errlog_process_work = { + .func = mce_process_errlog_event, +}; + #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -330,16 +333,20 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) +static inline struct rtas_error_log *fwnmi_get_errlog(void) +{ + return (struct rtas_error_log *)local_paca->mce_data_buf; +} + /* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. * - * If the RTAS error is not of the extended type, then we put it in a per - * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. + * Use one buffer mce_data_buf per cpu to store RTAS error. * - * The global_mce_data_buf does not have any locks or protection around it, + * The mce_data_buf does not have any locks or protection around it, * if a second machine check comes in, or a system reset is done * before we have logged the error, then we will get corruption in the * error log. This is preferable over holding off on calling @@ -349,7 +356,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { unsigned long *savep; - struct rtas_error_log *h, *errhdr = NULL; + struct rtas_error_log *h; /* Mask top two bits */ regs->gpr[3] &= ~(0x3UL << 62); @@ -360,24 +367,22 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) } savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ - /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; + /* Use the per cpu buffer from paca to store rtas error log */ + memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); if (!rtas_error_extended(h)) { - memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); + memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); } else { int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); - len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX); - memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); - memcpy(global_mce_data_buf, h, len); - errhdr = (struct rtas_error_log *)global_mce_data_buf; + len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); + memcpy(local_paca->mce_data_buf, h, len); } - return errhdr; + return (struct rtas_error_log *)local_paca->mce_data_buf; } /* Call this when done with the data returned by FWNMI_get_errinfo. @@ -423,6 +428,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs) } /* + * Process MCE rtas errlog event. + */ +static void mce_process_errlog_event(struct irq_work *work) +{ + struct rtas_error_log *err; + + err = fwnmi_get_errlog(); + log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); +} + +/* * See if we can recover from a machine check exception. * This is only called on power4 (or above) and only via * the Firmware Non-Maskable Interrupts (fwnmi) handler @@ -466,7 +482,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) recovered = 1; } - log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); + /* Queue irq work to log this rtas event later. */ + irq_work_queue(&mce_errlog_process_work); return recovered; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 139f0af6c3d9..ba1791fd3234 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,6 +41,7 @@ #include <linux/root_dev.h> #include <linux/of.h> #include <linux/of_pci.h> +#include <linux/memblock.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -69,8 +70,10 @@ #include <asm/kexec.h> #include <asm/isa-bridge.h> #include <asm/security_features.h> +#include <asm/asm-const.h> #include "pseries.h" +#include "../../../../drivers/pci/pci.h" int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -101,6 +104,9 @@ static void pSeries_show_cpuinfo(struct seq_file *m) static void __init fwnmi_init(void) { unsigned long system_reset_addr, machine_check_addr; + u8 *mce_data_buf; + unsigned int i; + int nr_cpus = num_possible_cpus(); int ibm_nmi_register = rtas_token("ibm,nmi-register"); if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) @@ -114,6 +120,18 @@ static void __init fwnmi_init(void) if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, machine_check_addr)) fwnmi_active = 1; + + /* + * Allocate a chunk for per cpu buffer to hold rtas errorlog. + * It will be used in real mode mce handler, hence it needs to be + * below RMA. + */ + mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus, + RTAS_ERROR_LOG_MAX, ppc64_rma_size)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_data_buf = mce_data_buf + + (RTAS_ERROR_LOG_MAX * i); + } } static void pseries_8259_cascade(struct irq_desc *desc) @@ -484,6 +502,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -534,7 +558,7 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); - setup_barrier_nospec(); + setup_count_cache_flush(); } #ifdef CONFIG_PCI_IOV @@ -646,6 +670,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) } } +static void pseries_disable_sriov_resources(struct pci_dev *pdev) +{ + int i; + + pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n"); + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + pdev->resource[i + PCI_IOV_RESOURCES].flags = 0; +} + static void pseries_pci_fixup_resources(struct pci_dev *pdev) { const int *indexes; @@ -653,10 +686,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev) /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); - if (!indexes) - return; - /* Assign the addresses from device tree*/ - of_pci_set_vf_bar_size(pdev, indexes); + if (indexes) + of_pci_set_vf_bar_size(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); } static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) @@ -664,14 +697,14 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) const int *indexes; struct device_node *dn = pci_device_to_OF_node(pdev); - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); - if (!indexes) - return; - /* Assign the addresses from device tree*/ - of_pci_parse_iov_addrs(pdev, indexes); + if (indexes) + of_pci_parse_iov_addrs(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); } static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev, |