From 202648a6070b69d60c6d0926ff06c8863e231468 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 27 Apr 2015 21:48:47 +0900 Subject: powerpc: Constify irq_domain_ops The irq_domain_ops are not modified by the driver and the irqdomain core code accepts pointer to a const data. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/i8259.c | 2 +- arch/powerpc/sysdev/ipic.c | 2 +- arch/powerpc/sysdev/mpc8xx_pic.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/mv64x60_pic.c | 2 +- arch/powerpc/sysdev/qe_lib/qe_ic.c | 2 +- arch/powerpc/sysdev/tsi108_pci.c | 2 +- arch/powerpc/sysdev/uic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- arch/powerpc/sysdev/xilinx_intc.c | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 45598da0b321..31c33475c7b7 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -204,7 +204,7 @@ static int i8259_host_xlate(struct irq_domain *h, struct device_node *ct, return 0; } -static struct irq_domain_ops i8259_host_ops = { +static const struct irq_domain_ops i8259_host_ops = { .match = i8259_host_match, .map = i8259_host_map, .xlate = i8259_host_xlate, diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b28733727ed3..d78f1364b639 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -691,7 +691,7 @@ static int ipic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops ipic_host_ops = { +static const struct irq_domain_ops ipic_host_ops = { .match = ipic_host_match, .map = ipic_host_map, .xlate = irq_domain_xlate_onetwocell, diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index c4828c0be5bd..d93a78be4346 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -120,7 +120,7 @@ static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct, } -static struct irq_domain_ops mpc8xx_pic_host_ops = { +static const struct irq_domain_ops mpc8xx_pic_host_ops = { .map = mpc8xx_pic_host_map, .xlate = mpc8xx_pic_host_xlate, }; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index b2b8447a227a..c8e73332eaad 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1195,7 +1195,7 @@ static void mpic_cascade(unsigned int irq, struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -static struct irq_domain_ops mpic_host_ops = { +static const struct irq_domain_ops mpic_host_ops = { .match = mpic_host_match, .map = mpic_host_map, .xlate = mpic_host_xlate, diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c index 8848e99a83f2..0f842dd16bcd 100644 --- a/arch/powerpc/sysdev/mv64x60_pic.c +++ b/arch/powerpc/sysdev/mv64x60_pic.c @@ -223,7 +223,7 @@ static int mv64x60_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops mv64x60_host_ops = { +static const struct irq_domain_ops mv64x60_host_ops = { .map = mv64x60_host_map, }; diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 543765e1ef14..6512cd8caa51 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -271,7 +271,7 @@ static int qe_ic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops qe_ic_host_ops = { +static const struct irq_domain_ops qe_ic_host_ops = { .match = qe_ic_host_match, .map = qe_ic_host_map, .xlate = irq_domain_xlate_onetwocell, diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 188012c58f7f..57b54476e747 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -397,7 +397,7 @@ static int pci_irq_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops pci_irq_domain_ops = { +static const struct irq_domain_ops pci_irq_domain_ops = { .map = pci_irq_host_map, .xlate = pci_irq_host_xlate, }; diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 7c37157d4c24..1cd057f11725 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -189,7 +189,7 @@ static int uic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops uic_host_ops = { +static const struct irq_domain_ops uic_host_ops = { .map = uic_host_map, .xlate = irq_domain_xlate_twocell, }; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 878a54036a25..5bc5889d4acc 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -360,7 +360,7 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, return 0; } -static struct irq_domain_ops xics_host_ops = { +static const struct irq_domain_ops xics_host_ops = { .match = xics_host_match, .map = xics_host_map, .xlate = xics_host_xlate, diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 56f0524e47a6..43b8b275bc5c 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -179,7 +179,7 @@ static int xilinx_intc_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops xilinx_intc_ops = { +static const struct irq_domain_ops xilinx_intc_ops = { .map = xilinx_intc_map, .xlate = xilinx_intc_xlate, }; -- cgit v1.2.3 From 2222ce0fbbcc4ebfa9995c8d23d72c8239ad712c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 29 Apr 2015 20:44:58 -0500 Subject: powerpc/pseries: Fix possible leaked device node reference Failure return from dlpar_configure_connector when dlpar adding cpus results in leaking references to the cpus parent device node. Move the call to of_node_put() prior to checking the result of dlpar_configure_connector. Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware") Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 019d34aaf054..47d9cebe7159 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -421,11 +421,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) return -ENODEV; dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); + of_node_put(parent); if (!dn) return -EINVAL; - of_node_put(parent); - rc = dlpar_attach_node(dn); if (rc) { dlpar_release_drc(drc_index); -- cgit v1.2.3 From f1e7c202a98cb87cc650d99d014f87e6248ae530 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Mar 2015 20:11:56 +1100 Subject: powerpc: Make STRICT_MM_TYPECHECKS a config option The STRICT_MM_TYPECHECKS code has bit-rotted over the years. To make it possible to easily build test it, make it a CONFIG option. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 8 ++++++++ arch/powerpc/include/asm/page.h | 4 +--- arch/powerpc/include/asm/pgtable-ppc64.h | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0efa8f90a8f1..3a510f4a6b68 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -19,6 +19,14 @@ config PPC_WERROR depends on !PPC_DISABLE_WERROR default y +config STRICT_MM_TYPECHECKS + bool "Do extra type checking on mm types" + default n + help + This option turns on extra type checking for some mm related types. + + If you don't know what this means, say N. + config PRINT_STACK_DEPTH int "Stack depth to print" if DEBUG_KERNEL default 64 diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 69c059887a2c..71294a6e976e 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -278,9 +278,7 @@ extern long long virt_phys_offset; #ifndef __ASSEMBLY__ -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS +#ifdef CONFIG_STRICT_MM_TYPECHECKS /* These are used to make use of C type-checking. */ /* PTE level */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 43e6ad424c7f..f951d9cf358a 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -118,7 +118,7 @@ */ #ifndef __real_pte -#ifdef STRICT_MM_TYPECHECKS +#ifdef CONFIG_STRICT_MM_TYPECHECKS #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) #else -- cgit v1.2.3 From 5af7a6f3e2d015dcaaeffa48c6d47238415cbe66 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 10 Apr 2015 11:52:06 +1000 Subject: powerpc/pasemi: Only the build the pasemi MSI code for PASEMI=y The pasemi MSI code is currently always built when MPIC=y && PCI_MSI=y. It should not have any effect on other platforms, because it immediately checks the MPIC's compatible property for "pasemi,pwrficient-openpic". However it's odd that it's still built even when PASEMI=n. It also needn't be in sysdev, as it's only used by pasemi. So move it into platforms/pasemi, whereby it will only be built for PASEMI=y. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/Makefile | 1 + arch/powerpc/platforms/pasemi/msi.c | 165 ++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/Makefile | 2 +- arch/powerpc/sysdev/mpic.h | 10 +- arch/powerpc/sysdev/mpic_pasemi_msi.c | 167 --------------------------------- 5 files changed, 172 insertions(+), 173 deletions(-) create mode 100644 arch/powerpc/platforms/pasemi/msi.c delete mode 100644 arch/powerpc/sysdev/mpic_pasemi_msi.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile index 8e8d4cae5ebe..60b4e0fd9808 100644 --- a/arch/powerpc/platforms/pasemi/Makefile +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -1,2 +1,3 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o +obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c new file mode 100644 index 000000000000..0b3706604543 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -0,0 +1,165 @@ +/* + * Copyright 2007, Olof Johansson, PA Semi + * + * Based on arch/powerpc/sysdev/mpic_u3msi.c: + * + * Copyright 2006, Segher Boessenkool, IBM Corporation. + * Copyright 2006-2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Allocate 16 interrupts per device, to give an alignment of 16, + * since that's the size of the grouping w.r.t. affinity. If someone + * needs more than 32 MSI's down the road we'll have to rethink this, + * but it should be OK for now. + */ +#define ALLOC_CHUNK 16 + +#define PASEMI_MSI_ADDR 0xfc080000 + +/* A bit ugly, can we get this from the pci_dev somehow? */ +static struct mpic *msi_mpic; + + +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + pci_msi_mask_irq(data); + mpic_mask_irq(data); +} + +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data); + pci_msi_unmask_irq(data); +} + +static struct irq_chip mpic_pasemi_msi_chip = { + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, + .irq_eoi = mpic_end_irq, + .irq_set_type = mpic_set_irq_type, + .irq_set_affinity = mpic_set_affinity, + .name = "PASEMI-MSI", +}; + +static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct msi_desc *entry; + + pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); + + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + virq_to_hw(entry->irq), ALLOC_CHUNK); + irq_dispose_mapping(entry->irq); + } + + return; +} + +static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int virq; + struct msi_desc *entry; + struct msi_msg msg; + int hwirq; + + if (type == PCI_CAP_ID_MSIX) + pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); + pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", + pdev, nvec, type); + + msg.address_hi = 0; + msg.address_lo = PASEMI_MSI_ADDR; + + list_for_each_entry(entry, &pdev->msi_list, list) { + /* Allocate 16 interrupts for now, since that's the grouping for + * affinity. This can be changed later if it turns out 32 is too + * few MSIs for someone, but restrictions will apply to how the + * sources can be changed independently. + */ + hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, + ALLOC_CHUNK); + if (hwirq < 0) { + pr_debug("pasemi_msi: failed allocating hwirq\n"); + return hwirq; + } + + virq = irq_create_mapping(msi_mpic->irqhost, hwirq); + if (virq == NO_IRQ) { + pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", + hwirq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, + ALLOC_CHUNK); + return -ENOSPC; + } + + /* Vector on MSI is really an offset, the hardware adds + * it to the value written at the magic address. So set + * it to 0 to remain sane. + */ + mpic_set_vector(virq, 0); + + irq_set_msi_desc(virq, entry); + irq_set_chip(virq, &mpic_pasemi_msi_chip); + irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); + + pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ + "addr 0x%x\n", virq, hwirq, msg.address_lo); + + /* Likewise, the device writes [0...511] into the target + * register to generate MSI [512...1023] + */ + msg.data = hwirq-0x200; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +int mpic_pasemi_msi_init(struct mpic *mpic) +{ + int rc; + + if (!mpic->irqhost->of_node || + !of_device_is_compatible(mpic->irqhost->of_node, + "pasemi,pwrficient-openpic")) + return -ENODEV; + + rc = mpic_msi_init_allocator(mpic); + if (rc) { + pr_debug("pasemi_msi: Error allocating bitmap!\n"); + return rc; + } + + pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); + + msi_mpic = mpic; + WARN_ON(ppc_md.setup_msi_irqs); + ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + + return 0; +} diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f7cb2a1b01fa..5b492a6438ff 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o +mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP) += fsl_mpic_timer_wakeup.o diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 24bf07a63924..32971a41853b 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -15,7 +15,6 @@ extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq); extern int mpic_msi_init_allocator(struct mpic *mpic); extern int mpic_u3msi_init(struct mpic *mpic); -extern int mpic_pasemi_msi_init(struct mpic *mpic); #else static inline void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) @@ -27,11 +26,12 @@ static inline int mpic_u3msi_init(struct mpic *mpic) { return -1; } +#endif -static inline int mpic_pasemi_msi_init(struct mpic *mpic) -{ - return -1; -} +#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI) +int mpic_pasemi_msi_init(struct mpic *mpic); +#else +static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; } #endif extern int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type); diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c deleted file mode 100644 index a3f660eed6de..000000000000 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2007, Olof Johansson, PA Semi - * - * Based on arch/powerpc/sysdev/mpic_u3msi.c: - * - * Copyright 2006, Segher Boessenkool, IBM Corporation. - * Copyright 2006-2007, Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 of the - * License. - * - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#include "mpic.h" - -/* Allocate 16 interrupts per device, to give an alignment of 16, - * since that's the size of the grouping w.r.t. affinity. If someone - * needs more than 32 MSI's down the road we'll have to rethink this, - * but it should be OK for now. - */ -#define ALLOC_CHUNK 16 - -#define PASEMI_MSI_ADDR 0xfc080000 - -/* A bit ugly, can we get this from the pci_dev somehow? */ -static struct mpic *msi_mpic; - - -static void mpic_pasemi_msi_mask_irq(struct irq_data *data) -{ - pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); - pci_msi_mask_irq(data); - mpic_mask_irq(data); -} - -static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) -{ - pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); - mpic_unmask_irq(data); - pci_msi_unmask_irq(data); -} - -static struct irq_chip mpic_pasemi_msi_chip = { - .irq_shutdown = mpic_pasemi_msi_mask_irq, - .irq_mask = mpic_pasemi_msi_mask_irq, - .irq_unmask = mpic_pasemi_msi_unmask_irq, - .irq_eoi = mpic_end_irq, - .irq_set_type = mpic_set_irq_type, - .irq_set_affinity = mpic_set_affinity, - .name = "PASEMI-MSI", -}; - -static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct msi_desc *entry; - - pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); - - list_for_each_entry(entry, &pdev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); - irq_dispose_mapping(entry->irq); - } - - return; -} - -static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - unsigned int virq; - struct msi_desc *entry; - struct msi_msg msg; - int hwirq; - - if (type == PCI_CAP_ID_MSIX) - pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); - pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", - pdev, nvec, type); - - msg.address_hi = 0; - msg.address_lo = PASEMI_MSI_ADDR; - - list_for_each_entry(entry, &pdev->msi_list, list) { - /* Allocate 16 interrupts for now, since that's the grouping for - * affinity. This can be changed later if it turns out 32 is too - * few MSIs for someone, but restrictions will apply to how the - * sources can be changed independently. - */ - hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, - ALLOC_CHUNK); - if (hwirq < 0) { - pr_debug("pasemi_msi: failed allocating hwirq\n"); - return hwirq; - } - - virq = irq_create_mapping(msi_mpic->irqhost, hwirq); - if (virq == NO_IRQ) { - pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", - hwirq); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, - ALLOC_CHUNK); - return -ENOSPC; - } - - /* Vector on MSI is really an offset, the hardware adds - * it to the value written at the magic address. So set - * it to 0 to remain sane. - */ - mpic_set_vector(virq, 0); - - irq_set_msi_desc(virq, entry); - irq_set_chip(virq, &mpic_pasemi_msi_chip); - irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); - - pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ - "addr 0x%x\n", virq, hwirq, msg.address_lo); - - /* Likewise, the device writes [0...511] into the target - * register to generate MSI [512...1023] - */ - msg.data = hwirq-0x200; - pci_write_msi_msg(virq, &msg); - } - - return 0; -} - -int mpic_pasemi_msi_init(struct mpic *mpic) -{ - int rc; - - if (!mpic->irqhost->of_node || - !of_device_is_compatible(mpic->irqhost->of_node, - "pasemi,pwrficient-openpic")) - return -ENODEV; - - rc = mpic_msi_init_allocator(mpic); - if (rc) { - pr_debug("pasemi_msi: Error allocating bitmap!\n"); - return rc; - } - - pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); - - msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; - - return 0; -} -- cgit v1.2.3 From 5c0aebf6e101e9ea6ddea7aaba13582c89206333 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Apr 2015 12:17:07 +1000 Subject: powerpc: Show utsname->machine in boot-up banner Currently we print "Starting Linux PPC64" at boot. But we don't mention anywhere whether the kernel is big or little endian. If we print the utsname->machine value instead we get either "ppc64" or "ppc64le" which is much more informative, eg: Starting Linux ppc64le #1 SMP Wed Apr 15 12:12:20 AEST 2015 Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c69671c03c3b..9fe3dcdbfca7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -523,7 +523,8 @@ void __init setup_system(void) smp_release_cpus(); #endif - pr_info("Starting Linux PPC64 %s\n", init_utsname()->version); + pr_info("Starting Linux %s %s\n", init_utsname()->machine, + init_utsname()->version); pr_info("-----------------------------------------------------\n"); pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); -- cgit v1.2.3 From e79c8385c878abdf90290cba6bea08fd60058562 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Apr 2015 22:26:57 +1000 Subject: powerpc: Don't do gcc version checks if we're building with clang We have several checks for bad gcc versions in our Makefile. These don't apply if we're building with clang, so skip them in that case. The obvious check would be for ${COMPILER} = "gcc", but because of the way the logic in the top level Makefile conditionally sets COMPILER, it's possible that we're building with gcc but COMPILER was not set. So instead check for ${COMPILER} != "clang", which we know is currently the only other possibility. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 07a480861f78..3609504b0ad2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -314,7 +314,8 @@ TOUT := .tmp_gas_check # - Require gcc 4.0 or above on 64-bit # - gcc-4.2.0 has issues compiling modules on 64-bit checkbin: - @if test "$(cc-version)" = "0304" ; then \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-version)" = "0304" ; then \ if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ echo 'correctly with gcc-3.4 and your version of binutils.'; \ @@ -322,13 +323,15 @@ checkbin: false; \ fi ; \ fi - @if test "$(cc-version)" -lt "0400" \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-version)" -lt "0400" \ && test "x${CONFIG_PPC64}" = "xy" ; then \ echo -n "Sorry, GCC v4.0 or above is required to build " ; \ echo "the 64-bit powerpc kernel." ; \ false ; \ fi - @if test "$(cc-fullversion)" = "040200" \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-fullversion)" = "040200" \ && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \ echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \ echo 'kernel with modules enabled.' ; \ -- cgit v1.2.3 From 60e065f70bdb0b0e916389024922ad40f3270c96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 23 Apr 2015 17:27:12 +1000 Subject: powerpc: Reject binutils 2.24 when building little endian There is a bug in binutils 2.24 which causes miscompilation if we're building little endian and using weak symbols (which the kernel does). It is fixed in binutils commit 57fa7b8c7e59 "Correct elf_merge_st_other arguments for weak symbols", which is in binutils 2.25 and has been backported to the binutils 2.24 branch and has been picked up by most distros it seems. However if we're running stock 2.24 (no extra version) then the bug is present, so check for that and bail. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3609504b0ad2..a314cb024c8b 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -339,6 +339,14 @@ checkbin: echo 'disable kernel modules' ; \ false ; \ fi + @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ + && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ + echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ + echo 'in some circumstances.' ; \ + echo -n '*** Please use a different binutils version.' ; \ + false ; \ + fi + CLEAN_FILES += $(TOUT) -- cgit v1.2.3 From 63da88dd48268af415b3677e20f6bcb92c1bf32c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:40:34 +1000 Subject: powerpc/vdso: Remove unused debug code It's in the git history if we ever need it back. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso.c | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 305eb0d9b768..869fd0c53280 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -140,50 +140,6 @@ struct lib64_elfinfo }; -#ifdef __DEBUG -static void dump_one_vdso_page(struct page *pg, struct page *upg) -{ - printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), - page_count(pg), - pg->flags); - if (upg && !IS_ERR(upg) /* && pg != upg*/) { - printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) - << PAGE_SHIFT), - page_count(upg), - upg->flags); - } - printk("\n"); -} - -static void dump_vdso_pages(struct vm_area_struct * vma) -{ - int i; - - if (!vma || is_32bit_task()) { - printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); - for (i=0; ivm_mm) ? - follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } - if (!vma || !is_32bit_task()) { - printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); - for (i=0; ivm_mm) ? - follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } -} -#endif /* DEBUG */ - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree -- cgit v1.2.3 From 6e5c077519f2661c56647fc21b2ec2ba0a50bb75 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:40:35 +1000 Subject: powerpc/vdso: Combine start/size variables In vdso_fixup_features() we have start64/start32 and size64/size32, but they have the same types, ie. void * and unsigned long. They're only used to save the return value from find_sectionXX() for the subsequent call to do_feature_fixups(), so there's no overlap in their usage either. So we can just consolidate them into start/size and avoid the duplication. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso.c | 55 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 869fd0c53280..8331d0bef0fb 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -525,55 +525,52 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, static __init int vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { - void *start32; - unsigned long size32; + unsigned long size; + void *start; #ifdef CONFIG_PPC64 - void *start64; - unsigned long size64; - - start64 = find_section64(v64->hdr, "__ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->cpu_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->mmu_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); + if (start) do_feature_fixups(powerpc_firmware_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__lwsync_fixup", &size); + if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, - start64, start64 + size64); + start, start + size); #endif /* CONFIG_PPC64 */ - start32 = find_section32(v32->hdr, "__ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->cpu_features, - start32, start32 + size32); + start, start + size); - start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->mmu_features, - start32, start32 + size32); + start, start + size); #ifdef CONFIG_PPC64 - start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); + if (start) do_feature_fixups(powerpc_firmware_features, - start32, start32 + size32); + start, start + size); #endif /* CONFIG_PPC64 */ - start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__lwsync_fixup", &size); + if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, - start32, start32 + size32); + start, start + size); return 0; } -- cgit v1.2.3 From e0d0059169945c8ee16790d2e7244cea397dfd56 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 May 2015 20:01:02 +1000 Subject: powerpc/vdso: Disable building the 32-bit VDSO on little endian The only little endian configuration we support is ppc64le. As such if we're building little endian we don't need a 32-bit VDSO, because there is no 32-bit userspace. This patch is a fairly ugly mess of #ifdefs, but is the minimal logic required to disable the 32-bit VDSO. We can hopefully clean up the result in future with some further refactoring. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/vdso.c | 36 ++++++++++++++++++++++++++++++++-- arch/powerpc/platforms/Kconfig.cputype | 10 ++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c1ebbdaac28f..87c7d1473488 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -33,11 +33,12 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o vdso32/ \ + misc_$(CONFIG_WORD_SIZE).o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o +obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8331d0bef0fb..b457bfa28436 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -49,13 +49,16 @@ /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -extern char vdso32_start, vdso32_end; -static void *vdso32_kbase = &vdso32_start; static unsigned int vdso32_pages; +static void *vdso32_kbase; static struct page **vdso32_pagelist; unsigned long vdso32_sigtramp; unsigned long vdso32_rt_sigtramp; +#ifdef CONFIG_VDSO32 +extern char vdso32_start, vdso32_end; +#endif + #ifdef CONFIG_PPC64 extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; @@ -248,6 +251,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) +#ifdef CONFIG_VDSO32 static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, unsigned long *size) { @@ -335,6 +339,20 @@ static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, return 0; } +#else /* !CONFIG_VDSO32 */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, + const char *symname) +{ + return 0; +} + +static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + return 0; +} +#endif /* CONFIG_VDSO32 */ #ifdef CONFIG_PPC64 @@ -445,6 +463,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, * Locate symbol tables & text section */ +#ifdef CONFIG_VDSO32 v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); if (v32->dynsym == NULL || v32->dynstr == NULL) { @@ -457,6 +476,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, return -1; } v32->text = sect - vdso32_kbase; +#endif #ifdef CONFIG_PPC64 v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); @@ -493,7 +513,9 @@ static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { +#ifdef CONFIG_VDSO32 Elf32_Sym *sym32; +#endif #ifdef CONFIG_PPC64 Elf64_Sym *sym64; @@ -508,6 +530,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, (sym64->st_value - VDSO64_LBASE); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 sym32 = find_symbol32(v32, "__kernel_datapage_offset"); if (sym32 == NULL) { printk(KERN_ERR "vDSO32: Can't find symbol " @@ -517,6 +540,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE); +#endif return 0; } @@ -550,6 +574,7 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, start, start + size); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 start = find_section32(v32->hdr, "__ftr_fixup", &size); if (start) do_feature_fixups(cur_cpu_spec->cpu_features, @@ -571,6 +596,7 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, start, start + size); +#endif return 0; } @@ -732,11 +758,15 @@ static int __init vdso_init(void) #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 + vdso32_kbase = &vdso32_start; + /* * Calculate the size of the 32 bits vDSO */ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); +#endif /* @@ -757,6 +787,7 @@ static int __init vdso_init(void) return 0; } +#ifdef CONFIG_VDSO32 /* Make sure pages are in the correct state */ vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2), GFP_KERNEL); @@ -769,6 +800,7 @@ static int __init vdso_init(void) } vdso32_pagelist[i++] = virt_to_page(vdso_data); vdso32_pagelist[i] = NULL; +#endif #ifdef CONFIG_PPC64 vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2), diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7264e91190be..724ecc791404 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -405,6 +405,16 @@ config PPC_DOORBELL endmenu +config VDSO32 + def_bool y + depends on PPC32 || CPU_BIG_ENDIAN + help + This symbol controls whether we build the 32-bit VDSO. We obviously + want to do that if we're building a 32-bit kernel. If we're building + a 64-bit kernel then we only want a 32-bit VDSO if we're building for + big endian. That is because the only little endian configuration we + support is ppc64le which is 64-bit only. + choice prompt "Endianness selection" default CPU_BIG_ENDIAN -- cgit v1.2.3 From 38c0488770983b2654f075540cc1fc71f55b6df5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 30 Apr 2015 13:50:07 +1000 Subject: powerpc/powernv: Silence SYSPARAM warning on boot OpenPower BMC machines do not place any sysparams in the device tree, so at every boot we get a warning: [ 0.437176] SYSPARAM: Opal sysparam node not found Remove the warning, and reorder the init so we don't peform allocations when there is no sysparam node in the device tree. Signed-off-by: Joel Stanley Acked-by: Neelesh Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-sysparam.c | 31 +++++++++++++------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 9d1acf22a099..2e52b47393e7 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -162,10 +162,20 @@ void __init opal_sys_param_init(void) goto out; } + /* Some systems do not use sysparams; this is not an error */ + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) + goto out; + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); if (!sysparam_kobj) { pr_err("SYSPARAM: Failed to create sysparam kobject\n"); - goto out; + goto out_node_put; } /* Allocate big enough buffer for any get/set transactions */ @@ -176,30 +186,19 @@ void __init opal_sys_param_init(void) goto out_kobj_put; } - sysparam = of_find_node_by_path("/ibm,opal/sysparams"); - if (!sysparam) { - pr_err("SYSPARAM: Opal sysparam node not found\n"); - goto out_param_buf; - } - - if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { - pr_err("SYSPARAM: Opal sysparam node not compatible\n"); - goto out_node_put; - } - /* Number of parameters exposed through DT */ count = of_property_count_strings(sysparam, "param-name"); if (count < 0) { pr_err("SYSPARAM: No string found of property param-name in " "the node %s\n", sysparam->name); - goto out_node_put; + goto out_param_buf; } id = kzalloc(sizeof(*id) * count, GFP_KERNEL); if (!id) { pr_err("SYSPARAM: Failed to allocate memory to read parameter " "id\n"); - goto out_node_put; + goto out_param_buf; } size = kzalloc(sizeof(*size) * count, GFP_KERNEL); @@ -293,12 +292,12 @@ out_free_size: kfree(size); out_free_id: kfree(id); -out_node_put: - of_node_put(sysparam); out_param_buf: kfree(param_data_buf); out_kobj_put: kobject_put(sysparam_kobj); +out_node_put: + of_node_put(sysparam); out: return; } -- cgit v1.2.3 From ed3e81ff2016b180d8f439a1981f2c8b59b0b53c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:07 +1100 Subject: powerpc/eeh: Move PE state constants around There are two equivalent sets of PE state constants, defined in arch/powerpc/include/asm/eeh.h and include/uapi/linux/vfio.h. Though the names are different, their corresponding values are exactly same. The former is used by EEH core and the latter is used by userspace. The patch moves those constants from arch/powerpc/include/asm/eeh.h to arch/powerpc/include/uapi/asm/eeh.h, which are expected to be used by userspace from now on. We can't delete those constants in vfio.h as it's uncertain that those constants have been or will be used by userspace. Suggested-by: David Gibson Signed-off-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 7 ++----- arch/powerpc/include/uapi/asm/eeh.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/eeh.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a52db28ecc1e..7fd7b5429e5a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -27,6 +27,8 @@ #include #include +#include + struct pci_dev; struct pci_bus; struct pci_dn; @@ -185,11 +187,6 @@ enum { #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ -#define EEH_PE_STATE_NORMAL 0 /* Normal state */ -#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ -#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ -#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */ -#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h new file mode 100644 index 000000000000..8bb34b004995 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/eeh.h @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2015 + * + * Authors: Gavin Shan + */ + +#ifndef _ASM_POWERPC_EEH_H +#define _ASM_POWERPC_EEH_H + +/* PE states */ +#define EEH_PE_STATE_NORMAL 0 /* Normal state */ +#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ +#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ +#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ + +#endif /* _ASM_POWERPC_EEH_H */ -- cgit v1.2.3 From ec33d36e5ab5d52d59a8f696f7efb682bfc58494 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:08 +1100 Subject: powerpc/eeh: Introduce eeh_pe_inject_err() The patch defines PCI error types and functions in uapi/asm/eeh.h and exports function eeh_pe_inject_err(), which will be called by VFIO driver to inject the specified PCI error to the indicated PE for testing purpose. Signed-off-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 ++ arch/powerpc/include/uapi/asm/eeh.h | 26 ++++++++++++++++++++++++++ arch/powerpc/kernel/eeh.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7fd7b5429e5a..c5eb86f3d452 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -291,6 +291,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option); int eeh_pe_get_state(struct eeh_pe *pe); int eeh_pe_reset(struct eeh_pe *pe, int option); int eeh_pe_configure(struct eeh_pe *pe); +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h index 8bb34b004995..291b7d1814a6 100644 --- a/arch/powerpc/include/uapi/asm/eeh.h +++ b/arch/powerpc/include/uapi/asm/eeh.h @@ -27,4 +27,30 @@ #define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ #define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ +/* EEH error types and functions */ +#define EEH_ERR_TYPE_32 0 /* 32-bits error */ +#define EEH_ERR_TYPE_64 1 /* 64-bits error */ +#define EEH_ERR_FUNC_MIN 0 +#define EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define EEH_ERR_FUNC_LD_MEM_DATA 1 +#define EEH_ERR_FUNC_LD_IO_ADDR 2 /* IO load */ +#define EEH_ERR_FUNC_LD_IO_DATA 3 +#define EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */ +#define EEH_ERR_FUNC_LD_CFG_DATA 5 +#define EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory store */ +#define EEH_ERR_FUNC_ST_MEM_DATA 7 +#define EEH_ERR_FUNC_ST_IO_ADDR 8 /* IO store */ +#define EEH_ERR_FUNC_ST_IO_DATA 9 +#define EEH_ERR_FUNC_ST_CFG_ADDR 10 /* Config store */ +#define EEH_ERR_FUNC_ST_CFG_DATA 11 +#define EEH_ERR_FUNC_DMA_RD_ADDR 12 /* DMA read */ +#define EEH_ERR_FUNC_DMA_RD_DATA 13 +#define EEH_ERR_FUNC_DMA_RD_MASTER 14 +#define EEH_ERR_FUNC_DMA_RD_TARGET 15 +#define EEH_ERR_FUNC_DMA_WR_ADDR 16 /* DMA write */ +#define EEH_ERR_FUNC_DMA_WR_DATA 17 +#define EEH_ERR_FUNC_DMA_WR_MASTER 18 +#define EEH_ERR_FUNC_DMA_WR_TARGET 19 +#define EEH_ERR_FUNC_MAX 19 + #endif /* _ASM_POWERPC_EEH_H */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9ee61d15653d..04b5d94973dc 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1647,6 +1647,41 @@ int eeh_pe_configure(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_configure); +/** + * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @function: error function + * @addr: address + * @mask: address mask + * + * The routine is called to inject the specified PCI error, which + * is determined by @type and @function, to the indicated PE for + * testing purpose. + */ +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* Unsupported operation ? */ + if (!eeh_ops || !eeh_ops->err_inject) + return -ENOENT; + + /* Check on PCI error type */ + if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) + return -EINVAL; + + /* Check on PCI error function */ + if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) + return -EINVAL; + + return eeh_ops->err_inject(pe, type, func, addr, mask); +} +EXPORT_SYMBOL_GPL(eeh_pe_inject_err); + static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { -- cgit v1.2.3 From 3721352990ba5eafa7e92a1c9cf86af3a46de151 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:09 +0800 Subject: powerpc/eeh: fix start/end/flags type in struct pci_io_addr_range{} struct pci_io_addr_range{} stores the information of pci resources. It would be better to keep these related fields have the same type as in struct resource{}. This patch fixes the start/end/flags type in struct pci_io_addr_range{} to have the same type as in struct resource{}. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_cache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index eeabeabea49c..a1e86e172e3c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -48,11 +48,11 @@ */ struct pci_io_addr_range { struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; + resource_size_t addr_lo; + resource_size_t addr_hi; struct eeh_dev *edev; struct pci_dev *pcidev; - unsigned int flags; + unsigned long flags; }; static struct pci_io_addr_cache { @@ -125,8 +125,8 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) /* Insert address range into the rb tree. */ static struct pci_io_addr_range * -eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) +eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, + resource_size_t ahi, unsigned long flags) { struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; struct rb_node *parent = NULL; @@ -197,9 +197,9 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) /* Walk resources on this device, poke them into the tree */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); + resource_size_t start = pci_resource_start(dev,i); + resource_size_t end = pci_resource_end(dev,i); + unsigned long flags = pci_resource_flags(dev,i); /* We are interested only bus addresses, not dma or other stuff */ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) -- cgit v1.2.3 From 2ac3990cc36b1e42feca733b25254fb6dae15431 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:10 +0800 Subject: powerpc/eeh: fix comment for wait_state() To retrieve the PCI slot state, EEH driver would set a timeout for that. While current comment is not aligned to what the code does. This patch fixes those comments according to the code. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 24768ff3cb73..89eb4bc34d3a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -660,7 +660,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, - * sometimes over 3 seconds for certain systems. + * sometimes over 300 seconds for certain systems. */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab3d5a9..d0254710595d 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -979,7 +979,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) /** * pnv_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 2039397cc75d..1ba55d0bb449 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -519,7 +519,7 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) /** * pseries_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. -- cgit v1.2.3 From e17866d5593dc6ea7bff9ee56751fae6c3f56223 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:11 +0800 Subject: powerpc/eeh: fix powernv_eeh_wait_state delay logic As the comment indicates, powernv_eeh_get_state() will inform EEH core to delay 1 second. This means the delay doesn't happen when powernv_eeh_get_state() returns. This patch moves the delay subtraction just before msleep(), which is the same logic in pseries_eeh_wait_state(). Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index d0254710595d..622f08cf54b6 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1000,13 +1000,13 @@ static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) if (ret != EEH_STATE_UNAVAILABLE) return ret; - max_wait -= mwait; if (max_wait <= 0) { pr_warn("%s: Timeout getting PE#%x's state (%d)\n", __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } + max_wait -= mwait; msleep(mwait); } -- cgit v1.2.3 From f77ceb717d0955f1df20601683ea55675101bab6 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:12 +0800 Subject: powerpc/eeh: remove unused macro IS_BRIDGE Currently, the macro IS_BRIDGE is not used any where. This patch just removes it. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 04b5d94973dc..23e0aa773643 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -144,8 +144,6 @@ struct eeh_stats { static struct eeh_stats eeh_stats; -#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) - static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) -- cgit v1.2.3 From d4d4add9eacf0a3e642228a22df722f00921a184 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:36:50 +1000 Subject: powerpc: Little endian should depend on PPC_BOOK3S_64 The only little endian configuration we support is ppc64le, all other configurations are big endian. So we should only offer a choice of endian if we're building for 64-bit Book3S, ie. PPC_BOOK3S_64. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 724ecc791404..c140e94c7c72 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -431,6 +431,7 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN bool "Build little endian kernel" + depends on PPC_BOOK3S_64 select PPC64_BOOT_WRAPPER help Build a little endian kernel. -- cgit v1.2.3 From 7978f76c4495c194183e03f65406f78cdda37882 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Tue, 5 May 2015 17:30:21 +0200 Subject: powerpc: Enable sys_kcmp() for CRIU The commit 8170a83f15ee ("powerpc: Wireup the kcmp syscall to sys_ni") has disabled the kcmp syscall for powerpc. This has been done due to the use of unsigned long parameters which may require a dedicated wrapper to handle 32bit process on top of 64bit kernel. However in the kcmp() case, the 2 unsigned long parameters are currently only used to carry file descriptors from user space to the kernel. Since such a parameter is passed through register, and file descriptor doesn't need to get extended, there is, today, no need for a wrapper. In the case there will be a need to pass address in or out of this system call, then a wrapper could be required, it will then be to care of it. As today this is not the case, it is safe to enable kcmp() on powerpc. Tested (by Laurent) on 64-bit, 32-bit, and 32-bit userspace on 64-bit kernel using tools/testing/selftests/kcmp [mpe]. Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index f1863a138b4a..71f2b3f02cf8 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -358,7 +358,7 @@ SYSCALL_SPU(setns) COMPAT_SYS(process_vm_readv) COMPAT_SYS(process_vm_writev) SYSCALL(finit_module) -SYSCALL(ni_syscall) /* sys_kcmp */ +SYSCALL(kcmp) /* sys_kcmp */ SYSCALL_SPU(sched_setattr) SYSCALL_SPU(sched_getattr) SYSCALL_SPU(renameat2) -- cgit v1.2.3 From e602ffb2fa6b5533acd6847bd19a135e14ed4c54 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:56 +0530 Subject: powerpc: Fix cpu_online_cores_map to return only online threads mask Currently, cpu_online_cores_map returns a mask, which for every core with at least one online thread, has the bit for thread 0 of the core set to 1, and the bits for all other threads of the core set to 0. But thread 0 of the core itself may not be online always. In such cases, if the returned mask is used for IPI, then it'll cause IPIs to be skipped on cores where the first thread is offline, because the IPI code refuses to send IPIs to offline threads. Fix this by setting the bit of the first online thread in the core. This is done by fixing this in the underlying function cpu_thread_mask_to_cores. The result has the property that for all cores with online threads, there is one bit set in the returned map. And further, all bits that are set in the returned map correspond to online threads. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy [ Changelog from Michael Ellerman ] Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputhreads.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 5be6c4753667..ba42e46ea58e 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -31,9 +31,9 @@ extern cpumask_t threads_core_mask; /* cpu_thread_mask_to_cores - Return a cpumask of one per cores * hit by the argument * - * @threads: a cpumask of threads + * @threads: a cpumask of online threads * - * This function returns a cpumask which will have one "cpu" (or thread) + * This function returns a cpumask which will have one online cpu's * bit set for each core that has at least one thread set in the argument. * * This can typically be used for things like IPI for tlb invalidations @@ -42,13 +42,16 @@ extern cpumask_t threads_core_mask; static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) { cpumask_t tmp, res; - int i; + int i, cpu; cpumask_clear(&res); for (i = 0; i < NR_CPUS; i += threads_per_core) { cpumask_shift_left(&tmp, &threads_core_mask, i); - if (cpumask_intersects(threads, &tmp)) - cpumask_set_cpu(i, &res); + if (cpumask_intersects(threads, &tmp)) { + cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpumask_set_cpu(cpu, &res); + } } return res; } -- cgit v1.2.3 From d405a98c70ebbaa6ef276d7307d034a682321b95 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:57 +0530 Subject: powerpc/powernv: Move cpuidle related code from setup.c to new file This is a cleanup patch; doesn't change any functionality. Moves all cpuidle related code from setup.c to a new file. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy [mpe: Fix the SMP=n build by including asm/smp.h in idle.c] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/idle.c | 192 ++++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 171 ---------------------------- 3 files changed, 193 insertions(+), 172 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/idle.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 33e44f37212f..bee923585afc 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,4 +1,4 @@ -obj-y += setup.o opal-wrappers.o opal.o opal-async.o +obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c new file mode 100644 index 000000000000..5c799324b695 --- /dev/null +++ b/arch/powerpc/platforms/powernv/idle.c @@ -0,0 +1,192 @@ +/* + * PowerNV cpuidle code + * + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "powernv.h" +#include "subcore.h" + +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. + */ + uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. + */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + u32 *flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + goto out; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + goto out; + } + dt_idle_states = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-flags"); + if (dt_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + goto out; + } + + flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + goto out_free; + } + + for (i = 0; i < dt_idle_states; i++) + supported_cpuidle_states |= flags[i]; + + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); +out_free: + kfree(flags); +out: + return 0; +} + +subsys_initcall(pnv_init_idle_states); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 16fdcb23f4c3..509cdd2f7ad8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,12 +35,8 @@ #include #include #include -#include -#include -#include #include "powernv.h" -#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -277,173 +273,6 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } -static u32 supported_cpuidle_states; - -int pnv_save_sprs_for_winkle(void) -{ - int cpu; - int rc; - - /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross - * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. - */ - uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); - - for_each_possible_cpu(cpu) { - uint64_t pir = get_hard_smp_processor_id(cpu); - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; - - /* - * HSPRG0 is used to store the cpu's pointer to paca. Hence last - * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 - * with 63rd bit set, so that when a thread wakes up at 0x100 we - * can use this bit to distinguish between fastsleep and - * deep winkle. - */ - hsprg0_val |= 1; - - rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); - if (rc != 0) - return rc; - - /* HIDs are per core registers */ - if (cpu_thread_in_core(cpu) == 0) { - - rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); - if (rc != 0) - return rc; - } - } - - return 0; -} - -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - First 8 bits track the idle state of each thread - * of the core. The 8th bit is the lock bit. Initially all thread bits - * are set. They are cleared when the thread enters deep idle state - * like sleep and winkle. Initially the lock bit is cleared. - * The lock bit has 2 purposes - * a. While the first thread is restoring core state, it prevents - * other threads in the core from switching to process context. - * b. While the last thread in the core is saving the core state, it - * prevents a different thread from waking up. - */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca[cpu].core_idle_state_ptr = core_idle_state; - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; - paca[cpu].thread_mask = 1 << j; - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) - pnv_save_sprs_for_winkle(); -} - -u32 pnv_get_supported_cpuidle_states(void) -{ - return supported_cpuidle_states; -} -EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); - -static int __init pnv_init_idle_states(void) -{ - struct device_node *power_mgt; - int dt_idle_states; - u32 *flags; - int i; - - supported_cpuidle_states = 0; - - if (cpuidle_disable != IDLE_NO_OVERRIDE) - goto out; - - if (!firmware_has_feature(FW_FEATURE_OPALv3)) - goto out; - - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { - pr_warn("opal: PowerMgmt Node not found\n"); - goto out; - } - dt_idle_states = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); - goto out; - } - - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free; - } - - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; - - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } - pnv_alloc_idle_core_states(); -out_free: - kfree(flags); -out: - return 0; -} - -subsys_initcall(pnv_init_idle_states); - static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); -- cgit v1.2.3 From 5703d2f4a1da6d23b3be896947ce255226fc4295 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:58 +0530 Subject: powerpc/powernv: Introduce sysfs control for fastsleep workaround behavior Fastsleep is one of the idle state which cpuidle subsystem currently uses on power8 machines. In this state L2 cache is brought down to a threshold voltage. Therefore when the core is in fastsleep, the communication between L2 and L3 needs to be fenced. But there is a bug in the current power8 chips surrounding this fencing. OPAL provides a workaround which precludes the possibility of hitting this bug. But running with this workaround applied causes checkstop if any correctable error in L2 cache directory is detected. Hence OPAL also provides a way to undo the workaround. In the existing implementation, workaround is applied by the last thread of the core entering fastsleep and undone by the first thread waking up. But this has a performance cost. These OPAL calls account for roughly 4000 cycles everytime the core has to enter or wakeup from fastsleep. This patch introduces a sysfs attribute (fastsleep_workaround_applyonce) to choose the behavior of this workaround. By default, fastsleep_workaround_applyonce = 0. In this case, workaround is applied/undone everytime the core enters/exits fastsleep. fastsleep_workaround_applyonce = 1. In this case the workaround is applied once on all the cores and never undone. This can be triggered by echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce For simplicity this attribute can be modified only once. Implying, once fastsleep_workaround_applyonce is changed to 1, it cannot be reverted to the default state. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 7 ++ arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/idle.c | 101 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 110 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0321a909e663..a49e5fa6f939 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -165,6 +165,13 @@ #define OPAL_PM_WINKLE_ENABLED 0x00040000 #define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */ +/* + * OPAL_CONFIG_CPU_IDLE_STATE parameters + */ +#define OPAL_CONFIG_IDLE_FASTSLEEP 1 +#define OPAL_CONFIG_IDLE_UNDO 0 +#define OPAL_CONFIG_IDLE_APPLY 1 + #ifndef __ASSEMBLY__ /* Other enums */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 042af1abfc4d..9a4781357fa6 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -186,6 +186,7 @@ int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); +int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t msg_len); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 5c799324b695..bd39a120bd60 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -137,6 +139,96 @@ u32 pnv_get_supported_cpuidle_states(void) } EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static void pnv_fastsleep_workaround_apply(void *info) + +{ + int rc; + int *err = info; + + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + if (rc) + *err = 1; +} + +/* + * Used to store fastsleep workaround state + * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) + * 1 - Workaround applied once, never undone. + */ +static u8 fastsleep_workaround_applyonce; + +static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); +} + +static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + cpumask_t primary_thread_mask; + int err; + u8 val; + + if (kstrtou8(buf, 0, &val) || val != 1) + return -EINVAL; + + if (fastsleep_workaround_applyonce == 1) + return count; + + /* + * fastsleep_workaround_applyonce = 1 implies + * fastsleep workaround needs to be left in 'applied' state on all + * the cores. Do this by- + * 1. Patching out the call to 'undo' workaround in fastsleep exit path + * 2. Sending ipi to all the cores which have atleast one online thread + * 3. Patching out the call to 'apply' workaround in fastsleep entry + * path + * There is no need to send ipi to cores which have all threads + * offlined, as last thread of the core entering fastsleep or deeper + * state would have applied workaround. + */ + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); + goto fail; + } + + get_online_cpus(); + primary_thread_mask = cpu_online_cores_map(); + on_each_cpu_mask(&primary_thread_mask, + pnv_fastsleep_workaround_apply, + &err, 1); + put_online_cpus(); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); + goto fail; + } + + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); + goto fail; + } + + fastsleep_workaround_applyonce = 1; + + return count; +fail: + return -EIO; +} + +static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, + show_fastsleep_workaround_applyonce, + store_fastsleep_workaround_applyonce); + static int __init pnv_init_idle_states(void) { struct device_node *power_mgt; @@ -181,7 +273,16 @@ static int __init pnv_init_idle_states(void) patch_instruction( (unsigned int *)pnv_fastsleep_workaround_at_exit, PPC_INST_NOP); + } else { + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be applied. + */ + device_create_file(cpu_subsys.dev_root, + &dev_attr_fastsleep_workaround_applyonce); } + pnv_alloc_idle_core_states(); out_free: kfree(flags); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a7ade94cdf87..bf15ead00e12 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -283,6 +283,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); -- cgit v1.2.3 From 96e023e7534c16ab54e236c114340e2447c36d2f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:36 +1000 Subject: powerpc/powernv: Reorder OPAL subsystem initialisation Most of the OPAL subsystems are always compiled in for PowerNV and many of them need to be initialised before or after other OPAL subsystems. Rather than trying to control this ordering through machine initcalls it is clearer and easier to control initialisation order with explicit calls in opal_init. Signed-off-by: Alistair Popple Cc: Mahesh Jagannath Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 +++ arch/powerpc/platforms/powernv/opal-async.c | 3 +-- arch/powerpc/platforms/powernv/opal-hmi.c | 3 +-- arch/powerpc/platforms/powernv/opal-memory-errors.c | 2 +- arch/powerpc/platforms/powernv/opal-sensor.c | 3 +-- arch/powerpc/platforms/powernv/opal.c | 13 ++++++++++++- 6 files changed, 19 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9a4781357fa6..dcdf83c86256 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -240,6 +240,9 @@ extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); extern void opal_msglog_init(void); +extern int opal_async_comp_init(void); +extern int opal_sensor_init(void); +extern int opal_hmi_handler_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 693b6cdac691..bdc8c0c71d15 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -151,7 +151,7 @@ static struct notifier_block opal_async_comp_nb = { .priority = 0, }; -static int __init opal_async_comp_init(void) +int __init opal_async_comp_init(void) { struct device_node *opal_node; const __be32 *async; @@ -205,4 +205,3 @@ out_opal_node: out: return err; } -machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index b322bfb51343..a8f49d380449 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -170,7 +170,7 @@ static struct notifier_block opal_hmi_handler_nb = { .priority = 0, }; -static int __init opal_hmi_handler_init(void) +int __init opal_hmi_handler_init(void) { int ret; @@ -186,4 +186,3 @@ static int __init opal_hmi_handler_init(void) } return 0; } -machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 43db2136dbff..00a29432be39 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -144,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -machine_subsys_initcall(powernv, opal_mem_err_init); +machine_device_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 655250499d18..a06059df9239 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -77,7 +77,7 @@ out: } EXPORT_SYMBOL_GPL(opal_get_sensor_data); -static __init int opal_sensor_init(void) +int __init opal_sensor_init(void) { struct platform_device *pdev; struct device_node *sensor; @@ -93,4 +93,3 @@ static __init int opal_sensor_init(void) return PTR_ERR_OR_ZERO(pdev); } -machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2241565b0739..eb3decc67c43 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -393,7 +393,6 @@ static int __init opal_message_init(void) } return 0; } -machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -807,6 +806,18 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Initialise OPAL messaging system */ + opal_message_init(); + + /* Initialise OPAL asynchronous completion interface */ + opal_async_comp_init(); + + /* Initialise OPAL sensor interface */ + opal_sensor_init(); + + /* Initialise OPAL hypervisor maintainence interrupt handling */ + opal_hmi_handler_init(); + /* Create i2c platform devices */ opal_i2c_create_devs(); -- cgit v1.2.3 From 9f0fd0499d30dbd61632463f293e2e826fa363b1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:37 +1000 Subject: powerpc/powernv: Add a virtual irqchip for opal events Whenever an interrupt is received for opal the linux kernel gets a bitfield indicating certain events that have occurred and need handling by the various device drivers. Currently this is handled using a notifier interface where we call every device driver that has registered to receive opal events. This approach has several drawbacks. For example each driver has to do its own checking to see if the event is relevant as well as event masking. There is also no easy method of recording the number of times we receive particular events. This patch solves these issues by exposing opal events via the standard interrupt APIs by adding a new interrupt chip and domain. Drivers can then register for the appropriate events using standard kernel calls such as irq_of_parse_and_map(). Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-irqchip.c | 253 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 78 ++------ arch/powerpc/platforms/powernv/powernv.h | 4 + 5 files changed, 273 insertions(+), 67 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-irqchip.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index dcdf83c86256..1412814347ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -243,6 +243,7 @@ extern void opal_msglog_init(void); extern int opal_async_comp_init(void); extern int opal_sensor_init(void); extern int opal_hmi_handler_init(void); +extern int opal_event_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); @@ -254,6 +255,8 @@ extern int opal_resync_timebase(void); extern void opal_lpc_init(void); +extern int opal_event_request(unsigned int opal_event_nr); + struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, unsigned long vmalloc_size); void opal_free_sg_list(struct opal_sg_list *sg); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bee923585afc..26bd7e417b7c 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,7 +1,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o opal-hmi.o opal-power.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c new file mode 100644 index 000000000000..bd5125dcf0d7 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -0,0 +1,253 @@ +/* + * This file implements an irqchip for OPAL events. Whenever there is + * an interrupt that is handled by OPAL we get passed a list of events + * that Linux needs to do something about. These basically look like + * interrupts to Linux so we implement an irqchip to handle them. + * + * Copyright Alistair Popple, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "powernv.h" + +/* Maximum number of events supported by OPAL firmware */ +#define MAX_NUM_EVENTS 64 + +struct opal_event_irqchip { + struct irq_chip irqchip; + struct irq_domain *domain; + unsigned long mask; +}; +static struct opal_event_irqchip opal_event_irqchip; + +static unsigned int opal_irq_count; +static unsigned int *opal_irqs; + +static void opal_handle_irq_work(struct irq_work *work); +static __be64 last_outstanding_events; +static struct irq_work opal_event_irq_work = { + .func = opal_handle_irq_work, +}; + +static void opal_event_mask(struct irq_data *d) +{ + clear_bit(d->hwirq, &opal_event_irqchip.mask); +} + +static void opal_event_unmask(struct irq_data *d) +{ + set_bit(d->hwirq, &opal_event_irqchip.mask); + + opal_poll_events(&last_outstanding_events); + if (last_outstanding_events & opal_event_irqchip.mask) + /* Need to retrigger the interrupt */ + irq_work_queue(&opal_event_irq_work); +} + +static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * For now we only support level triggered events. The irq + * handler will be called continuously until the event has + * been cleared in OPAL. + */ + if (flow_type != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct opal_event_irqchip opal_event_irqchip = { + .irqchip = { + .name = "OPAL EVT", + .irq_mask = opal_event_mask, + .irq_unmask = opal_event_unmask, + .irq_set_type = opal_event_set_type, + }, + .mask = 0, +}; + +static int opal_event_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, &opal_event_irqchip); + irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip, + handle_level_irq); + + return 0; +} + +void opal_handle_events(uint64_t events) +{ + int virq, hwirq = 0; + u64 mask = opal_event_irqchip.mask; + u64 notifier_mask = 0; + + if (!in_irq() && (events & mask)) { + last_outstanding_events = events; + irq_work_queue(&opal_event_irq_work); + return; + } + + while (events) { + hwirq = fls64(events) - 1; + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) { + if (BIT_ULL(hwirq) & mask) + generic_handle_irq(virq); + } else + notifier_mask |= BIT_ULL(hwirq); + events &= ~BIT_ULL(hwirq); + } + + opal_do_notifier(notifier_mask); +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + opal_handle_events(be64_to_cpu(events)); + + return IRQ_HANDLED; +} + +static void opal_handle_irq_work(struct irq_work *work) +{ + opal_handle_events(be64_to_cpu(last_outstanding_events)); +} + +static int opal_event_match(struct irq_domain *h, struct device_node *node) +{ + return h->of_node == node; +} + +static int opal_event_xlate(struct irq_domain *h, struct device_node *np, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + + return 0; +} + +static const struct irq_domain_ops opal_event_domain_ops = { + .match = opal_event_match, + .map = opal_event_map, + .xlate = opal_event_xlate, +}; + +void opal_event_shutdown(void) +{ + unsigned int i; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; + } +} + +int __init opal_event_init(void) +{ + struct device_node *dn, *opal_node; + const __be32 *irqs; + int i, irqlen, rc = 0; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("opal: Node not found\n"); + return -ENODEV; + } + + /* If dn is NULL it means the domain won't be linked to a DT + * node so therefore irq_of_parse_and_map(...) wont work. But + * that shouldn't be problem because if we're running a + * version of skiboot that doesn't have the dn then the + * devices won't have the correct properties and will have to + * fall back to the legacy method (opal_event_request(...)) + * anyway. */ + dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event"); + opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS, + &opal_event_domain_ops, &opal_event_irqchip); + of_node_put(dn); + if (!opal_event_irqchip.domain) { + pr_warn("opal: Unable to create irq domain\n"); + rc = -ENOMEM; + goto out; + } + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + + /* Install interrupt handlers */ + opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } + +out: + of_node_put(opal_node); + return rc; +} + +/** + * opal_event_request(unsigned int opal_event_nr) - Request an event + * @opal_event_nr: the opal event number to request + * + * This routine can be used to find the linux virq number which can + * then be passed to request_irq to assign a handler for a particular + * opal event. This should only be used by legacy devices which don't + * have proper device tree bindings. Most devices should use + * irq_of_parse_and_map() instead. + */ +int opal_event_request(unsigned int opal_event_nr) +{ + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); +} +EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index eb3decc67c43..3baca718261a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -53,8 +53,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -static unsigned int *opal_irqs; -static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static DEFINE_SPINLOCK(opal_notifier_lock); @@ -251,7 +249,7 @@ int opal_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(opal_notifier_unregister); -static void opal_do_notifier(uint64_t events) +void opal_do_notifier(uint64_t events) { unsigned long flags; uint64_t changed_mask; @@ -571,8 +569,10 @@ int opal_handle_hmi_exception(struct pt_regs *regs) local_paca->hmi_event_available = 0; rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) + if (rc == OPAL_SUCCESS && evt) { opal_do_notifier(be64_to_cpu(evt)); + opal_handle_events(be64_to_cpu(evt)); + } return 1; } @@ -609,17 +609,6 @@ out: return !!recover_addr; } -static irqreturn_t opal_interrupt(int irq, void *data) -{ - __be64 events; - - opal_handle_interrupt(virq_to_hw(irq), &events); - - opal_do_notifier(be64_to_cpu(events)); - - return IRQ_HANDLED; -} - static int opal_sysfs_init(void) { opal_kobj = kobject_create_and_add("opal", firmware_kobj); @@ -718,52 +707,15 @@ static void opal_i2c_create_devs(void) of_platform_device_create(np, NULL, NULL); } -static void __init opal_irq_init(struct device_node *dn) -{ - const __be32 *irqs; - int i, irqlen; - - /* Get interrupt property */ - irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - opal_irq_count = irqs ? (irqlen / 4) : 0; - pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); - if (!opal_irq_count) - return; - - /* Install interrupt handlers */ - opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); - for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { - unsigned int irq, virq; - int rc; - - /* Get hardware and virtual IRQ */ - irq = be32_to_cpup(irqs); - virq = irq_create_mapping(NULL, irq); - if (virq == NO_IRQ) { - pr_warn("Failed to map irq 0x%x\n", irq); - continue; - } - - /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); - if (rc) { - irq_dispose_mapping(virq); - pr_warn("Error %d requesting irq %d (0x%x)\n", - rc, virq, irq); - continue; - } - - /* Cache IRQ */ - opal_irqs[i] = virq; - } -} - static int kopald(void *unused) { + __be64 events; + set_freezable(); do { try_to_freeze(); - opal_poll_events(NULL); + opal_poll_events(&events); + opal_handle_events(be64_to_cpu(events)); msleep_interruptible(opal_heartbeat); } while (!kthread_should_stop()); @@ -792,6 +744,9 @@ static int __init opal_init(void) return -ENODEV; } + /* Initialise OPAL events */ + opal_event_init(); + /* Register OPAL consoles if any ports */ if (firmware_has_feature(FW_FEATURE_OPALv2)) consoles = of_find_node_by_path("/ibm,opal/consoles"); @@ -824,9 +779,6 @@ static int __init opal_init(void) /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); - /* Find all OPAL interrupts and request them */ - opal_irq_init(opal_node); - /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -857,15 +809,9 @@ machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { - unsigned int i; long rc = OPAL_BUSY; - /* First free interrupts, which will also mask them */ - for (i = 0; i < opal_irq_count; i++) { - if (opal_irqs[i]) - free_irq(opal_irqs[i], NULL); - opal_irqs[i] = 0; - } + opal_event_shutdown(); /* * Then sync with OPAL which ensure anything that can diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 826d2c9bea56..221d4c827fc5 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,6 +35,10 @@ extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); +extern void opal_do_notifier(uint64_t events); +extern void opal_handle_events(uint64_t events); +extern void opal_event_shutdown(void); + bool cpu_core_split_required(void); #endif /* _POWERNV_H */ -- cgit v1.2.3 From 79231448c929cc0870f6e5cc10c485661dfb4a9f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:40 +1000 Subject: powernv/eeh: Update the EEH code to use the opal irq domain The eeh code currently uses the old notifier method to get eeh events from OPAL. It also contains some logic to filter opal events which has been moved into the virtual irqchip. This patch converts the eeh code to the new event interface which simplifies event handling. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 58 +++++++++++++++------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 622f08cf54b6..5cf5e6ea213b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include "pci.h" static bool pnv_eeh_nb_init = false; +static int eeh_event_irq = -EINVAL; /** * pnv_eeh_init - EEH platform dependent initialization @@ -88,34 +90,22 @@ static int pnv_eeh_init(void) return 0; } -static int pnv_eeh_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t pnv_eeh_event(int irq, void *data) { - uint64_t changed_evts = (uint64_t)change; - /* - * We simply send special EEH event if EEH has - * been enabled, or clear pending events in - * case that we enable EEH soon + * We simply send a special EEH event if EEH has been + * enabled. We don't care about EEH events until we've + * finished processing the outstanding ones. Event processing + * gets unmasked in next_error() if EEH is enabled. */ - if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || - !(events & OPAL_EVENT_PCI_ERROR)) - return 0; + disable_irq_nosync(irq); if (eeh_enabled()) eeh_send_failure_event(NULL); - else - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - return 0; + return IRQ_HANDLED; } -static struct notifier_block pnv_eeh_nb = { - .notifier_call = pnv_eeh_event, - .next = NULL, - .priority = 0 -}; - #ifdef CONFIG_DEBUG_FS static ssize_t pnv_eeh_ei_write(struct file *filp, const char __user *user_buf, @@ -237,16 +227,28 @@ static int pnv_eeh_post_init(void) /* Register OPAL event notifier */ if (!pnv_eeh_nb_init) { - ret = opal_notifier_register(&pnv_eeh_nb); - if (ret) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); + eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); + if (eeh_event_irq < 0) { + pr_err("%s: Can't register OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return eeh_event_irq; + } + + ret = request_irq(eeh_event_irq, pnv_eeh_event, + IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); + if (ret < 0) { + irq_dispose_mapping(eeh_event_irq); + pr_err("%s: Can't request OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); return ret; } pnv_eeh_nb_init = true; } + if (!eeh_enabled()) + disable_irq(eeh_event_irq); + list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -1303,12 +1305,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) int state, ret = EEH_NEXT_ERR_NONE; /* - * While running here, it's safe to purge the event queue. - * And we should keep the cached OPAL notifier event sychronized - * between the kernel and firmware. + * While running here, it's safe to purge the event queue. The + * event should still be masked. */ eeh_remove_event(NULL, false); - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { /* @@ -1477,6 +1477,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) break; } + /* Unmask the event */ + if (eeh_enabled()) + enable_irq(eeh_event_irq); + return ret; } -- cgit v1.2.3 From a295af24d0d2af238d74a994603407b72de157b3 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:41 +1000 Subject: powernv/opal: Convert opal message events to opal irq domain This patch converts the opal message event to use the new opal irq domain. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 3baca718261a..cd5718b74d7c 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -362,33 +362,34 @@ static void opal_handle_message(void) opal_message_do_notify(type, (void *)&msg); } -static int opal_message_notify(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t opal_message_notify(int irq, void *data) { - if (events & OPAL_EVENT_MSG_PENDING) - opal_handle_message(); - return 0; + opal_handle_message(); + return IRQ_HANDLED; } -static struct notifier_block opal_message_nb = { - .notifier_call = opal_message_notify, - .next = NULL, - .priority = 0, -}; - static int __init opal_message_init(void) { - int ret, i; + int ret, i, irq; for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); - ret = opal_notifier_register(&opal_message_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + ret = request_irq(irq, opal_message_notify, + IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", + pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, ret); return ret; } + return 0; } -- cgit v1.2.3 From 74159a70283ba685cd4d856cfc4438596524b1f2 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:42 +1000 Subject: powernv/elog: Convert elog to opal irq domain This patch converts the elog code to use the opal irq domain instead of notifier events. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 38ce757e5e2a..4949ef0d9400 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -276,24 +277,15 @@ static void elog_work_fn(struct work_struct *work) static DECLARE_WORK(elog_work, elog_work_fn); -static int elog_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t elog_event(int irq, void *data) { - /* check for error log event */ - if (events & OPAL_EVENT_ERROR_LOG_AVAIL) - schedule_work(&elog_work); - return 0; + schedule_work(&elog_work); + return IRQ_HANDLED; } -static struct notifier_block elog_nb = { - .notifier_call = elog_event, - .next = NULL, - .priority = 0 -}; - int __init opal_elog_init(void) { - int rc = 0; + int rc = 0, irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_ELOG_READ)) @@ -305,10 +297,18 @@ int __init opal_elog_init(void) return -1; } - rc = opal_notifier_register(&elog_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + rc = request_irq(irq, elog_event, + IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); if (rc) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return rc; } -- cgit v1.2.3 From 8034f715f0a5fce97b14bacd47eefb1513316b17 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:43 +1000 Subject: powernv/opal-dump: Convert to irq domain Convert the opal dump driver to the new opal irq domain. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 56 +++++++++--------------------- 1 file changed, 17 insertions(+), 39 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 5aa9c1ce4de3..2ee96431f736 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - + return sprintf(buf, "0x%x %s\n", dump_obj->type, dump_type_to_string(dump_obj->type)); } @@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, return dump; } -static int process_dump(void) +static irqreturn_t process_dump(int irq, void *data) { int rc; uint32_t dump_id, dump_size, dump_type; @@ -387,45 +388,13 @@ static int process_dump(void) if (!dump) return -1; - return 0; -} - -static void dump_work_fn(struct work_struct *work) -{ - process_dump(); + return IRQ_HANDLED; } -static DECLARE_WORK(dump_work, dump_work_fn); - -static void schedule_process_dump(void) -{ - schedule_work(&dump_work); -} - -/* - * New dump available notification - * - * Once we get notification, we add sysfs entries for it. - * We only fetch the dump on demand, and create sysfs asynchronously. - */ -static int dump_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - if (events & OPAL_EVENT_DUMP_AVAIL) - schedule_process_dump(); - - return 0; -} - -static struct notifier_block dump_nb = { - .notifier_call = dump_event, - .next = NULL, - .priority = 0 -}; - void __init opal_platform_dump_init(void) { int rc; + int dump_irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_DUMP_READ)) @@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void) return; } - rc = opal_notifier_register(&dump_nb); + dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL)); + if (!dump_irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, dump_irq); + return; + } + + rc = request_threaded_irq(dump_irq, NULL, process_dump, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "opal-dump", NULL); if (rc) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return; } -- cgit v1.2.3 From 81f2f7ce4c5bb688ad691cb3ee37e81ca26a8a3b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:44 +1000 Subject: opal: Remove events notifier All users of the old opal events notifier have been converted over to the irq domain so remove the event notifier functions. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-irqchip.c | 16 ++--- arch/powerpc/platforms/powernv/opal.c | 84 +-------------------------- arch/powerpc/platforms/powernv/powernv.h | 1 - arch/powerpc/platforms/powernv/setup.c | 2 +- 4 files changed, 8 insertions(+), 95 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index bd5125dcf0d7..841135f48981 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -100,7 +100,6 @@ void opal_handle_events(uint64_t events) { int virq, hwirq = 0; u64 mask = opal_event_irqchip.mask; - u64 notifier_mask = 0; if (!in_irq() && (events & mask)) { last_outstanding_events = events; @@ -108,19 +107,16 @@ void opal_handle_events(uint64_t events) return; } - while (events) { + while (events & mask) { hwirq = fls64(events) - 1; - virq = irq_find_mapping(opal_event_irqchip.domain, - hwirq); - if (virq) { - if (BIT_ULL(hwirq) & mask) + if (BIT_ULL(hwirq) & mask) { + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) generic_handle_irq(virq); - } else - notifier_mask |= BIT_ULL(hwirq); + } events &= ~BIT_ULL(hwirq); } - - opal_do_notifier(notifier_mask); } static irqreturn_t opal_interrupt(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cd5718b74d7c..8403307c5362 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -53,11 +53,7 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; -static DEFINE_SPINLOCK(opal_notifier_lock); -static uint64_t last_notified_mask = 0x0ul; -static atomic_t opal_notifier_hold = ATOMIC_INIT(0); static uint32_t opal_heartbeat; static void opal_reinit_cores(void) @@ -223,82 +219,6 @@ static int __init opal_register_exception_handlers(void) } machine_early_initcall(powernv, opal_register_exception_handlers); -int opal_notifier_register(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_register(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_register); - -int opal_notifier_unregister(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_unregister(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_unregister); - -void opal_do_notifier(uint64_t events) -{ - unsigned long flags; - uint64_t changed_mask; - - if (atomic_read(&opal_notifier_hold)) - return; - - spin_lock_irqsave(&opal_notifier_lock, flags); - changed_mask = last_notified_mask ^ events; - last_notified_mask = events; - spin_unlock_irqrestore(&opal_notifier_lock, flags); - - /* - * We feed with the event bits and changed bits for - * enough information to the callback. - */ - atomic_notifier_call_chain(&opal_notifier_head, - events, (void *)changed_mask); -} - -void opal_notifier_update_evt(uint64_t evt_mask, - uint64_t evt_val) -{ - unsigned long flags; - - spin_lock_irqsave(&opal_notifier_lock, flags); - last_notified_mask &= ~evt_mask; - last_notified_mask |= evt_val; - spin_unlock_irqrestore(&opal_notifier_lock, flags); -} - -void opal_notifier_enable(void) -{ - int64_t rc; - __be64 evt = 0; - - atomic_set(&opal_notifier_hold, 0); - - /* Process pending events */ - rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) - opal_do_notifier(be64_to_cpu(evt)); -} - -void opal_notifier_disable(void) -{ - atomic_set(&opal_notifier_hold, 1); -} - /* * Opal message notifier based on message type. Allow subscribers to get * notified for specific messgae type. @@ -570,10 +490,8 @@ int opal_handle_hmi_exception(struct pt_regs *regs) local_paca->hmi_event_available = 0; rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) { - opal_do_notifier(be64_to_cpu(evt)); + if (rc == OPAL_SUCCESS && evt) opal_handle_events(be64_to_cpu(evt)); - } return 1; } diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 221d4c827fc5..f907f0a494da 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,7 +35,6 @@ extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); -extern void opal_do_notifier(uint64_t events); extern void opal_handle_events(uint64_t events); extern void opal_event_shutdown(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 509cdd2f7ad8..15e9337b392c 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -107,7 +107,7 @@ static void pnv_prepare_going_down(void) * Disable all notifiers from OPAL, we can't * service interrupts anymore anyway */ - opal_notifier_disable(); + opal_event_shutdown(); /* Soft disable interrupts */ local_irq_disable(); -- cgit v1.2.3 From e059b105d157d0231e2f0a7fba996724d856114b Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:54 +1000 Subject: powerpc: Add MSI operations to pci_controller_ops struct Add MSI setup and teardown functions to pci_controller_ops. Patch the callsites (arch_{setup,teardown}_msi_irqs) to prefer the controller ops version if it's available. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 6 ++++++ arch/powerpc/kernel/msi.c | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 1811c44bf34b..a3b6252bcfc9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -30,6 +30,12 @@ struct pci_controller_ops { /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); void (*reset_secondary_bus)(struct pci_dev *dev); + +#ifdef CONFIG_PCI_MSI + int (*setup_msi_irqs)(struct pci_dev *dev, + int nvec, int type); + void (*teardown_msi_irqs)(struct pci_dev *dev); +#endif }; /* diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 71bd161640cf..3d452f71fa25 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -15,7 +15,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) { + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if ((!phb->controller_ops.setup_msi_irqs || + !phb->controller_ops.teardown_msi_irqs) && + (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs)) { pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); return -ENOSYS; } @@ -24,10 +28,18 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - return ppc_md.setup_msi_irqs(dev, nvec, type); + if (phb->controller_ops.setup_msi_irqs) + return phb->controller_ops.setup_msi_irqs(dev, nvec, type); + else + return ppc_md.setup_msi_irqs(dev, nvec, type); } void arch_teardown_msi_irqs(struct pci_dev *dev) { - ppc_md.teardown_msi_irqs(dev); + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); + else + ppc_md.teardown_msi_irqs(dev); } -- cgit v1.2.3 From d6381119a415316c5602710cf7fbf388d15d6d3c Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:55 +1000 Subject: powerpc/powernv: Move MSI-related ops to pci_controller_ops Move the PowerNV/BML platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e4b6..0f04940b7fb0 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -768,16 +768,14 @@ void __init pnv_pci_init(void) ppc_md.tce_free_rm = pnv_tce_free_rm; ppc_md.tce_get = pnv_tce_get; set_pci_dma_ops(&dma_iommu_ops); - - /* Configure MSIs */ -#ifdef CONFIG_PCI_MSI - ppc_md.setup_msi_irqs = pnv_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; -#endif } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); struct pci_controller_ops pnv_pci_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif }; -- cgit v1.2.3 From 7e3d6c5a4be45769a9d439a4b62ad85cfe9e6754 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:56 +1000 Subject: powerpc/cell: Move MSI-related ops to pci_controller_ops Move the Cell platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. We can be confident that the functions will be added to the platform's ops struct before any PCI controller's ops struct is populated because: 1) These ops are added to the struct in a subsys initcall. We populate the ops in axon_msi_probe, which is the probe call for the axon-msi driver. However the driver is registered in axon_msi_init, which is a subsys initcall, so this will happen at the subsys level. 2) The controller recieves the struct later, in a device initcall. Cell populates the controller in cell_setup_phb, which is hooked up to ppc_md.pci_setup_phb. ppc_md.pci_setup_phb is only ever called in of_platform.c, as part of the OpenFirmware PCI driver's probe routine. That driver is registered in a device initcall, so it will occur *after* the struct is properly populated. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/axon_msi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 623bd961465a..d9b8a443458f 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -22,6 +22,7 @@ #include #include +#include "cell.h" /* * MSIC registers, specified as offsets from dcr_base @@ -406,8 +407,8 @@ static int axon_msi_probe(struct platform_device *device) dev_set_drvdata(&device->dev, msic); - ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs; + cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs; axon_msi_debug_setup(dn, msic); -- cgit v1.2.3 From 1d14b8755f0a7d8110f0bdc5b74987f8cc96c18e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:57 +1000 Subject: powerpc/pseries: Move MSI-related ops to pci_controller_ops Move the pseries platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations We need to iterate all PHBs because the MSI setup happens later than find_and_init_phbs() - mpe. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/msi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index c8d24f9a6948..c22bb647cce6 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -18,6 +18,8 @@ #include #include +#include "pseries.h" + static int query_token, change_token; #define RTAS_QUERY_FN 0 @@ -505,6 +507,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { + struct pci_controller *phb; + query_token = rtas_token("ibm,query-interrupt-source-number"); change_token = rtas_token("ibm,change-msi"); @@ -516,9 +520,15 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = rtas_setup_msi_irqs; - ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs; + WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); + pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + } WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; -- cgit v1.2.3 From 00e25397032f590d0a4d0ee89e236a4d1f8c0580 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:58 +1000 Subject: powerpc/fsl_msi: Move MSI-related ops to pci_controller_ops Move the fsl_msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. Previously, MSI ops were added to ppc_md at the subsys level. However, in fsl_pci.c, PCI controllers are created at the at arch level. So, unlike in e.g. PowerNV/pSeries/Cell, we can't simply populate a platform-level controller ops structure and have it copied into the controllers when they are created. Instead, walk every phb, and attempt to populate it with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/fsl_msi.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index f086c6f22dc9..5236e5427c38 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -405,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev) const struct fsl_msi_feature *features; int len; u32 offset; + struct pci_controller *phb; match = of_match_device(fsl_of_msi_ids, &dev->dev); if (!match) @@ -541,14 +542,20 @@ static int fsl_of_msi_probe(struct platform_device *dev) list_add_tail(&msi->list, &msi_head); - /* The multiple setting ppc_md.setup_msi_irqs will not harm things */ - if (!ppc_md.setup_msi_irqs) { - ppc_md.setup_msi_irqs = fsl_setup_msi_irqs; - ppc_md.teardown_msi_irqs = fsl_teardown_msi_irqs; - } else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) { - dev_err(&dev->dev, "Different MSI driver already installed!\n"); - err = -ENODEV; - goto error_out; + /* + * Apply the MSI ops to all the controllers. + * It doesn't hurt to reassign the same ops, + * but bail out if we find another MSI driver. + */ + list_for_each_entry(phb, &hose_list, list_node) { + if (!phb->controller_ops.setup_msi_irqs) { + phb->controller_ops.setup_msi_irqs = fsl_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = fsl_teardown_msi_irqs; + } else if (phb->controller_ops.setup_msi_irqs != fsl_setup_msi_irqs) { + dev_err(&dev->dev, "Different MSI driver already installed!\n"); + err = -ENODEV; + goto error_out; + } } return 0; error_out: -- cgit v1.2.3 From b7eba2ffccb51d7676a4ddc26658687331feb6a3 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:59 +1000 Subject: powerpc/ppc4xx_msi: Move MSI-related ops to pci_controller_ops Move the ppc4xx msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_msi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 6e2e6aa378bb..6eb21f2ea585 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -218,6 +218,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) struct ppc4xx_msi *msi; struct resource res; int err = 0; + struct pci_controller *phb; dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); @@ -250,8 +251,10 @@ static int ppc4xx_msi_probe(struct platform_device *dev) } ppc4xx_msi = *msi; - ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs; - ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + } return err; error_out: -- cgit v1.2.3 From f2c800aaceb6f26fe78590f971169b1d3d6fe322 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:00 +1000 Subject: powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops Move the ppc4xx hsta msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index f366d2d4c079..2bc33674ebfc 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -128,6 +128,7 @@ static int hsta_msi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct resource *mem; int irq, ret, irq_count; + struct pci_controller *phb; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (IS_ERR(mem)) { @@ -171,8 +172,10 @@ static int hsta_msi_probe(struct platform_device *pdev) } } - ppc_md.setup_msi_irqs = hsta_setup_msi_irqs; - ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs; + } return 0; out2: -- cgit v1.2.3 From 83922966973e19a48b6e59f9fa1259aa790a33c1 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:01 +1000 Subject: powerpc/pasemi: Move MSI-related ops to pci_controller_ops Move the PaSemi MPIC msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index 0b3706604543..27f2b187a91b 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -142,6 +142,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) int mpic_pasemi_msi_init(struct mpic *mpic) { int rc; + struct pci_controller *phb; if (!mpic->irqhost->of_node || !of_device_is_compatible(mpic->irqhost->of_node, @@ -157,9 +158,11 @@ int mpic_pasemi_msi_init(struct mpic *mpic) pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + } return 0; } -- cgit v1.2.3 From 14f95acda2f3238f3470beea0ddbf78081b3268d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:02 +1000 Subject: powerpc/mpic_u3msi: Move MSI-related ops to pci_controller_ops Move the u3 MPIC msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/mpic_u3msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index b2cef1809389..fc46ef3b816e 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -181,6 +181,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) int mpic_u3msi_init(struct mpic *mpic) { int rc; + struct pci_controller *phb; rc = mpic_msi_init_allocator(mpic); if (rc) { @@ -193,9 +194,11 @@ int mpic_u3msi_init(struct mpic *mpic) BUG_ON(msi_mpic); msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = u3msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = u3msi_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = u3msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = u3msi_teardown_msi_irqs; + } return 0; } -- cgit v1.2.3 From 1f88d5860e0b8244b28e21b63a521915e5c15313 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:03 +1000 Subject: powerpc: Remove MSI-related PCI controller ops from ppc_md Remove unneeded ppc_md functions. Patch callsites to use pci_controller_ops functions exclusively. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 6 ------ arch/powerpc/kernel/msi.c | 15 ++++----------- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index ef8899432ae7..0d387d0570cd 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -131,12 +131,6 @@ struct machdep_calls { /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); -#ifdef CONFIG_PCI_MSI - int (*setup_msi_irqs)(struct pci_dev *dev, - int nvec, int type); - void (*teardown_msi_irqs)(struct pci_dev *dev); -#endif - void (*restart)(char *cmd); void (*halt)(void); void (*panic)(char *str); diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 3d452f71fa25..dab616a33b8d 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -17,9 +17,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - if ((!phb->controller_ops.setup_msi_irqs || - !phb->controller_ops.teardown_msi_irqs) && - (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs)) { + if (!phb->controller_ops.setup_msi_irqs || + !phb->controller_ops.teardown_msi_irqs) { pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); return -ENOSYS; } @@ -28,18 +27,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - if (phb->controller_ops.setup_msi_irqs) - return phb->controller_ops.setup_msi_irqs(dev, nvec, type); - else - return ppc_md.setup_msi_irqs(dev, nvec, type); + return phb->controller_ops.setup_msi_irqs(dev, nvec, type); } void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - if (phb->controller_ops.teardown_msi_irqs) - phb->controller_ops.teardown_msi_irqs(dev); - else - ppc_md.teardown_msi_irqs(dev); + phb->controller_ops.teardown_msi_irqs(dev); } -- cgit v1.2.3 From 92ae03532619dc24fdb7a5ae8ea63785fbd39f86 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:05 +1000 Subject: powerpc/powernv: Specialise pci_controller_ops for each controller type Remove powernv generic PCI controller operations. Replace it with controller ops for each of the two supported PHBs. As an added bonus, make the two new structs const, which will help guard against bugs such as the one introduced in 65ebf4b63 ("powerpc/powernv: Move controller ops from ppc_md to controller_ops") Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 16 ++++++++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 10 +++++++++- arch/powerpc/platforms/powernv/pci.c | 14 +++----------- arch/powerpc/platforms/powernv/pci.h | 4 ++++ arch/powerpc/platforms/powernv/powernv.h | 2 -- 5 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f8bc950efcae..23125f46f865 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2648,6 +2648,17 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } +static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, +}; + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { @@ -2808,10 +2819,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * the child P2P bridges) can form individual PE. */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook; - pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment; - pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus; - hose->controller_ops = pnv_pci_controller_ops; + hose->controller_ops = pnv_pci_ioda_controller_ops; #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 4729ca793813..7f826e8cc1d4 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -95,6 +95,14 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); } +static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif +}; + static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { @@ -133,7 +141,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_controller_ops; + phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 0f04940b7fb0..dfad2ba13bdf 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -45,7 +45,7 @@ //#define cfg_dbg(fmt...) printk(fmt) #ifdef CONFIG_PCI_MSI -static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -94,7 +94,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -static void pnv_teardown_msi_irqs(struct pci_dev *pdev) +void pnv_teardown_msi_irqs(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -662,7 +662,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } -static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) +void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -771,11 +771,3 @@ void __init pnv_pci_init(void) } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); - -struct pci_controller_ops pnv_pci_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif -}; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 070ee888fc95..15fde52cb5cd 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -218,4 +218,8 @@ extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); +extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index f907f0a494da..addd15cdbabc 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,8 +29,6 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif -extern struct pci_controller_ops pnv_pci_controller_ops; - extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); -- cgit v1.2.3 From 3405c2570fd68fc5ccc703c8de9c23abf5e95819 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:06 +1000 Subject: powerpc/pci: add dma_set_mask to pci_controller_ops Some systems only need to deal with DMA masks for PCI devices. For these systems, we can avoid the need for a platform hook and instead use a pci controller based hook. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/dma.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index a3b6252bcfc9..6d17bb8498bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -36,6 +36,8 @@ struct pci_controller_ops { int nvec, int type); void (*teardown_msi_irqs)(struct pci_dev *dev); #endif + + int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); }; /* diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 484b2d4462c1..35e4dcc5dce3 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -248,6 +248,14 @@ int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) return ppc_md.dma_set_mask(dev, dma_mask); + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + if (phb->controller_ops.dma_set_mask) + return phb->controller_ops.dma_set_mask(pdev, dma_mask); + } + return __dma_set_mask(dev, dma_mask); } EXPORT_SYMBOL(dma_set_mask); -- cgit v1.2.3 From 763d2d8df1ee2b92ff09cd58f6034021e2cabf6d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:07 +1000 Subject: powerpc/powernv: Move dma_set_mask() from pnv_phb to pci_controller_ops Previously, dma_set_mask() on powernv was convoluted: 0) Call dma_set_mask() (a/p/kernel/dma.c) 1) In dma_set_mask(), ppc_md.dma_set_mask() exists, so call it. 2) On powernv, that function pointer is pnv_dma_set_mask(). In pnv_dma_set_mask(), the device is pci, so call pnv_pci_dma_set_mask(). 3) In pnv_pci_dma_set_mask(), call pnv_phb->set_dma_mask() if it exists. 4) It only exists in the ioda case, where it points to pnv_pci_ioda_dma_set_mask(), which is the final function. So the call chain is: dma_set_mask() -> pnv_dma_set_mask() -> pnv_pci_dma_set_mask() -> pnv_pci_ioda_dma_set_mask() Both ppc_md and pnv_phb function pointers are used. Rip out the ppc_md call, pnv_dma_set_mask() and pnv_pci_dma_set_mask(). Instead: 0) Call dma_set_mask() (a/p/kernel/dma.c) 1) In dma_set_mask(), the device is pci, and pci_controller_ops.dma_set_mask() exists, so call pci_controller_ops.dma_set_mask() 2) In the ioda case, that points to pnv_pci_ioda_dma_set_mask(). The new call chain is dma_set_mask() -> pnv_pci_ioda_dma_set_mask() Now only the pci_controller_ops function pointer is used. The fallback paths for p5ioc2 are the same. Previously, pnv_pci_dma_set_mask() would find no pnv_phb->set_dma_mask() function, to it would call __set_dma_mask(). Now, dma_set_mask() finds no ppc_md call or pci_controller_ops call, so it calls __set_dma_mask(). Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 7 ++++--- arch/powerpc/platforms/powernv/pci.c | 10 ---------- arch/powerpc/platforms/powernv/pci.h | 2 -- arch/powerpc/platforms/powernv/powernv.h | 6 ------ arch/powerpc/platforms/powernv/setup.c | 8 -------- 5 files changed, 4 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 23125f46f865..508f530e367b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1601,9 +1601,10 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); } -static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, - struct pci_dev *pdev, u64 dma_mask) +static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; uint64_t top; @@ -2657,6 +2658,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, }; static void __init pnv_pci_init_ioda_phb(struct device_node *np, @@ -2802,7 +2804,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; - phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index dfad2ba13bdf..8a557b5aabf7 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -689,16 +689,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } -int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - - if (phb && phb->dma_set_mask) - return phb->dma_set_mask(phb, pdev, dma_mask); - return __dma_set_mask(&pdev->dev, dma_mask); -} - u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 15fde52cb5cd..ac8686c853e6 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -106,8 +106,6 @@ struct pnv_phb { unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); - int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, - u64 dma_mask); u64 (*dma_get_required_mask)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index addd15cdbabc..9269e30e4ca0 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -12,17 +12,11 @@ struct pci_dev; #ifdef CONFIG_PCI extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); -extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } -static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) -{ - return -ENODEV; -} - static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) { return 0; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 15e9337b392c..53737e019ae3 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -165,13 +165,6 @@ static void pnv_progress(char *s, unsigned short hex) { } -static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) -{ - if (dev_is_pci(dev)) - return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); - return __dma_set_mask(dev, dma_mask); -} - static u64 pnv_dma_get_required_mask(struct device *dev) { if (dev_is_pci(dev)) @@ -321,7 +314,6 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, - .dma_set_mask = pnv_dma_set_mask, .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, -- cgit v1.2.3 From 09f3f326cda6b6e2dfc6471c7f8ac77696c87419 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Jun 2015 21:11:35 +1000 Subject: powerpc/mm: Fix build break with STRICT_MM_TYPECHECKS && DEBUG_PAGEALLOC If both STRICT_MM_TYPECHECKS and DEBUG_PAGEALLOC are enabled, the code in kernel_map_linear_page() is built, and so we fail with: arch/powerpc/mm/hash_utils_64.c:1478:2: error: incompatible type for argument 1 of 'htab_convert_pte_flags' Fix it by using pgprot_val(). Reported-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fda236f908eb..a04ca922f0db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1475,7 +1475,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); -- cgit v1.2.3 From 72e349f1124a114435e599479c9b8d14bfd1ebcd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:10:24 +1000 Subject: powerpc/perf: Fix book3s kernel to userspace backtraces When we take a PMU exception or a software event we call perf_read_regs(). This overloads regs->result with a boolean that describes if we should use the sampled instruction address register (SIAR) or the regs. If the exception is in kernel, we start with the kernel regs and backtrace through the kernel stack. At this point we switch to the userspace regs and backtrace the user stack with perf_callchain_user(). Unfortunately these regs have not got the perf_read_regs() treatment, so regs->result could be anything. If it is non zero, perf_instruction_pointer() decides to use the SIAR, and we get issues like this: 0.11% qemu-system-ppc [kernel.kallsyms] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--52.35%-- 0 | | | |--46.39%-- __hrtimer_start_range_ns | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | | | | |--67.08%-- _raw_spin_lock_irqsave <--- hi mum | | | | | | | --100.00%-- 0x7e714 | | | 0x7e714 Notice the bogus _raw_spin_irqsave when we transition from kernel (system_call) to userspace (0x7e714). We inserted what was in the SIAR. Add a check in regs_use_siar() to check that the regs in question are from a PMU exception. With this fix the backtrace makes sense: 0.47% qemu-system-ppc [kernel.vmlinux] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--53.83%-- 0 | | | |--44.73%-- hrtimer_try_to_cancel | | kvmppc_start_thread | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | __ioctl | | 0x7e714 | | 0x7e714 Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 12b638425bb9..d90893b76e7c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { } static bool regs_use_siar(struct pt_regs *regs) { - return !!regs->result; + /* + * When we take a performance monitor exception the regs are setup + * using perf_read_regs() which overloads some fields, in particular + * regs->result to tell us whether to use SIAR. + * + * However if the regs are from another exception, eg. a syscall, then + * they have not been setup using perf_read_regs() and so regs->result + * is something random. + */ + return ((TRAP(regs) == 0xf00) && regs->result); } /* -- cgit v1.2.3 From 05b05f28fb3835087b3d4e741fd561b9826fe461 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:46:54 +1000 Subject: powerpc: Relocatable system call no longer uses the LR We had some code to restore the LR in the relocatable system call path back when we used the LR to do an indirect branch. Commit 6a404806dfce ("powerpc: Avoid link stack corruption in MMU on syscall entry path") changed this to use the CTR which is volatile across system calls so does not need restoring. Remove the stale comment and the restore of the LR. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9519e6bdc6d7..cf8c1b55e6c8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -59,14 +59,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ #if defined(CONFIG_RELOCATABLE) /* - * We can't branch directly; in the direct case we use LR - * and system_call_entry restores LR. (We thus need to move - * LR to r10 in the RFID case too.) + * We can't branch directly so we do it via the CTR which + * is volatile across system calls. */ #define SYSCALL_PSERIES_2_DIRECT \ mflr r10 ; \ ld r12,PACAKBASE(r13) ; \ - LOAD_HANDLER(r12, system_call_entry_direct) ; \ + LOAD_HANDLER(r12, system_call_entry) ; \ mtctr r12 ; \ mfspr r12,SPRN_SRR1 ; \ /* Re-use of r13... No spare regs to do this */ \ @@ -80,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mfspr r12,SPRN_SRR1 ; \ li r10,MSR_RI ; \ mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_entry_direct ; + b system_call_entry ; #endif /* @@ -969,13 +968,6 @@ hv_facility_unavailable_relon_trampoline: __end_interrupts: .align 7 -system_call_entry_direct: -#if defined(CONFIG_RELOCATABLE) - /* The first level prologue may have used LR to get here, saving - * orig in r10. To save hacking/ifdeffing common code, restore here. - */ - mtlr r10 -#endif system_call_entry: b system_call_common -- cgit v1.2.3 From d20be433e6a8892ecf59ef62636cd1333975347d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:46:55 +1000 Subject: powerpc: Non relocatable system call doesn't need a trampoline We need to use a trampoline when using LOAD_HANDLER(), because the destination needs to be in the first 64kB. An absolute branch has no such limitations, so just jump there. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf8c1b55e6c8..0a0399c2af11 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -79,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mfspr r12,SPRN_SRR1 ; \ li r10,MSR_RI ; \ mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_entry ; + b system_call_common ; #endif /* -- cgit v1.2.3 From c1231a784acc25ec01d35a019533da9ab8593e2e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 20 May 2015 17:59:52 +0800 Subject: powerpc: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc while we already have a pointer to corresponding irq_desc. Signed-off-by: Jiang Liu Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2 +- arch/powerpc/platforms/cell/axon_msi.c | 2 +- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 2 +- arch/powerpc/sysdev/uic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index c949ca055712..63016621aff8 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -193,7 +193,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) { - struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq); + struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); int sub_virq; u32 status; diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index d9b8a443458f..fe51de4fcf13 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -96,7 +96,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct axon_msic *msic = irq_get_handler_data(irq); + struct axon_msic *msic = irq_desc_get_handler_data(desc); u32 write_offset, msi; int idx; int retry = 0; diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index c269caee58f9..9dd154d6f89a 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -124,7 +124,7 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq); + struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); unsigned int virq; raw_spin_lock(&desc->lock); diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 1cd057f11725..d77345338671 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -198,7 +198,7 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_data *idata = irq_desc_get_irq_data(desc); - struct uic *uic = irq_get_handler_data(virq); + struct uic *uic = irq_desc_get_handler_data(desc); u32 msr; int src; int subvirq; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 5bc5889d4acc..08c248eb491b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -227,7 +227,7 @@ void xics_migrate_irqs_away(void) /* Locate interrupt server */ server = -1; - ics = irq_get_chip_data(virq); + ics = irq_desc_get_chip_data(desc); if (ics) server = ics->get_server(ics, irq); if (server < 0) { -- cgit v1.2.3 From a1c97df2787f6542257f1fd85e6a9b055e1125be Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 26 May 2015 11:36:56 +1000 Subject: powerpc/configs: Merge pseries_defconfig and pseries_le_defconfig These two configs should be identical with the exception of big or little endian. The big endian version has XMON_DEFAULT turned on while the little endian has XMON_DEFAULT not set. It makes the most sense for defconfigs not to use xmon by default, production systems should get back up as quickly as possible, not sit in xmon. In the event debugging is required, the option can be enabled or xmon=on can be specified on commandline. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/configs/pseries_defconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index c2e39f66b182..4da826004ef8 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -297,7 +297,6 @@ CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y CONFIG_MSI_BITMAP_SELFTEST=y CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -- cgit v1.2.3 From ea4d1a87e6dee541a525c822b0b5da160940fc67 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 26 May 2015 11:36:57 +1000 Subject: powerpc/configs: Replace pseries_le_defconfig with a Makefile target using merge_config Rather than continuing to maintain a copy of pseries_defconfig with CONFIG_CPU_LITTLE_ENDIAN enabled, use the generic merge_config script and use an le.config to enable little endian on top of pseries_defconfig without the need for a duplicated _defconfig file. This method will require less maintenance in the future and will ensure that both 'defconfigs' are always in sync. It is worth noting that the seemingly more simple approach of: pseries_le_defconfig: pseries_defconfig $(Q)$(MAKE) le.config Will not work when building using O=builddir. The obvious fix to that: pseries_le_defconfig: $(Q)$(MAKE) -f $(srctree)/Makefile pseries_defconfig le.config Also does not work. This is because if we have for example: config FOO depends on CPU_BIG_ENDIAN select BAR Then BAR will be enabled by the first call to kconfig (via pseries_defconfig), and then will remain enabled after we merge le.config, even though FOO will have been turned off. The solution is to ensure to only invoke the kconfig logic once, after we have merged all the config fragments. This ensures nothing is select'ed on that should then be disabled by the later merged configs. This is done through the explicit call to make olddefconfig Signed-off-by: Cyril Bur Reviewed-by: Samuel Mendoza-Jonas [mpe: Massage change log, fix white space and use ARCH not SRCARCH] Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 15 ++ arch/powerpc/configs/le.config | 1 + arch/powerpc/configs/pseries_le_defconfig | 319 ------------------------------ 3 files changed, 16 insertions(+), 319 deletions(-) create mode 100644 arch/powerpc/configs/le.config delete mode 100644 arch/powerpc/configs/pseries_le_defconfig (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a314cb024c8b..edf39cebfed8 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -269,6 +269,21 @@ bootwrapper_install: %.dtb: scripts $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) +# Used to create 'merged defconfigs' +# To use it $(call) it with the first argument as the base defconfig +# and the second argument as a space separated list of .config files to merge, +# without the .config suffix. +define merge_into_defconfig + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ + -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig +endef + +PHONY += pseries_le_defconfig +pseries_le_defconfig: + $(call merge_into_defconfig,pseries_defconfig,le) + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config new file mode 100644 index 000000000000..ee43fdb3b8f4 --- /dev/null +++ b/arch/powerpc/configs/le.config @@ -0,0 +1 @@ +CONFIG_CPU_LITTLE_ENDIAN=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig deleted file mode 100644 index 09bc96e792cd..000000000000 --- a/arch/powerpc/configs/pseries_le_defconfig +++ /dev/null @@ -1,319 +0,0 @@ -CONFIG_PPC64=y -CONFIG_SMP=y -CONFIG_NR_CPUS=2048 -CONFIG_CPU_LITTLE_ENDIAN=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y -CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_IRQ_DOMAIN_DEBUG=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_SCHED=y -CONFIG_USER_NS=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_PPC_SPLPAR=y -CONFIG_SCANLOG=m -CONFIG_PPC_SMLPAR=y -CONFIG_DTL=y -# CONFIG_PPC_PMAC is not set -CONFIG_RTAS_FLASH=m -CONFIG_IBMEBUS=y -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y -CONFIG_HZ_100=y -CONFIG_BINFMT_MISC=m -CONFIG_PPC_TRANSACTIONAL_MEM=y -CONFIG_KEXEC=y -CONFIG_IRQ_ALL_CPUS=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y -CONFIG_SCHED_SMT=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_RPA=m -CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_NET_IPIP=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_ADVANCED is not set -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_BLK_DEV_FD=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_BE2ISCSI=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_IBMVSCSI=y -CONFIG_SCSI_IBMVFC=m -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_IPR=y -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_ATA=y -CONFIG_SATA_AHCI=y -# CONFIG_ATA_SFF is not set -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_MD_RAID0=y -CONFIG_MD_RAID1=y -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_UEVENT=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_NETCONSOLE=y -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_VHOST_NET=m -CONFIG_VORTEX=y -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_PCNET32=y -CONFIG_TIGON3=y -CONFIG_CHELSIO_T1=m -CONFIG_BE2NET=m -CONFIG_S2IO=m -CONFIG_IBMVETH=y -CONFIG_EHEA=y -CONFIG_E100=y -CONFIG_E1000=y -CONFIG_E1000E=y -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_MLX4_EN=m -CONFIG_MYRI10GE=m -CONFIG_QLGE=m -CONFIG_NETXEN_NIC=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_EVDEV=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_PCSPKR=m -# CONFIG_SERIO_SERPORT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_ICOM=m -CONFIG_SERIAL_JSM=m -CONFIG_HVC_CONSOLE=y -CONFIG_HVC_RTAS=y -CONFIG_HVCS=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IBM_BSR=m -CONFIG_GEN_RTC=y -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=1024 -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_OF=y -CONFIG_FB_MATROX=y -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G=y -CONFIG_FB_RADEON=y -CONFIG_FB_IBM_GXT4500=y -CONFIG_LCD_PLATFORM=m -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -CONFIG_HID_GYRATION=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SUNPLUS=y -CONFIG_USB_HIDDEV=y -CONFIG_USB=y -CONFIG_USB_MON=m -CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=m -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_EHCA=m -CONFIG_INFINIBAND_CXGB3=m -CONFIG_INFINIBAND_CXGB4=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_ISER=m -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_XFS_FS=m -CONFIG_XFS_POSIX_ACL=y -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_ISO9660_FS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_PSTORE=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACK_USAGE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_CODE_PATCHING_SELFTEST=y -CONFIG_FTR_FIXUP_SELFTEST=y -CONFIG_MSI_BITMAP_SELFTEST=y -CONFIG_XMON=y -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_LZO=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_VIRTUALIZATION=y -CONFIG_KVM_BOOK3S_64=m -CONFIG_KVM_BOOK3S_64_HV=m -- cgit v1.2.3 From fb326e9841b2000a8d1b115ccd7f5b3b61a4d123 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Mon, 12 Jan 2015 08:03:57 +0200 Subject: powerpc/dts: Unify B4 mux nodes Signed-off-by: Igal Liberman Change-Id: Ic5f28f7b492b708f00a5ff74dda723ce5e1da0ba Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/b4420si-post.dtsi | 15 ++------------- arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 15 ++------------- arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 12 ++++++++++++ 3 files changed, 16 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi index 86161ae6c966..1ea8602e4345 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi @@ -80,20 +80,9 @@ compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0"; }; -/include/ "qoriq-clockgen2.dtsi" global-utilities@e1000 { - compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0"; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-2.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, - <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll0-div4", - "pll1", "pll1-div2", "pll1-div4"; - clock-output-names = "cmux0"; - }; + compatible = "fsl,b4420-clockgen", "fsl,b4-clockgen", + "fsl,qoriq-clockgen-2.0"; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index f35e9e0a5445..68b9a0536485 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -182,20 +182,9 @@ compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0"; }; -/include/ "qoriq-clockgen2.dtsi" global-utilities@e1000 { - compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0"; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-2.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, - <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll0-div4", - "pll1", "pll1-div2", "pll1-div4"; - clock-output-names = "cmux0"; - }; + compatible = "fsl,b4860-clockgen", "fsl,b4-clockgen", + "fsl,qoriq-clockgen-2.0"; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 73136c0029d2..c6bab55bb065 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -296,9 +296,21 @@ fsl,liodn-bits = <12>; }; +/include/ "qoriq-clockgen2.dtsi" clockgen: global-utilities@e1000 { compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; reg = <0xe1000 0x1000>; + + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 0x4>; + compatible = "fsl,qoriq-core-mux-2.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, + <&pll1 0>, <&pll1 1>, <&pll1 2>; + clock-names = "pll0", "pll0-div2", "pll0-div4", + "pll1", "pll1-div2", "pll1-div4"; + clock-output-names = "cmux0"; + }; }; rcpm: global-utilities@e2000 { -- cgit v1.2.3 From c89ca8ab7457d891abdffca7e6f5e74cef670d6d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 2 Apr 2015 22:14:11 -0500 Subject: powerpc/e6500: Optimize hugepage TLB misses Some workloads take a lot of TLB misses despite using traditional hugepages. Handle these TLB misses in the asm fastpath rather than going through a bunch of C code. With this patch I measured around a 5x speedup in handling hugepage TLB misses. Signed-off-by: Scott Wood --- arch/powerpc/mm/tlb_low_64e.S | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 89bf95bd63b1..765b419883f2 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 ldx r14,r14,r15 /* Grab pmd entry */ mfspr r10,SPRN_MAS0 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 /* Now we build the MAS for a 2M indirect page: * @@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) clrrdi r15,r16,21 /* make EA 2M-aligned */ mtspr SPRN_MAS2,r15 +tlb_miss_huge_done_e6500: lbz r15,TCD_ESEL_NEXT(r11) lbz r16,TCD_ESEL_MAX(r11) lbz r14,TCD_ESEL_FIRST(r11) @@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) tlb_epilog_bolted rfi +tlb_miss_huge_e6500: + beq tlb_miss_fault_e6500 + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. + * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_e6500 + tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) cmpldi cr1,r15,8 /* Check for vmalloc region */ -- cgit v1.2.3 From 86d63363defc0b2f22288b287f75837d48b561fc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 6 Apr 2015 22:00:38 -0500 Subject: powerpc/e500mc: Remove dead L2 flushing code in idle_e500.S This code can never be executed as it is only built when CONFIG_PPC_E500MC is unset, but the only CPUs that have CPU_FTR_L2CSR require CONFIG_PPC_E500MC and do not have the MSR/HID0-based nap mechanism that this file uses. Signed-off-by: Scott Wood --- arch/powerpc/kernel/idle_e500.S | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 15448668988d..b9b6ef510be1 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -58,15 +58,6 @@ BEGIN_FTR_SECTION mtlr r0 lis r3,HID0_NAP@h END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -BEGIN_FTR_SECTION - msync - li r7,L2CSR0_L2FL@l - mtspr SPRN_L2CSR0,r7 -2: - mfspr r7,SPRN_L2CSR0 - andi. r4,r7,L2CSR0_L2FL@l - bne 2b -END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP) 1: /* Go to NAP or DOZE now */ mfspr r4,SPRN_HID0 -- cgit v1.2.3 From ec66a97d153306cd35250da419f7f8ead0bc142f Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Wed, 15 Apr 2015 11:48:08 +0800 Subject: powerpc/fsl-booke: Add device tree support for T1024/T1023 SoC The T1024 SoC includes the following function and features: - Two 64-bit Power architecture e5500 cores, up to 1.4GHz - private 256KB L2 cache each core and shared 256KB CoreNet platform cache (CPC) - 32-/64-bit DDR3L/DDR4 SDRAM memory controller with ECC and interleaving support - Data Path Acceleration Architecture (DPAA) incorporating acceleration - Four MAC for 1G/2.5G/10G network interfaces (RGMII, SGMII, QSGMII, XFI) - High-speed peripheral interfaces - Three PCI Express 2.0 controllers - Additional peripheral interfaces - One SATA 2.0 controller - Two USB 2.0 controllers with integrated PHY - Enhanced secure digital host controller (SD/eSDHC/eMMC) - Enhanced serial peripheral interface (eSPI) - Four I2C controllers - Four 2-pin UARTs or two 4-pin UARTs - Integrated Flash Controller supporting NAND and NOR flash - Two 8-channel DMA engines - Multicore programmable interrupt controller (PIC) - LCD interface (DIU) with 12 bit dual data rate - QUICC Engine block supporting TDM, HDLC, and UART - Deep Sleep power implementaion (wakeup from GPIO/Timer/Ethernet/USB) - Support for hardware virtualization and partitioning enforcement - QorIQ Platform's Trust Architecture 2.0 Signed-off-by: Shengzhou Liu [scottwood@freescale.com: whitespace fixes] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 330 ++++++++++++++++++++++++++++ arch/powerpc/boot/dts/fsl/t1024si-post.dtsi | 100 +++++++++ arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi | 87 ++++++++ 3 files changed, 517 insertions(+) create mode 100644 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/t1024si-post.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi new file mode 100644 index 000000000000..dbe65783cc2d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -0,0 +1,330 @@ +/* + * T1023 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +&ifc { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,ifc", "simple-bus"; + interrupts = <25 2 0 0>; +}; + +&pci0 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <20 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <20 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 40 1 0 0 + 0000 0 0 2 &mpic 1 1 0 0 + 0000 0 0 3 &mpic 2 1 0 0 + 0000 0 0 4 &mpic 3 1 0 0 + >; + }; +}; + +&pci1 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0 0xff>; + interrupts = <21 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <21 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 41 1 0 0 + 0000 0 0 2 &mpic 5 1 0 0 + 0000 0 0 3 &mpic 6 1 0 0 + 0000 0 0 4 &mpic 7 1 0 0 + >; + }; +}; + +&pci2 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <22 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <22 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 42 1 0 0 + 0000 0 0 2 &mpic 9 1 0 0 + 0000 0 0 3 &mpic 10 1 0 0 + 0000 0 0 4 &mpic 11 1 0 0 + >; + }; +}; + +&dcsr { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,dcsr", "simple-bus"; + + dcsr-epu@0 { + compatible = "fsl,t1023-dcsr-epu", "fsl,dcsr-epu"; + interrupts = <52 2 0 0 + 84 2 0 0 + 85 2 0 0>; + reg = <0x0 0x1000>; + }; + dcsr-npc { + compatible = "fsl,t1023-dcsr-cnpc", "fsl,dcsr-cnpc"; + reg = <0x1000 0x1000 0x1002000 0x10000>; + }; + dcsr-nxc@2000 { + compatible = "fsl,dcsr-nxc"; + reg = <0x2000 0x1000>; + }; + dcsr-corenet { + compatible = "fsl,dcsr-corenet"; + reg = <0x8000 0x1000 0x1A000 0x1000>; + }; + dcsr-ocn@11000 { + compatible = "fsl,t1023-dcsr-ocn", "fsl,dcsr-ocn"; + reg = <0x11000 0x1000>; + }; + dcsr-ddr@12000 { + compatible = "fsl,dcsr-ddr"; + dev-handle = <&ddr1>; + reg = <0x12000 0x1000>; + }; + dcsr-nal@18000 { + compatible = "fsl,t1023-dcsr-nal", "fsl,dcsr-nal"; + reg = <0x18000 0x1000>; + }; + dcsr-rcpm@22000 { + compatible = "fsl,t1023-dcsr-rcpm", "fsl,dcsr-rcpm"; + reg = <0x22000 0x1000>; + }; + dcsr-snpc@30000 { + compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x30000 0x1000 0x1022000 0x10000>; + }; + dcsr-snpc@31000 { + compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x31000 0x1000 0x1042000 0x10000>; + }; + dcsr-cpu-sb-proxy@100000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu0>; + reg = <0x100000 0x1000 0x101000 0x1000>; + }; + dcsr-cpu-sb-proxy@108000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu1>; + reg = <0x108000 0x1000 0x109000 0x1000>; + }; +}; + +&soc { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "simple-bus"; + + soc-sram-error { + compatible = "fsl,soc-sram-error"; + interrupts = <16 2 1 29>; + }; + + corenet-law@0 { + compatible = "fsl,corenet-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <16>; + }; + + ddr1: memory-controller@8000 { + compatible = "fsl,qoriq-memory-controller-v5.0", + "fsl,qoriq-memory-controller"; + reg = <0x8000 0x1000>; + interrupts = <16 2 1 23>; + }; + + cpc: l3-cache-controller@10000 { + compatible = "fsl,t1023-l3-cache-controller", "cache"; + reg = <0x10000 0x1000>; + interrupts = <16 2 1 27>; + }; + + corenet-cf@18000 { + compatible = "fsl,corenet2-cf"; + reg = <0x18000 0x1000>; + interrupts = <16 2 1 31>; + }; + + iommu@20000 { + compatible = "fsl,pamu-v1.0", "fsl,pamu"; + reg = <0x20000 0x1000>; + ranges = <0 0x20000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = < + 24 2 0 0 + 16 2 1 30>; + pamu0: pamu@0 { + reg = <0 0x1000>; + fsl,primary-cache-geometry = <128 1>; + fsl,secondary-cache-geometry = <32 2>; + }; + }; + +/include/ "qoriq-mpic.dtsi" + + guts: global-utilities@e0000 { + compatible = "fsl,t1023-device-config", "fsl,qoriq-device-config-2.0"; + reg = <0xe0000 0xe00>; + fsl,has-rstcr; + fsl,liodn-bits = <12>; + }; + +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { + compatible = "fsl,t1023-clockgen", "fsl,qoriq-clockgen-2.0"; + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 4>; + compatible = "fsl,core-mux-clock"; + clocks = <&pll0 0>, <&pll0 1>; + clock-names = "pll0_0", "pll0_1"; + clock-output-names = "cmux0"; + }; + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 4>; + compatible = "fsl,core-mux-clock"; + clocks = <&pll0 0>, <&pll0 1>; + clock-names = "pll0_0", "pll0_1"; + clock-output-names = "cmux1"; + }; + }; + + rcpm: global-utilities@e2000 { + compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.0"; + reg = <0xe2000 0x1000>; + }; + + sfp: sfp@e8000 { + compatible = "fsl,t1023-sfp"; + reg = <0xe8000 0x1000>; + }; + + serdes: serdes@ea000 { + compatible = "fsl,t1023-serdes"; + reg = <0xea000 0x4000>; + }; + + scfg: global-utilities@fc000 { + compatible = "fsl,t1023-scfg"; + reg = <0xfc000 0x1000>; + }; + +/include/ "elo3-dma-0.dtsi" +/include/ "elo3-dma-1.dtsi" + +/include/ "qoriq-espi-0.dtsi" + spi@110000 { + fsl,espi-num-chipselects = <4>; + }; + +/include/ "qoriq-esdhc-0.dtsi" + sdhc@114000 { + compatible = "fsl,t1023-esdhc", "fsl,esdhc"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ + sdhci,auto-cmd12; + no-1-8-v; + }; +/include/ "qoriq-i2c-0.dtsi" +/include/ "qoriq-i2c-1.dtsi" +/include/ "qoriq-duart-0.dtsi" +/include/ "qoriq-duart-1.dtsi" +/include/ "qoriq-gpio-0.dtsi" +/include/ "qoriq-gpio-1.dtsi" +/include/ "qoriq-gpio-2.dtsi" +/include/ "qoriq-gpio-3.dtsi" +/include/ "qoriq-usb2-mph-0.dtsi" + usb0: usb@210000 { + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ + phy_type = "utmi"; + port0; + }; +/include/ "qoriq-usb2-dr-0.dtsi" + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ + dr_mode = "host"; + phy_type = "utmi"; + }; +/include/ "qoriq-sata2-0.dtsi" +sata@220000 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ +}; + +/include/ "qoriq-sec5.0-0.dtsi" +}; diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi new file mode 100644 index 000000000000..95e3af8d768e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi @@ -0,0 +1,100 @@ +/* + * T1024 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "t1023si-post.dtsi" + +/ { + aliases { + vga = &display; + display = &display; + }; + + qe:qe@ffe140000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "qe"; + compatible = "fsl,qe"; + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + fsl,qe-num-riscs = <1>; + fsl,qe-num-snums = <28>; + brg-frequency = <0>; + bus-frequency = <0>; + }; +}; + +&soc { + display:display@180000 { + compatible = "fsl,t1024-diu", "fsl,diu"; + reg = <0x180000 1000>; + interrupts = <74 2 0 0>; + }; +}; + +&qe { + qeic: interrupt-controller@80 { + interrupt-controller; + compatible = "fsl,qe-ic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0x80 0x80>; + interrupts = <95 2 0 0 94 2 0 0>; //high:79 low:78 + }; + + ucc@2000 { + cell-index = <1>; + reg = <0x2000 0x200>; + interrupts = <32>; + interrupt-parent = <&qeic>; + }; + + ucc@2200 { + cell-index = <3>; + reg = <0x2200 0x200>; + interrupts = <34>; + interrupt-parent = <&qeic>; + }; + + muram@10000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,qe-muram", "fsl,cpm-muram"; + ranges = <0x0 0x10000 0x6000>; + + data-only@0 { + compatible = "fsl,qe-muram-data", "fsl,cpm-muram-data"; + reg = <0x0 0x6000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi new file mode 100644 index 000000000000..1f1a9f8474d5 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi @@ -0,0 +1,87 @@ +/* + * T1024/T1023 Silicon/SoC Device Tree Source (pre include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/dts-v1/; + +/include/ "e5500_power_isa.dtsi" + +/ { + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + aliases { + ccsr = &soc; + dcsr = &dcsr; + + dma0 = &dma0; + dma1 = &dma1; + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + usb0 = &usb0; + usb1 = &usb1; + sdhc = &sdhc; + + crypto = &crypto; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: PowerPC,e5500@0 { + device_type = "cpu"; + reg = <0>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + L2_1: l2-cache { + next-level-cache = <&cpc>; + }; + }; + cpu1: PowerPC,e5500@1 { + device_type = "cpu"; + reg = <1>; + clocks = <&mux1>; + next-level-cache = <&L2_2>; + L2_2: l2-cache { + next-level-cache = <&cpc>; + }; + }; + }; +}; -- cgit v1.2.3 From 2b6029e2e026771fb4e2add9b38e91f34725813c Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:43 +0800 Subject: powerpc/fsl-booke: Add T1024 QDS board support Add support for Freescale T1024/T1023 QorIQ Development System Board. T1024QDS is a high-performance computing evaluation, development and test platform for T1024 QorIQ Power Architecture processor. Signed-off-by: Shengzhou Liu [scottwood: vendor prefix: s/at24/atmel/ and trimmed detailed board description with too-long lines] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 36 ++-- arch/powerpc/boot/dts/t1024qds.dts | 251 ++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/Kconfig | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 4 files changed, 271 insertions(+), 19 deletions(-) create mode 100644 arch/powerpc/boot/dts/t1024qds.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index dbe65783cc2d..df1f068a5376 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -305,26 +305,26 @@ /include/ "qoriq-gpio-2.dtsi" /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" - usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ - phy_type = "utmi"; - port0; - }; + usb0: usb@210000 { + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ + phy_type = "utmi"; + port0; + }; /include/ "qoriq-usb2-dr-0.dtsi" - usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ - dr_mode = "host"; - phy_type = "utmi"; - }; + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ + dr_mode = "host"; + phy_type = "utmi"; + }; /include/ "qoriq-sata2-0.dtsi" -sata@220000 { - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ -}; + sata@220000 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ + }; /include/ "qoriq-sec5.0-0.dtsi" }; diff --git a/arch/powerpc/boot/dts/t1024qds.dts b/arch/powerpc/boot/dts/t1024qds.dts new file mode 100644 index 000000000000..f31fabb383b9 --- /dev/null +++ b/arch/powerpc/boot/dts/t1024qds.dts @@ -0,0 +1,251 @@ +/* + * T1024 QDS Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1024QDS"; + compatible = "fsl,T1024QDS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + board-control@3,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,tetra-fpga", "fsl,fpga-qixis"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q128a11"; /* 16MB */ + reg = <0>; + spi-max-frequency = <10000000>; + }; + + flash@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "sst,sst25wf040"; /* 512KB */ + reg = <1>; + spi-max-frequency = <10000000>; + }; + + flash@2 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "eon,en25s64"; /* 8MB */ + reg = <2>; + spi-max-frequency = <10000000>; + }; + + slic@2 { + compatible = "maxim,ds26522"; + reg = <2>; + spi-max-frequency = <2000000>; + }; + + slic@3 { + compatible = "maxim,ds26522"; + reg = <3>; + spi-max-frequency = <2000000>; + }; + }; + + i2c@118000 { + pca9547@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0>; + + eeprom@50 { + compatible = "atmel,24c512"; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "atmel,24c02"; + reg = <0x51>; + }; + + eeprom@57 { + compatible = "atmel,24c02"; + reg = <0x57>; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + /* Thermal Monitor */ + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + + eeprom@55 { + compatible = "atmel,24c02"; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "atmel,24c512"; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "atmel,24c512"; + reg = <0x57>; + }; + }; + }; + rtc@68 { + compatible = "dallas,ds3232"; + reg = <0x68>; + interrupts = <0x5 0x1 0 0>; + }; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1024si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 2fb4b24368a6..e628d4001272 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -282,7 +282,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS/RDB + P5020 DS, P5040 DS, T102x QDS, T104x QDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 9824d2cf79bd..f61f47780486 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -150,6 +150,7 @@ static const char * const boards[] __initconst = { "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", + "fsl,T1024QDS", "fsl,T1040QDS", "fsl,T1042QDS", "fsl,T1040RDB", -- cgit v1.2.3 From 5afe13fd48c72be61342edbc6ede6e60f6153227 Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:44 +0800 Subject: powerpc/fsl-booke: Add T1024 RDB board support T1024RDB is a Freescale Reference Design Board that hosts the T1024 SoC. Signed-off-by: Shengzhou Liu [scottwood: vendor prefix: s/at24/atmel/ and trimmed detailed board description with too-long lines] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1024rdb.dts | 185 ++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/Kconfig | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 3 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/boot/dts/t1024rdb.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/t1024rdb.dts b/arch/powerpc/boot/dts/t1024rdb.dts new file mode 100644 index 000000000000..733e723ffed6 --- /dev/null +++ b/arch/powerpc/boot/dts/t1024rdb.dts @@ -0,0 +1,185 @@ +/* + * T1024 RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1024RDB"; + compatible = "fsl,T1024RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + board-control@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,t1024-cpld"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + bank-width = <1>; + device-width = <1>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512ax3"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clk */ + }; + + slic@1 { + compatible = "maxim,ds26522"; + reg = <1>; + spi-max-frequency = <2000000>; + }; + + slic@2 { + compatible = "maxim,ds26522"; + reg = <2>; + spi-max-frequency = <2000000>; + }; + }; + + i2c@118000 { + adt7461@4c { + /* Thermal Monitor */ + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + + eeprom@50 { + compatible = "atmel,24c256"; + reg = <0x50>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1024si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index e628d4001272..97915feffd42 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -282,7 +282,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS, T102x QDS, T104x QDS/RDB + P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index f61f47780486..d7c1b4d4254a 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -151,6 +151,7 @@ static const char * const boards[] __initconst = { "fsl,B4420QDS", "fsl,B4220QDS", "fsl,T1024QDS", + "fsl,T1024RDB", "fsl,T1040QDS", "fsl,T1042QDS", "fsl,T1040RDB", -- cgit v1.2.3 From 65bf2a057021b009ff9924b59203993bdff82ecc Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:45 +0800 Subject: powerpc/fsl-booke: Add T1023 RDB board support T1023RDB is a Freescale Reference Design Board that hosts T1023 SoC. T1023RDB board Overview ----------------------- - T1023 SoC integrating two 64-bit e5500 cores up to 1.4GHz - CoreNet fabric supporting coherent and noncoherent transactions with prioritization and bandwidth allocation - Memory: 2GB Micron MT40A512M8HX unbuffered 32-bit fixed DDR4 without ECC - Accelerator: DPAA components consist of FMan, BMan, QMan, DCE and SEC - Ethernet interfaces: - one 1G RGMII port on-board(RTL8211F PHY) - one 1G SGMII port on-board(RTL8211F PHY) - one 2.5G SGMII port on-board(AQR105 PHY) - PCIe: Two Mini-PCIe connectors on-board. - SerDes: 4 lanes up to 10.3125GHz - NOR: 128MB S29GL01GS110TFIV10 Spansion NOR Flash - NAND: 512MB S34MS04G200BFI000 Spansion NAND Flash - eSPI: 64MB S25FL512SAGMFI010 Spansion SPI flash - USB: one Type-A USB 2.0 port with internal PHY - eSDHC: support SD/MMC card and eMMC flash on-board - 256Kbit M24256 I2C EEPROM - RTC: Real-time clock DS1339 on I2C bus - UART: one serial port on-board with RJ45 connector - Debugging: JTAG/COP for T1023 debugging Signed-off-by: Shengzhou Liu Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1023rdb.dts | 151 ++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 2 files changed, 152 insertions(+) create mode 100644 arch/powerpc/boot/dts/t1023rdb.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/t1023rdb.dts b/arch/powerpc/boot/dts/t1023rdb.dts new file mode 100644 index 000000000000..06b090aba066 --- /dev/null +++ b/arch/powerpc/boot/dts/t1023rdb.dts @@ -0,0 +1,151 @@ +/* + * T1023 RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1023RDB"; + compatible = "fsl,T1023RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 1 0 0xf 0xff800000 0x00010000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + status = "disabled"; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,s25fl512s"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clk */ + }; + }; + + i2c@118000 { + eeprom@50 { + compatible = "st,m24256"; + reg = <0x50>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + interrupts = <0x5 0x1 0 0>; + }; + }; + + i2c@118100 { + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1023si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index d7c1b4d4254a..bd839dc287fe 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -150,6 +150,7 @@ static const char * const boards[] __initconst = { "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", + "fsl,T1023RDB", "fsl,T1024QDS", "fsl,T1024RDB", "fsl,T1040QDS", -- cgit v1.2.3 From 9b6179dc1e1a481026ce7b9222275727ec76aef4 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Thu, 16 Apr 2015 14:53:36 +0300 Subject: powerpc/dts: Fix incorrect clock-names property Signed-off-by: Igal Liberman Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 86bdaf6cbd14..c9ad929adaf8 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,qoriq-core-mux-2.0"; clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll1-div4", + clock-names = "pll0", "pll0-div2", "pll0-div4", "pll1", "pll1-div2", "pll1-div4"; clock-output-names = "cmux0"; }; @@ -428,7 +428,7 @@ compatible = "fsl,qoriq-core-mux-2.0"; clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll1-div4", + clock-names = "pll0", "pll0-div2", "pll0-div4", "pll1", "pll1-div2", "pll1-div4"; clock-output-names = "cmux1"; }; -- cgit v1.2.3 From 86c3b16e9f330c94062c40302ab266f0d932057e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 17 Apr 2015 18:37:17 +0200 Subject: powerpc/8xx: mmu_virtual_psize incorrect for 16k pages mmu_virtual_psize shall be set to MMU_PAGE_16K when 16k pages have been selected Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/mmu-8xx.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 986b9e1e1044..d41200c01d85 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -145,7 +145,14 @@ typedef struct { } mm_context_t; #endif /* !__ASSEMBLY__ */ +#if (PAGE_SHIFT == 12) #define mmu_virtual_psize MMU_PAGE_4K +#elif (PAGE_SHIFT == 14) +#define mmu_virtual_psize MMU_PAGE_16K +#else +#error "Unsupported PAGE_SIZE" +#endif + #define mmu_linear_psize MMU_PAGE_8M #endif /* _ASM_POWERPC_MMU_8XX_H_ */ -- cgit v1.2.3 From 6c0cc62715bfd0b26f7d1a79e6e8143085950ca7 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 17 Apr 2015 16:17:14 -0500 Subject: powerpc/mm: Use PFN_PHYS() in devmem_is_allowed() This function can run on systems where physical addresses don't fit in unsigned long, so make sure to use the macro that contains the proper cast. Signed-off-by: Scott Wood --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 45fda71feb27..0f11819d8f1d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -560,7 +560,7 @@ subsys_initcall(add_system_ram_resources); */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; -- cgit v1.2.3 From 7f6972a0d018daacef3b31090057771bc657f18a Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 17 Apr 2015 17:53:07 -0500 Subject: powerpc/mpc85xx: Add FSL QorIQ DPAA QMan support to device tree(s) Signed-off-by: Kumar Gala Signed-off-by: Geoff Thorpe Signed-off-by: Hai-Ying Wang Signed-off-by: Chunhe Lan Signed-off-by: Poonam Aggrwal [Emil Medve: Sync with the upstream binding] Signed-off-by: Emil Medve [Scott Wood: s/fsl,qman-channel-id/cell-index] Signed-off-by: Scott Wood Cc: Madalin-Cristian Bucur --- arch/powerpc/boot/dts/b4qds.dtsi | 12 + arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 69 +++++ arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 106 +++++++ arch/powerpc/boot/dts/fsl/p1023si-post.dtsi | 43 +++ arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p3041si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p4080si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p5020si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p5040si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi | 20 +- arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 78 +++++ arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 126 ++++++++ arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 318 +++++++++++++++++++++ arch/powerpc/boot/dts/kmcoge4.dts | 12 + arch/powerpc/boot/dts/oca4080.dts | 12 + arch/powerpc/boot/dts/p1023rdb.dts | 12 + arch/powerpc/boot/dts/p2041rdb.dts | 12 + arch/powerpc/boot/dts/p3041ds.dts | 12 + arch/powerpc/boot/dts/p4080ds.dts | 12 + arch/powerpc/boot/dts/p5020ds.dts | 12 + arch/powerpc/boot/dts/p5040ds.dts | 12 + arch/powerpc/boot/dts/t104xqds.dtsi | 12 + arch/powerpc/boot/dts/t104xrdb.dtsi | 12 + arch/powerpc/boot/dts/t208xqds.dtsi | 12 + arch/powerpc/boot/dts/t208xrdb.dtsi | 12 + arch/powerpc/boot/dts/t4240qds.dts | 12 + arch/powerpc/boot/dts/t4240rdb.dts | 12 + 27 files changed, 995 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index 24ed80dc2120..559d00657fb5 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -106,6 +106,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -116,6 +124,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 68b9a0536485..9ba904be39ee 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -167,6 +167,75 @@ }; }; +&qportals { + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; + qportal18: qman-portal@48000 { + compatible = "fsl,qman-portal"; + reg = <0x48000 0x4000>, <0x1012000 0x1000>; + interrupts = <140 0x2 0 0>; + cell-index = <0x12>; + }; + qportal19: qman-portal@4c000 { + compatible = "fsl,qman-portal"; + reg = <0x4c000 0x4000>, <0x1013000 0x1000>; + interrupts = <142 0x2 0 0>; + cell-index = <0x13>; + }; + qportal20: qman-portal@50000 { + compatible = "fsl,qman-portal"; + reg = <0x50000 0x4000>, <0x1014000 0x1000>; + interrupts = <144 0x2 0 0>; + cell-index = <0x14>; + }; + qportal21: qman-portal@54000 { + compatible = "fsl,qman-portal"; + reg = <0x54000 0x4000>, <0x1015000 0x1000>; + interrupts = <146 0x2 0 0>; + cell-index = <0x15>; + }; + qportal22: qman-portal@58000 { + compatible = "fsl,qman-portal"; + reg = <0x58000 0x4000>, <0x1016000 0x1000>; + interrupts = <148 0x2 0 0>; + cell-index = <0x16>; + }; + qportal23: qman-portal@5c000 { + compatible = "fsl,qman-portal"; + reg = <0x5c000 0x4000>, <0x1017000 0x1000>; + interrupts = <150 0x2 0 0>; + cell-index = <0x17>; + }; + qportal24: qman-portal@60000 { + compatible = "fsl,qman-portal"; + reg = <0x60000 0x4000>, <0x1018000 0x1000>; + interrupts = <152 0x2 0 0>; + cell-index = <0x18>; + }; +}; + &soc { ddr2: memory-controller@9000 { compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller"; diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index c6bab55bb065..603910ac1db0 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -210,6 +220,97 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -355,6 +456,11 @@ /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-sec5.3-0.dtsi" +/include/ "qoriq-qman3.dtsi" + qman: qman@318000 { + interrupts = <16 2 1 28>; + }; + /include/ "qoriq-bman1.dtsi" bman: bman@31a000 { interrupts = <16 2 1 29>; diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi index 7780f21430cb..da6d3fc6ba41 100644 --- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { #address-cells = <2>; #size-cells = <1>; @@ -102,6 +112,31 @@ }; }; +&qportals { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <29 2 0 0>; + cell-index = <0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <31 2 0 0>; + cell-index = <1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x102000 0x1000>; + interrupts = <33 2 0 0>; + cell-index = <2>; + }; +}; + &bportals { #address-cells = <1>; #size-cells = <1>; @@ -248,6 +283,14 @@ /include/ "pq3-mpic.dtsi" /include/ "pq3-mpic-timer-B.dtsi" + qman: qman@88000 { + compatible = "fsl,qman"; + reg = <0x88000 0x1000>; + interrupts = <16 2 0 0>; + fsl,qman-portals = <&qportals>; + memory-region = <&qman_fqd &qman_pfdr>; + }; + bman: bman@8a000 { compatible = "fsl,bman"; reg = <0x8a000 0x1000>; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index f2feacfd9a25..1f18b8bac1d2 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -223,6 +233,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -415,5 +427,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index d6fea37395ad..a555d2491197 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -250,6 +260,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -442,5 +454,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 89482c9b2301..0fe728113197 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -250,6 +260,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -498,5 +510,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 6e04851e2fc9..a34ca200d8fb 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &lbc { compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -247,6 +257,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -428,6 +440,7 @@ fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" /include/ "qoriq-raid1.0-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 5e44dfa1e1a5..bf575132c309 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &lbc { compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -202,6 +212,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -407,5 +419,6 @@ fsl,iommu-parent = <&pamu4>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi index 05d51acafa67..e77e4b4ed53b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi @@ -41,61 +41,61 @@ compatible = "fsl,qman-portal"; reg = <0x0 0x4000>, <0x100000 0x1000>; interrupts = <104 2 0 0>; - fsl,qman-channel-id = <0x0>; + cell-index = <0x0>; }; qportal1: qman-portal@4000 { compatible = "fsl,qman-portal"; reg = <0x4000 0x4000>, <0x101000 0x1000>; interrupts = <106 2 0 0>; - fsl,qman-channel-id = <1>; + cell-index = <1>; }; qportal2: qman-portal@8000 { compatible = "fsl,qman-portal"; reg = <0x8000 0x4000>, <0x102000 0x1000>; interrupts = <108 2 0 0>; - fsl,qman-channel-id = <2>; + cell-index = <2>; }; qportal3: qman-portal@c000 { compatible = "fsl,qman-portal"; reg = <0xc000 0x4000>, <0x103000 0x1000>; interrupts = <110 2 0 0>; - fsl,qman-channel-id = <3>; + cell-index = <3>; }; qportal4: qman-portal@10000 { compatible = "fsl,qman-portal"; reg = <0x10000 0x4000>, <0x104000 0x1000>; interrupts = <112 2 0 0>; - fsl,qman-channel-id = <4>; + cell-index = <4>; }; qportal5: qman-portal@14000 { compatible = "fsl,qman-portal"; reg = <0x14000 0x4000>, <0x105000 0x1000>; interrupts = <114 2 0 0>; - fsl,qman-channel-id = <5>; + cell-index = <5>; }; qportal6: qman-portal@18000 { compatible = "fsl,qman-portal"; reg = <0x18000 0x4000>, <0x106000 0x1000>; interrupts = <116 2 0 0>; - fsl,qman-channel-id = <6>; + cell-index = <6>; }; qportal7: qman-portal@1c000 { compatible = "fsl,qman-portal"; reg = <0x1c000 0x4000>, <0x107000 0x1000>; interrupts = <118 2 0 0>; - fsl,qman-channel-id = <7>; + cell-index = <7>; }; qportal8: qman-portal@20000 { compatible = "fsl,qman-portal"; reg = <0x20000 0x4000>, <0x108000 0x1000>; interrupts = <120 2 0 0>; - fsl,qman-channel-id = <8>; + cell-index = <8>; }; qportal9: qman-portal@24000 { compatible = "fsl,qman-portal"; reg = <0x24000 0x4000>, <0x109000 0x1000>; interrupts = <122 2 0 0>; - fsl,qman-channel-id = <9>; + cell-index = <9>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 5cc01be5b152..9e9f7e201d43 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -280,6 +290,73 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -463,5 +540,6 @@ fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */ }; /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index c9ad929adaf8..32c790ae7fde 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -326,6 +336,121 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -502,6 +627,7 @@ phy_type = "utmi"; }; /include/ "qoriq-sec5.2-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" L2_1: l2-cache-controller@c20000 { diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 4d4f25895d8c..d806360d0f64 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -556,6 +566,313 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; + qportal18: qman-portal@48000 { + compatible = "fsl,qman-portal"; + reg = <0x48000 0x4000>, <0x1012000 0x1000>; + interrupts = <140 0x2 0 0>; + cell-index = <0x12>; + }; + qportal19: qman-portal@4c000 { + compatible = "fsl,qman-portal"; + reg = <0x4c000 0x4000>, <0x1013000 0x1000>; + interrupts = <142 0x2 0 0>; + cell-index = <0x13>; + }; + qportal20: qman-portal@50000 { + compatible = "fsl,qman-portal"; + reg = <0x50000 0x4000>, <0x1014000 0x1000>; + interrupts = <144 0x2 0 0>; + cell-index = <0x14>; + }; + qportal21: qman-portal@54000 { + compatible = "fsl,qman-portal"; + reg = <0x54000 0x4000>, <0x1015000 0x1000>; + interrupts = <146 0x2 0 0>; + cell-index = <0x15>; + }; + qportal22: qman-portal@58000 { + compatible = "fsl,qman-portal"; + reg = <0x58000 0x4000>, <0x1016000 0x1000>; + interrupts = <148 0x2 0 0>; + cell-index = <0x16>; + }; + qportal23: qman-portal@5c000 { + compatible = "fsl,qman-portal"; + reg = <0x5c000 0x4000>, <0x1017000 0x1000>; + interrupts = <150 0x2 0 0>; + cell-index = <0x17>; + }; + qportal24: qman-portal@60000 { + compatible = "fsl,qman-portal"; + reg = <0x60000 0x4000>, <0x1018000 0x1000>; + interrupts = <152 0x2 0 0>; + cell-index = <0x18>; + }; + qportal25: qman-portal@64000 { + compatible = "fsl,qman-portal"; + reg = <0x64000 0x4000>, <0x1019000 0x1000>; + interrupts = <154 0x2 0 0>; + cell-index = <0x19>; + }; + qportal26: qman-portal@68000 { + compatible = "fsl,qman-portal"; + reg = <0x68000 0x4000>, <0x101a000 0x1000>; + interrupts = <156 0x2 0 0>; + cell-index = <0x1a>; + }; + qportal27: qman-portal@6c000 { + compatible = "fsl,qman-portal"; + reg = <0x6c000 0x4000>, <0x101b000 0x1000>; + interrupts = <158 0x2 0 0>; + cell-index = <0x1b>; + }; + qportal28: qman-portal@70000 { + compatible = "fsl,qman-portal"; + reg = <0x70000 0x4000>, <0x101c000 0x1000>; + interrupts = <160 0x2 0 0>; + cell-index = <0x1c>; + }; + qportal29: qman-portal@74000 { + compatible = "fsl,qman-portal"; + reg = <0x74000 0x4000>, <0x101d000 0x1000>; + interrupts = <162 0x2 0 0>; + cell-index = <0x1d>; + }; + qportal30: qman-portal@78000 { + compatible = "fsl,qman-portal"; + reg = <0x78000 0x4000>, <0x101e000 0x1000>; + interrupts = <164 0x2 0 0>; + cell-index = <0x1e>; + }; + qportal31: qman-portal@7c000 { + compatible = "fsl,qman-portal"; + reg = <0x7c000 0x4000>, <0x101f000 0x1000>; + interrupts = <166 0x2 0 0>; + cell-index = <0x1f>; + }; + qportal32: qman-portal@80000 { + compatible = "fsl,qman-portal"; + reg = <0x80000 0x4000>, <0x1020000 0x1000>; + interrupts = <168 0x2 0 0>; + cell-index = <0x20>; + }; + qportal33: qman-portal@84000 { + compatible = "fsl,qman-portal"; + reg = <0x84000 0x4000>, <0x1021000 0x1000>; + interrupts = <170 0x2 0 0>; + cell-index = <0x21>; + }; + qportal34: qman-portal@88000 { + compatible = "fsl,qman-portal"; + reg = <0x88000 0x4000>, <0x1022000 0x1000>; + interrupts = <172 0x2 0 0>; + cell-index = <0x22>; + }; + qportal35: qman-portal@8c000 { + compatible = "fsl,qman-portal"; + reg = <0x8c000 0x4000>, <0x1023000 0x1000>; + interrupts = <174 0x2 0 0>; + cell-index = <0x23>; + }; + qportal36: qman-portal@90000 { + compatible = "fsl,qman-portal"; + reg = <0x90000 0x4000>, <0x1024000 0x1000>; + interrupts = <384 0x2 0 0>; + cell-index = <0x24>; + }; + qportal37: qman-portal@94000 { + compatible = "fsl,qman-portal"; + reg = <0x94000 0x4000>, <0x1025000 0x1000>; + interrupts = <386 0x2 0 0>; + cell-index = <0x25>; + }; + qportal38: qman-portal@98000 { + compatible = "fsl,qman-portal"; + reg = <0x98000 0x4000>, <0x1026000 0x1000>; + interrupts = <388 0x2 0 0>; + cell-index = <0x26>; + }; + qportal39: qman-portal@9c000 { + compatible = "fsl,qman-portal"; + reg = <0x9c000 0x4000>, <0x1027000 0x1000>; + interrupts = <390 0x2 0 0>; + cell-index = <0x27>; + }; + qportal40: qman-portal@a0000 { + compatible = "fsl,qman-portal"; + reg = <0xa0000 0x4000>, <0x1028000 0x1000>; + interrupts = <392 0x2 0 0>; + cell-index = <0x28>; + }; + qportal41: qman-portal@a4000 { + compatible = "fsl,qman-portal"; + reg = <0xa4000 0x4000>, <0x1029000 0x1000>; + interrupts = <394 0x2 0 0>; + cell-index = <0x29>; + }; + qportal42: qman-portal@a8000 { + compatible = "fsl,qman-portal"; + reg = <0xa8000 0x4000>, <0x102a000 0x1000>; + interrupts = <396 0x2 0 0>; + cell-index = <0x2a>; + }; + qportal43: qman-portal@ac000 { + compatible = "fsl,qman-portal"; + reg = <0xac000 0x4000>, <0x102b000 0x1000>; + interrupts = <398 0x2 0 0>; + cell-index = <0x2b>; + }; + qportal44: qman-portal@b0000 { + compatible = "fsl,qman-portal"; + reg = <0xb0000 0x4000>, <0x102c000 0x1000>; + interrupts = <400 0x2 0 0>; + cell-index = <0x2c>; + }; + qportal45: qman-portal@b4000 { + compatible = "fsl,qman-portal"; + reg = <0xb4000 0x4000>, <0x102d000 0x1000>; + interrupts = <402 0x2 0 0>; + cell-index = <0x2d>; + }; + qportal46: qman-portal@b8000 { + compatible = "fsl,qman-portal"; + reg = <0xb8000 0x4000>, <0x102e000 0x1000>; + interrupts = <404 0x2 0 0>; + cell-index = <0x2e>; + }; + qportal47: qman-portal@bc000 { + compatible = "fsl,qman-portal"; + reg = <0xbc000 0x4000>, <0x102f000 0x1000>; + interrupts = <406 0x2 0 0>; + cell-index = <0x2f>; + }; + qportal48: qman-portal@c0000 { + compatible = "fsl,qman-portal"; + reg = <0xc0000 0x4000>, <0x1030000 0x1000>; + interrupts = <408 0x2 0 0>; + cell-index = <0x30>; + }; + qportal49: qman-portal@c4000 { + compatible = "fsl,qman-portal"; + reg = <0xc4000 0x4000>, <0x1031000 0x1000>; + interrupts = <410 0x2 0 0>; + cell-index = <0x31>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -748,6 +1065,7 @@ /include/ "qoriq-sata2-0.dtsi" /include/ "qoriq-sata2-1.dtsi" /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" L2_1: l2-cache-controller@c20000 { diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts index 97e6d11d1e6d..48dab6a50437 100644 --- a/arch/powerpc/boot/dts/kmcoge4.dts +++ b/arch/powerpc/boot/dts/kmcoge4.dts @@ -34,6 +34,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -44,6 +52,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/oca4080.dts index eb76caae11d9..42796c5b0561 100644 --- a/arch/powerpc/boot/dts/oca4080.dts +++ b/arch/powerpc/boot/dts/oca4080.dts @@ -58,6 +58,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -68,6 +76,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p1023rdb.dts b/arch/powerpc/boot/dts/p1023rdb.dts index 9236e3742a23..05a00a4d2861 100644 --- a/arch/powerpc/boot/dts/p1023rdb.dts +++ b/arch/powerpc/boot/dts/p1023rdb.dts @@ -56,6 +56,18 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + + qportals: qman-portals@ff000000 { + ranges = <0x0 0xf 0xff000000 0x200000>; }; bportals: bman-portals@ff200000 { diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts index c1e69dc7188e..d2bb0765bd5a 100644 --- a/arch/powerpc/boot/dts/p2041rdb.dts +++ b/arch/powerpc/boot/dts/p2041rdb.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 2192fe94866d..eca6c697cfd7 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index fad441654642..4f80c9d02c27 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 7382636dc560..d0309a8b9749 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 35dabf5b6098..05168236d3ab 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/t104xqds.dtsi index f7e9bfbeefc7..1498d1e4aecf 100644 --- a/arch/powerpc/boot/dts/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/t104xqds.dtsi @@ -47,6 +47,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -92,6 +100,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi index 76e07a3f2ca8..830ea484295b 100644 --- a/arch/powerpc/boot/dts/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -42,6 +42,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -83,6 +91,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi index c42e07f4f648..869f9159b4d1 100644 --- a/arch/powerpc/boot/dts/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -48,6 +48,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -93,6 +101,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi index e1463b165d0e..693d2a8fa01c 100644 --- a/arch/powerpc/boot/dts/t208xrdb.dtsi +++ b/arch/powerpc/boot/dts/t208xrdb.dtsi @@ -48,6 +48,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -94,6 +102,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t4240qds.dts b/arch/powerpc/boot/dts/t4240qds.dts index 6df77766410b..93722da10e16 100644 --- a/arch/powerpc/boot/dts/t4240qds.dts +++ b/arch/powerpc/boot/dts/t4240qds.dts @@ -109,6 +109,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -119,6 +127,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts index 46049cf37f02..993eb4b8a487 100644 --- a/arch/powerpc/boot/dts/t4240rdb.dts +++ b/arch/powerpc/boot/dts/t4240rdb.dts @@ -78,6 +78,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -88,6 +96,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; -- cgit v1.2.3 From 90883a8255148e98026591e52a3495db18b29905 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:38 +0200 Subject: powerpc/8xx: macro for handling CPU15 errata Having a macro will help keep clear code. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9b53fe139bf6..1279018b3749 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,6 +297,17 @@ SystemCall: * We have to use the MD_xxx registers for the tablewalk because the * equivalent MI_xxx registers only perform the attribute functions. */ + +#ifdef CONFIG_8xx_CPU15 +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp +#else +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) +#endif + InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 @@ -304,12 +315,7 @@ InstructionTLBMiss: EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#ifdef CONFIG_8xx_CPU15 - addi r11, r10, PAGE_SIZE - tlbie r11 - addi r11, r10, -PAGE_SIZE - tlbie r11 -#endif + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. -- cgit v1.2.3 From d5fd9d7d669e81e024737a198d12ff8af4fd89cf Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:40 +0200 Subject: powerpc/8xx: Handle CR out of exception PROLOG/EPILOG In order to be able to reduce scope during which CR is saved, we take CR saving/restoring out of exception PROLOG and EPILOG Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1279018b3749..5a69c5e82363 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -116,13 +116,13 @@ turn_on_mmu: */ #define EXCEPTION_PROLOG \ EXCEPTION_PROLOG_0; \ + mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 #define EXCEPTION_PROLOG_0 \ mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10 + mtspr SPRN_SPRG_SCRATCH1,r11 #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ @@ -162,7 +162,6 @@ turn_on_mmu: * Exception exit code. */ #define EXCEPTION_EPILOG_0 \ - mtcr r10; \ mfspr r10,SPRN_SPRG_SCRATCH0; \ mfspr r11,SPRN_SPRG_SCRATCH1 @@ -313,6 +312,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcr r10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) @@ -363,6 +363,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 + mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -372,6 +373,7 @@ DataStoreTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcr r10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_MD_EPN @@ -437,6 +439,7 @@ DataStoreTLBMiss: #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 + mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -462,6 +465,7 @@ InstructionTLBError: . = 0x1400 DataTLBError: EXCEPTION_PROLOG_0 + mfcr r10 mfspr r11, SPRN_DAR cmpwi cr0, r11, RPN_PATTERN -- cgit v1.2.3 From 2eb2fd95001e93c611034f494c7254350ff94d2a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:42 +0200 Subject: powerpc/8xx: dont save CR in SCRATCH registers CR only needs to be preserved when checking if we are handling a kernel address. So we can preserve CR in a register: - In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we don't need to do anything at all with CR. - We use r10, then we reload SRR0/MD_EPN into r10 when CR is restored Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5a69c5e82363..150d03f79ad8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,10 +312,6 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 - mfcr r10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,13 +319,20 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ -#endif + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) + mfcr r10 + andis. r11, r11, 0x8000 /* Address >= 0x80000000 */ mfspr r11, SPRN_M_TW /* Get level 1 table */ -#ifdef CONFIG_MODULES beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcr r10 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#else + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -362,8 +365,6 @@ InstructionTLBMiss: mfspr r3, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ #endif - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -374,17 +375,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mfcr r10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 + mfspr r11, SPRN_MD_EPN + andis. r11, r11, 0x8000 mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcr r10 + mfspr r10, SPRN_MD_EPN + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ @@ -438,8 +441,6 @@ DataStoreTLBMiss: mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcr r10 EXCEPTION_EPILOG_0 rfi -- cgit v1.2.3 From b821c5fe84829996e179bba5da30ee33fb8e9f9f Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:44 +0200 Subject: powerpc/8xx: Use SPRG2 instead of DAR for saving r3 We now have SPRG2 available as in it not used anymore for saving CR, so we don't need to crash DAR anymore for saving r3 for CPU6 ERRATA handling. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 150d03f79ad8..ba2dc53cd5a1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -309,7 +309,7 @@ SystemCall: InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -362,8 +362,7 @@ InstructionTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi @@ -371,7 +370,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 mfcr r10 @@ -438,7 +437,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR + mfspr r3, SPRN_SPRG_SCRATCH2 #endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 -- cgit v1.2.3 From eeba1f7c38425cfc0b70ba133ab7e523d5b6d90e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:46 +0200 Subject: powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000 By default, TASK_SIZE is set to 0x80000000 for PPC_8xx, which is most likely sufficient for most cases. However, kernel configuration allows to set TASK_SIZE to another value, so the 8xx shall handle it. This patch also takes into account the case of PAGE_OFFSET lower than 0x80000000, allthought most of the time it is equal to 0xC0000000 Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ba2dc53cd5a1..c640bbb042b5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -48,6 +48,19 @@ mtspr spr, reg #endif +/* Macro to test if an address is a kernel address */ +#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 +#define IS_KERNEL(tmp, addr) \ + andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */ +#define BRANCH_UNLESS_KERNEL(label) beq label +#else +#define IS_KERNEL(tmp, addr) \ + rlwinm tmp, addr, 16, 16, 31; \ + cmpli cr0, tmp, PAGE_OFFSET >> 16 +#define BRANCH_UNLESS_KERNEL(label) blt label +#endif + + /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. @@ -322,9 +335,9 @@ InstructionTLBMiss: mfspr r11, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mfcr r10 - andis. r11, r11, 0x8000 /* Address >= 0x80000000 */ + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcr r10 @@ -379,9 +392,9 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r11, SPRN_MD_EPN - andis. r11, r11, 0x8000 + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcr r10 @@ -513,9 +526,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 - andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 -- cgit v1.2.3 From 83b086c5697169f1e34d2430dad062cc714c5c57 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:41 +0200 Subject: powerpc/8xx: mark _PAGE_SHARED all types of kernel pages All kernel pages have to be marked as shared in order to not perform CASID verification. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pte-8xx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 97bae64afdaa..8ea537e76058 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -62,7 +62,12 @@ /* We need to add _PAGE_SHARED to kernel pages */ #define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ + _PAGE_EXEC) +#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE) +#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE | _PAGE_EXEC) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PTE_8xx_H */ -- cgit v1.2.3 From e0a8e0d90a9f0be66ba49f4f2380a63c22d4aaae Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:43 +0200 Subject: powerpc/8xx: Handle PAGE_USER via APG bits Use of APG for handling PAGE_USER. All pages PP exec bits are set to either 000 or 011, which means respectively RW for Supervisor and no access for User, or RO for Supervisor and no access for user. Then we use the APG to say whether accesses are according to Page rules or "all Supervisor" rules (Access to all) Therefore, we define 2 APG groups corresponding to _PAGE_USER. Mx_AP are initialised as follows: GP0 => No user => 01 (all accesses performed according to page definition) GP1 => User => 00 (all accesses performed as supervisor according to page definition) This removes the special 8xx handling in pte_update() Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 19 ------------------- arch/powerpc/include/asm/pte-8xx.h | 27 +++++++++------------------ arch/powerpc/kernel/head_8xx.S | 21 ++++++++++++--------- 3 files changed, 21 insertions(+), 46 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 64b52b1cf542..9c326565d498 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,24 +170,6 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; -#ifdef CONFIG_PPC_8xx - unsigned long tmp2; - - __asm__ __volatile__("\ -1: lwarx %0,0,%4\n\ - andc %1,%0,%5\n\ - or %1,%1,%6\n\ - /* 0x200 == Extended encoding, bit 22 */ \ - /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \ - rlwimi %1,%1,32-1,0x200\n /* get _PAGE_RO */ \ - rlwinm %3,%1,32-2,0x200\n /* get _PAGE_USER */ \ - andc %1,%1,%3\n\ - stwcx. %1,0,%4\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ @@ -198,7 +180,6 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); -#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 8ea537e76058..b82094e4c242 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -34,36 +34,27 @@ #define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ #define _PAGE_DIRTY 0x0100 /* C: page changed */ -/* These 4 software bits must be masked out when the entry is loaded - * into the TLB, 1 SW bit left(0x0080). +/* These 4 software bits must be masked out when the L2 entry is loaded + * into the TLB. */ -#define _PAGE_GUARDED 0x0010 /* software: guarded access */ -#define _PAGE_ACCESSED 0x0020 /* software: page referenced */ -#define _PAGE_WRITETHRU 0x0040 /* software: caching is write through */ +#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ +#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ +#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ -/* Setting any bits in the nibble with the follow two controls will - * require a TLB exception handler change. It is assumed unused bits - * are always zero. - */ -#define _PAGE_RO 0x0400 /* lsb PP bits */ -#define _PAGE_USER 0x0800 /* msb PP bits */ -/* set when _PAGE_USER is unset and _PAGE_RO is set */ -#define _PAGE_KNLRO 0x0200 +#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_KNLRO - /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ - _PAGE_EXEC) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) #define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ _PAGE_HWWRITE) #define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c640bbb042b5..c79184d86f58 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -351,12 +351,15 @@ InstructionTLBMiss: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - /* Load the MI_TWC with the attributes for this "segment." */ - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ - rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 - lwzx r10, r10, r11 /* Get the pte */ + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ + lwz r10, 0(r10) /* Get the pte */ + + /* Insert the APG into the TWC from the Linux PTE. */ + rlwimi r11, r10, 0, 26, 26 + /* Load the MI_TWC with the attributes for this "segment." */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT @@ -365,12 +368,12 @@ InstructionTLBMiss: #endif li r11, RPN_PATTERN /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21 and 28 must be clear. + * Software indicator bits 20-23 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ + rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ @@ -411,13 +414,13 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - /* Insert the Guarded flag into the TWC from the Linux PTE. - * It is bit 27 of both the Linux PTE and the TWC (at least + /* Insert the Guarded flag and APG into the TWC from the Linux PTE. + * It is bit 26-27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, 27, 27 + rlwimi r11, r10, 0, 26, 27 /* Insert the WriteThru flag into the TWC from the Linux PTE. * It is bit 25 in the Linux PTE and bit 30 in the TWC */ -- cgit v1.2.3 From 5b2753fc3e8a72253310d01a8b0a5bb05d917ef8 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:45 +0200 Subject: powerpc/8xx: Implementation of PAGE_EXEC This patch implements PAGE_EXEC capability on the 8xx. All pages PP exec bits are set to 000, which means Execute for Supervisor and no Execute for User. Then we use the APG to say whether accesses are according to Page rules, "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER. MI_AP is initialised as follows: GP0 (00) => Not User, no exec => 11 (all accesses performed as user) GP1 (01) => User but no exec => 11 (all accesses performed as user) GP2 (10) => Not User, exec => 01 (rights according to page definition) GP3 (11) => User, exec => 00 (all accesses performed as supervisor) Signed-off-by: Christophe Leroy [scottwood: comments: s/exec/data/ on data side, and s/pages/pages'/] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 26 ++++++++++++++++++++++++++ arch/powerpc/include/asm/pte-8xx.h | 3 ++- arch/powerpc/kernel/head_8xx.S | 12 +++++++++--- 4 files changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 6367b8347dad..ae1fa65bb26d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -366,7 +366,7 @@ enum { CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) -#define CPU_FTRS_8XX (CPU_FTR_USE_TB) +#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE) #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index d41200c01d85..f05500a29a60 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -27,6 +27,19 @@ #define MI_Ks 0x80000000 /* Should not be set */ #define MI_Kp 0x40000000 /* Should always be set */ +/* + * All pages' PP exec bits are set to 000, which means Execute for Supervisor + * and no Execute for User. + * Then we use the APG to say whether accesses are according to Page rules, + * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) + * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER + * 0 (00) => Not User, no exec => 11 (all accesses performed as user) + * 1 (01) => User but no exec => 11 (all accesses performed as user) + * 2 (10) => Not User, exec => 01 (rights according to page definition) + * 3 (11) => User, exec => 00 (all accesses performed as supervisor) + */ +#define MI_APG_INIT 0xf4ffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in * this register are used to create the TLB entry. @@ -87,6 +100,19 @@ #define MD_Ks 0x80000000 /* Should not be set */ #define MD_Kp 0x40000000 /* Should always be set */ +/* + * All pages' PP data bits are set to either 000 or 011, which means + * respectively RW for Supervisor and no access for User, or RO for + * Supervisor and no access for user. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) + * Therefore, we define 2 APG groups. lsb is _PAGE_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor + * according to page definition) + */ +#define MD_APG_INIT 0x4fffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in * this register are used to create the TLB entry. diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index b82094e4c242..a0e2ba960976 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -39,8 +39,9 @@ */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ #define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ -#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ #define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ +#define _PAGE_ACCESSED 0x0800 /* software: page referenced */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c79184d86f58..78c1eba4c04a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -357,7 +357,7 @@ InstructionTLBMiss: lwz r10, 0(r10) /* Get the pte */ /* Insert the APG into the TWC from the Linux PTE. */ - rlwimi r11, r10, 0, 26, 26 + rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ @@ -449,6 +449,7 @@ DataStoreTLBMiss: */ li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ @@ -769,15 +770,20 @@ initial_mmu: ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 mtspr SPRN_MD_EPN, r8 - li r8, MI_PS8MEG /* Set 8M byte page */ + li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 + li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */ + ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ mtspr SPRN_MD_RPN, r8 - lis r8, MI_Kp@h /* Set the protection mode */ + lis r8, MI_APG_INIT@h /* Set protection modes */ + ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 + lis r8, MD_APG_INIT@h + ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 /* Map another 8 MByte at the IMMR to get the processor -- cgit v1.2.3 From 379caf606396a13c8da2742e9a6e0bcfaaffa726 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 17 May 2015 16:12:37 +0800 Subject: powerpc: mpc85xx: flush the l1 cache before cpu down in kexec We observe a "Zero PT_NOTE entries found" warning when vmcore_init() is running on the dump-capture kernel. Actually the PT_NOTE segments is not empty, but the entries generated by crash_save_cpu() are not flushed to the memory before we reset these cores. So we should flush the l1 cache as what we do in cpu hotplug. With this change, we can also kill the mpc85xx_smp_flush_dcache_kexec() since that becomes unnecessary. Please note: this only fix the issue on e500 core, we still need to implement the function to flush the l2 cache for the e500mc core. Fortunately we already had proposing patch for this support [1]. Hope we can fix this issue for e500mc after that merged. [1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-March/115830.html Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/smp.c | 51 +-------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 8631ac5f0e57..b8b821697910 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -345,6 +345,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) local_irq_disable(); if (secondary) { + __flush_disable_L1(); atomic_inc(&kexec_down_cpus); /* loop forever */ while (1); @@ -357,61 +358,11 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } -static void map_and_flush(unsigned long paddr) -{ - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned long kaddr = (unsigned long)kmap_atomic(page); - - flush_dcache_range(kaddr, kaddr + PAGE_SIZE); - kunmap_atomic((void *)kaddr); -} - -/** - * Before we reset the other cores, we need to flush relevant cache - * out to memory so we don't get anything corrupted, some of these flushes - * are performed out of an overabundance of caution as interrupts are not - * disabled yet and we can switch cores - */ -static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image) -{ - kimage_entry_t *ptr, entry; - unsigned long paddr; - int i; - - if (image->type == KEXEC_TYPE_DEFAULT) { - /* normal kexec images are stored in temporary pages */ - for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); - ptr = (entry & IND_INDIRECTION) ? - phys_to_virt(entry & PAGE_MASK) : ptr + 1) { - if (!(entry & IND_DESTINATION)) { - map_and_flush(entry); - } - } - /* flush out last IND_DONE page */ - map_and_flush(entry); - } else { - /* crash type kexec images are copied to the crash region */ - for (i = 0; i < image->nr_segments; i++) { - struct kexec_segment *seg = &image->segment[i]; - for (paddr = seg->mem; paddr < seg->mem + seg->memsz; - paddr += PAGE_SIZE) { - map_and_flush(paddr); - } - } - } - - /* also flush the kimage struct to be passed in as well */ - flush_dcache_range((unsigned long)image, - (unsigned long)image + sizeof(*image)); -} - static void mpc85xx_smp_machine_kexec(struct kimage *image) { int timeout = INT_MAX; int i, num_cpus = num_present_cpus(); - mpc85xx_smp_flush_dcache_kexec(image); - if (image->type == KEXEC_TYPE_DEFAULT) smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); -- cgit v1.2.3 From 31ea9d5dfd27b9aa6f92c207041eba8e81f4c497 Mon Sep 17 00:00:00 2001 From: Xie Xiaobo Date: Fri, 22 May 2015 13:07:19 +0800 Subject: powerpc/85xx: p1025twr: add module conditional to fix QE-uart issue A ioport setting was needed when used the QE uart function on TWR-P1025. Added a conditional definition to avoid missing this setting when the QE-uart driver was bulit to a module. Signed-off-by: Xie Xiaobo Signed-off-by: Li Pengbo Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/twr_p102x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 1eadb6d0dc64..30e002f4648c 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -79,7 +79,7 @@ static void __init twr_p1025_setup_arch(void) mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) if (machine_is(twr_p1025)) { struct ccsr_guts __iomem *guts; @@ -101,7 +101,7 @@ static void __init twr_p1025_setup_arch(void) MPC85xx_PMUXCR_QE(12)); iounmap(guts); -#if defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_SERIAL_QE) /* On P1025TWR board, the UCC7 acted as UART port. * However, The UCC7's CTS pin is low level in default, * it will impact the transmission in full duplex -- cgit v1.2.3 From 85a97da9584868b6cd0c54b93c5f65319f615b08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:55 +1000 Subject: powerpc/copro: Fix faulting kernel segments This fixes calculating the key bits (KP and KS) in the SLB VSID for kernel mappings. I'm not CCing this to stable as there are no uses of this currently. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f031a47d7701..7a71d7f81cf6 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault); int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { - u64 vsid; + u64 vsid, vsidkey; int psize, ssize; switch (REGION_ID(ea)) { @@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); @@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = mmu_io_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; default: pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } - vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; vsid |= mmu_psize_defs[psize].sllp | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); -- cgit v1.2.3 From 5b64d2cc41216bef3e19913d1a39c324c2587d11 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 27 May 2015 16:06:56 +1000 Subject: powerpc/pci: Export symbols for CXL Export pcibios_claim_one_bus, pcibios_scan_phb and pcibios_alloc_controller. These will be used by the CXL driver. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0d054068a21d..2040cd2f7358 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -89,6 +89,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) #endif return phb; } +EXPORT_SYMBOL_GPL(pcibios_alloc_controller); void pcibios_free_controller(struct pci_controller *phb) { @@ -1447,6 +1448,7 @@ void pcibios_claim_one_bus(struct pci_bus *bus) list_for_each_entry(child_bus, &bus->children, node) pcibios_claim_one_bus(child_bus); } +EXPORT_SYMBOL_GPL(pcibios_claim_one_bus); /* pcibios_finish_adding_to_bus @@ -1680,6 +1682,7 @@ void pcibios_scan_phb(struct pci_controller *hose) pcie_bus_configure_settings(child); } } +EXPORT_SYMBOL_GPL(pcibios_scan_phb); static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { -- cgit v1.2.3 From 10e796309a670bb2c13eae5aa6f60e10d2d6a6e1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:57 +1000 Subject: powerpc/pci: Add release_device() hook to phb ops Add release_device() hook to phb ops so we can clean up for specific phbs. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci-hotplug.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 6d17bb8498bf..4cf0caaa33c7 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -27,6 +27,8 @@ struct pci_controller_ops { * allow assignment/enabling of the device. */ bool (*enable_device_hook)(struct pci_dev *); + void (*release_device)(struct pci_dev *); + /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); void (*reset_secondary_bus)(struct pci_dev *dev); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 7ed85a69a9c2..7f9ed0c1f6b9 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -29,7 +29,12 @@ */ void pcibios_release_device(struct pci_dev *dev) { + struct pci_controller *phb = pci_bus_to_host(dev->bus); + eeh_remove_device(dev); + + if (phb->controller_ops.release_device) + phb->controller_ops.release_device(dev); } /** -- cgit v1.2.3 From f46580a5cf07b3d35e357bb2401ae3c49cff7e65 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:58 +1000 Subject: powerpc: Add cxl context to device archdata Add cxl context pointer to archdata. We'll want to create one of these for cxl PCI devices. Put them here until we can get a pci_dev specific private data. This location was suggested by benh. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 9f1371bab5fc..e9bdda88f1fb 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -46,6 +46,9 @@ struct dev_archdata { #ifdef CONFIG_FAIL_IOMMU int fail_iommu; #endif +#ifdef CONFIG_CXL_BASE + struct cxl_context *cxl_ctx; +#endif }; struct pdev_archdata { -- cgit v1.2.3 From 7a8e6bbf8593a9395dd6c61f7c5f421570600017 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:59 +1000 Subject: powerpc/pci: Add shutdown hook to pci_controller_ops Currently pnv_pci_shutdown() calls the PHB shutdown code for all PHBs in the system. It dereferences the private_data assuming it's a powernv PHB, which won't be the case when we have different PHB in the systems (like when we add vPHBs for CXL). This moves the shutdown hook to the pci_controller_ops and fixes the call site to use that instead. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++---- arch/powerpc/platforms/powernv/pci.c | 9 +++------ arch/powerpc/platforms/powernv/pci.h | 1 - 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 4cf0caaa33c7..3947749de280 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -40,6 +40,8 @@ struct pci_controller_ops { #endif int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); + + void (*shutdown)(struct pci_controller *); }; /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 508f530e367b..cec43c8dc44d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2643,8 +2643,10 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } -static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { + struct pnv_phb *phb = hose->private_data; + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } @@ -2659,6 +2661,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .shutdown = pnv_pci_ioda_shutdown, }; static void __init pnv_pci_init_ioda_phb(struct device_node *np, @@ -2806,9 +2809,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; - /* Setup shutdown function for kexec */ - phb->shutdown = pnv_pci_ioda_shutdown; - /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 8a557b5aabf7..3c35487e9778 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -704,12 +704,9 @@ void pnv_pci_shutdown(void) { struct pci_controller *hose; - list_for_each_entry(hose, &hose_list, list_node) { - struct pnv_phb *phb = hose->private_data; - - if (phb && phb->shutdown) - phb->shutdown(phb); - } + list_for_each_entry(hose, &hose_list, list_node) + if (hose->controller_ops.shutdown) + hose->controller_ops.shutdown(hose); } /* Fixup wrong class code in p7ioc and p8 root complex */ diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index ac8686c853e6..714ee3c19854 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -110,7 +110,6 @@ struct pnv_phb { struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); - void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); -- cgit v1.2.3 From abeeed6d3d9948b3235a5bb77759d8c1f84de39d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:07:00 +1000 Subject: powerpc/pci: Add pcibios_disable_device() hook This adds a hook into the powerpc pci code for pci_disable_device() calls. The generic code already provides a weak pcibios_disable_device() symbol, so we just need to provide our own in powerpc and it'll get picked up. This is passed directly to the phb controller ops, provided one exists. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci-common.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 3947749de280..b76cd5682a8c 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -27,6 +27,8 @@ struct pci_controller_ops { * allow assignment/enabling of the device. */ bool (*enable_device_hook)(struct pci_dev *); + void (*disable_device)(struct pci_dev *); + void (*release_device)(struct pci_dev *); /* Called during PCI resource reassignment */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2040cd2f7358..b9de34d44fcb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1490,6 +1490,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } +void pcibios_disable_device(struct pci_dev *dev) +{ + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.disable_device) + phb->controller_ops.disable_device(dev); +} + resource_size_t pcibios_io_space_offset(struct pci_controller *hose) { return (unsigned long) hose->io_base_virt - _IO_BASE; -- cgit v1.2.3 From ec249dd860ed88e15b3e2bd363cbfc76ba8c1884 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:07:16 +1000 Subject: cxl: Move include file cxl.h -> cxl-base.h This moves the current include file from cxl.h -> cxl-base.h. This current include file is used only to pass information between the base driver that needs to be built into the kernel and the cxl module. This is to make way for a new include/misc/cxl.h which will contain just the kernel API for other driver to use Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- MAINTAINERS | 2 +- arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/mm/copro_fault.c | 2 +- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- drivers/misc/cxl/base.c | 2 +- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/irq.c | 2 +- drivers/misc/cxl/main.c | 2 +- drivers/misc/cxl/native.c | 2 +- include/misc/cxl-base.h | 48 +++++++++++++++++++++++++++++++ include/misc/cxl.h | 48 ------------------------------- 12 files changed, 58 insertions(+), 58 deletions(-) create mode 100644 include/misc/cxl-base.h delete mode 100644 include/misc/cxl.h (limited to 'arch/powerpc') diff --git a/MAINTAINERS b/MAINTAINERS index 562ae4ef85c8..80627ddba3e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2946,7 +2946,7 @@ M: Michael Neuling L: linuxppc-dev@lists.ozlabs.org S: Supported F: drivers/misc/cxl/ -F: include/misc/cxl.h +F: include/misc/cxl* F: include/uapi/misc/cxl.h F: Documentation/powerpc/cxl.txt F: Documentation/powerpc/cxl.txt diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index f9b498292a5c..6f77f71ee964 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -11,7 +11,7 @@ #define _ASM_PNV_PCI_H #include -#include +#include int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 7a71d7f81cf6..6527882ce05e 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 9c4880ddecd6..13befa35d8a8 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,7 +29,7 @@ #include #include -#include +#include #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cec43c8dc44d..b2841853550a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "powernv.h" #include "pci.h" diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 0654ad83675e..a9f0dd3255a2 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "cxl.h" /* protected by rcu */ diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 778161afd0a3..46d806e3b943 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 212790b4ee57..680cd263436d 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 8ccddceead66..833348e2c9cb 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index a4b40d72466a..10567f245818 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h new file mode 100644 index 000000000000..5ae962512fb8 --- /dev/null +++ b/include/misc/cxl-base.h @@ -0,0 +1,48 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MISC_CXL_BASE_H +#define _MISC_CXL_BASE_H + +#ifdef CONFIG_CXL_BASE + +#define CXL_IRQ_RANGES 4 + +struct cxl_irq_ranges { + irq_hw_number_t offset[CXL_IRQ_RANGES]; + irq_hw_number_t range[CXL_IRQ_RANGES]; +}; + +extern atomic_t cxl_use_count; + +static inline bool cxl_ctx_in_use(void) +{ + return (atomic_read(&cxl_use_count) != 0); +} + +static inline void cxl_ctx_get(void) +{ + atomic_inc(&cxl_use_count); +} + +static inline void cxl_ctx_put(void) +{ + atomic_dec(&cxl_use_count); +} + +void cxl_slbia(struct mm_struct *mm); + +#else /* CONFIG_CXL_BASE */ + +static inline bool cxl_ctx_in_use(void) { return false; } +static inline void cxl_slbia(struct mm_struct *mm) {} + +#endif /* CONFIG_CXL_BASE */ + +#endif diff --git a/include/misc/cxl.h b/include/misc/cxl.h deleted file mode 100644 index 975cc7861f18..000000000000 --- a/include/misc/cxl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2014 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _MISC_CXL_H -#define _MISC_CXL_H - -#ifdef CONFIG_CXL_BASE - -#define CXL_IRQ_RANGES 4 - -struct cxl_irq_ranges { - irq_hw_number_t offset[CXL_IRQ_RANGES]; - irq_hw_number_t range[CXL_IRQ_RANGES]; -}; - -extern atomic_t cxl_use_count; - -static inline bool cxl_ctx_in_use(void) -{ - return (atomic_read(&cxl_use_count) != 0); -} - -static inline void cxl_ctx_get(void) -{ - atomic_inc(&cxl_use_count); -} - -static inline void cxl_ctx_put(void) -{ - atomic_dec(&cxl_use_count); -} - -void cxl_slbia(struct mm_struct *mm); - -#else /* CONFIG_CXL_BASE */ - -static inline bool cxl_ctx_in_use(void) { return false; } -static inline void cxl_slbia(struct mm_struct *mm) {} - -#endif /* CONFIG_CXL_BASE */ - -#endif -- cgit v1.2.3 From 14aae78f084af5241476ede8e28b377fc751b191 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 8 Apr 2015 09:31:10 +0200 Subject: powerpc/powernv: convert OPAL codes returned by sysparam calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The opal_{get,set}_param calls return internal error codes which need to be translated in errnos in Linux. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-sysparam.c | 12 ++++++++---- arch/powerpc/platforms/powernv/opal.c | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 2e52b47393e7..afe66c576a38 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) } ret = opal_get_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); @@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) ret = opal_set_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8403307c5362..fd694bd51b80 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -830,6 +830,7 @@ int opal_error_code(int rc) case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; case OPAL_BUSY_EVENT: return -EBUSY; case OPAL_NO_MEM: return -ENOMEM; + case OPAL_PERMISSION: return -EPERM; case OPAL_UNSUPPORTED: return -EIO; case OPAL_HARDWARE: return -EIO; -- cgit v1.2.3 From 18725226af7a6a0eac8a5bc42a6c9c90af1fb0e7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 13 May 2015 15:13:18 +1000 Subject: powerpc/config: Enable bnx2x on ppc64 and pseries defconfigs Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index aad501ae3834..a97efc2146fd 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -155,6 +155,7 @@ CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=y CONFIG_TIGON3=y +CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_S2IO=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 4da826004ef8..0d9efcedaf34 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -154,6 +154,7 @@ CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=y CONFIG_TIGON3=y +CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_S2IO=m -- cgit v1.2.3 From 48c06154952a18c5cd4d074ec3de627430cb6d23 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 20 May 2015 11:23:33 +0800 Subject: powerpc/powernv: Merge common platform device initialisation opal_ipmi_init and opal_flash_init are equivalent, except for the compatbile string. Merge these two into a common opal_pdev_init function. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index fd694bd51b80..fdce840f0006 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -600,21 +600,13 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_flash_init(struct device_node *opal_node) +static void opal_pdev_init(struct device_node *opal_node, + const char *compatible) { struct device_node *np; for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-flash")) - of_platform_device_create(np, NULL, NULL); -} - -static void opal_ipmi_init(struct device_node *opal_node) -{ - struct device_node *np; - - for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-ipmi")) + if (of_device_is_compatible(np, compatible)) of_platform_device_create(np, NULL, NULL); } @@ -717,10 +709,9 @@ static int __init opal_init(void) opal_msglog_init(); } - /* Initialize OPAL IPMI backend */ - opal_ipmi_init(opal_node); - - opal_flash_init(opal_node); + /* Initialize platform devices: IPMI backend & flash interface */ + opal_pdev_init(opal_node, "ibm,opal-ipmi"); + opal_pdev_init(opal_node, "ibm,opal-flash"); return 0; } -- cgit v1.2.3 From 594fcb9ec9e17215b4f98b5d40ba5e3af618ba82 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 20 May 2015 11:23:33 +0800 Subject: powerpc/powernv: Expose OPAL APIs required by PRD interface The (upcoming) opal-prd driver needs to access the message notifier and xscom code, so add EXPORT_SYMBOL_GPL macros for these. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index fdce840f0006..0379068e1c10 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -235,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, return atomic_notifier_chain_register( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_register); int opal_message_notifier_unregister(enum opal_msg_type msg_type, struct notifier_block *nb) @@ -242,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type, return atomic_notifier_chain_unregister( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); static void opal_message_do_notify(uint32_t msg_type, void *msg) { @@ -743,6 +745,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_xscom_read); +EXPORT_SYMBOL_GPL(opal_xscom_write); EXPORT_SYMBOL_GPL(opal_ipmi_send); EXPORT_SYMBOL_GPL(opal_ipmi_recv); EXPORT_SYMBOL_GPL(opal_flash_read); -- cgit v1.2.3 From 0d7cd8550d30906c7461ced654306da30f1590e2 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 4 Jun 2015 21:51:47 +0800 Subject: powerpc/powernv: Add opal-prd channel This change adds a char device to access the "PRD" (processor runtime diagnostics) channel to OPAL firmware. Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta & Vishal Kulkarni. Signed-off-by: Neelesh Gupta Signed-off-by: Jeremy Kerr Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 21 +- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/include/uapi/asm/opal-prd.h | 58 ++++ arch/powerpc/platforms/powernv/Kconfig | 7 + arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-prd.c | 444 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 4 +- 8 files changed, 535 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/opal-prd.h create mode 100644 arch/powerpc/platforms/powernv/opal-prd.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index a49e5fa6f939..e9e4c52f3685 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -153,7 +153,8 @@ #define OPAL_FLASH_READ 110 #define OPAL_FLASH_WRITE 111 #define OPAL_FLASH_ERASE 112 -#define OPAL_LAST 112 +#define OPAL_PRD_MSG 113 +#define OPAL_LAST 113 /* Device tree flags */ @@ -359,6 +360,7 @@ enum opal_msg_type { OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, + OPAL_MSG_PRD, OPAL_MSG_TYPE_MAX, }; @@ -681,6 +683,23 @@ typedef struct oppanel_line { __be64 line_len; } oppanel_line_t; +enum opal_prd_msg_type { + OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT --> OPAL */ + OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel --> OPAL */ + OPAL_PRD_MSG_TYPE_ATTN, /* HBRT <-- OPAL */ + OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT --> OPAL */ + OPAL_PRD_MSG_TYPE_OCC_ERROR, /* HBRT <-- OPAL */ + OPAL_PRD_MSG_TYPE_OCC_RESET, /* HBRT <-- OPAL */ +}; + +struct opal_prd_msg_header { + uint8_t type; + uint8_t pad[1]; + __be16 size; +}; + +struct opal_prd_msg; + /* * SG entries * diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 1412814347ba..958e941c0cda 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -194,6 +194,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t *msg_len); int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, struct opal_i2c_request *oreq); +int64_t opal_prd_msg(struct opal_prd_msg *msg); int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h new file mode 100644 index 000000000000..319ff4a26158 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/opal-prd.h @@ -0,0 +1,58 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * (C) Copyright IBM 2015 + * + * Author: Vaidyanathan Srinivasan + * Author: Jeremy Kerr + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_ +#define _UAPI_ASM_POWERPC_OPAL_PRD_H_ + +#include + +/** + * The version of the kernel interface of the PRD system. This describes the + * interface available for the /dev/opal-prd device. The actual PRD message + * layout and content is private to the firmware <--> userspace interface, so + * is not covered by this versioning. + * + * Future interface versions are backwards-compatible; if a later kernel + * version is encountered, functionality provided in earlier versions + * will work. + */ +#define OPAL_PRD_KERNEL_VERSION 1 + +#define OPAL_PRD_GET_INFO _IOR('o', 0x01, struct opal_prd_info) +#define OPAL_PRD_SCOM_READ _IOR('o', 0x02, struct opal_prd_scom) +#define OPAL_PRD_SCOM_WRITE _IOW('o', 0x03, struct opal_prd_scom) + +#ifndef __ASSEMBLY__ + +struct opal_prd_info { + __u64 version; + __u64 reserved[3]; +}; + +struct opal_prd_scom { + __u64 chip; + __u64 addr; + __u64 data; + __s64 rc; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 4b044d8cb49a..604190cab522 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -19,3 +19,10 @@ config PPC_POWERNV select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL default y + +config OPAL_PRD + tristate 'OPAL PRD driver' + depends on PPC_POWERNV + help + This enables the opal-prd driver, a facility to run processor + recovery diagnostics on OpenPower machines diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 26bd7e417b7c..1c8cdb6250e7 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o +obj-$(CONFIG_OPAL_PRD) += opal-prd.o diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c new file mode 100644 index 000000000000..d9e72bde79f5 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -0,0 +1,444 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * Copyright IBM Corporation 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "opal-prd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct opal_prd_msg_queue_item { + struct opal_prd_msg_header msg; + struct list_head list; +}; + +static struct device_node *prd_node; +static LIST_HEAD(opal_prd_msg_queue); +static DEFINE_SPINLOCK(opal_prd_msg_queue_lock); +static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait); +static atomic_t prd_usage; + +static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) +{ + struct device_node *parent, *node; + bool found; + + if (addr + size < addr) + return false; + + parent = of_find_node_by_path("/reserved-memory"); + if (!parent) + return false; + + found = false; + + for_each_child_of_node(parent, node) { + uint64_t range_addr, range_size, range_end; + const __be32 *addrp; + const char *label; + + addrp = of_get_address(node, 0, &range_size, NULL); + + range_addr = of_read_number(addrp, 2); + range_end = range_addr + range_size; + + label = of_get_property(node, "ibm,prd-label", NULL); + + /* PRD ranges need a label */ + if (!label) + continue; + + if (range_end <= range_addr) + continue; + + if (addr >= range_addr && addr + size <= range_end) { + found = true; + of_node_put(node); + break; + } + } + + of_node_put(parent); + return found; +} + +static int opal_prd_open(struct inode *inode, struct file *file) +{ + /* + * Prevent multiple (separate) processes from concurrent interactions + * with the FW PRD channel + */ + if (atomic_xchg(&prd_usage, 1) == 1) + return -EBUSY; + + return 0; +} + +/* + * opal_prd_mmap - maps firmware-provided ranges into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + */ + +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t addr, size; + int rc; + + pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_flags); + + addr = vma->vm_pgoff << PAGE_SHIFT; + size = vma->vm_end - vma->vm_start; + + /* ensure we're mapping within one of the allowable ranges */ + if (!opal_prd_range_is_valid(addr, size)) + return -EINVAL; + + vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, + vma->vm_pgoff, + size, vma->vm_page_prot)) + | _PAGE_SPECIAL); + + rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + vma->vm_page_prot); + + return rc; +} + +static bool opal_msg_queue_empty(void) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + ret = list_empty(&opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + return ret; +} + +static unsigned int opal_prd_poll(struct file *file, + struct poll_table_struct *wait) +{ + poll_wait(file, &opal_prd_msg_wait, wait); + + if (!opal_msg_queue_empty()) + return POLLIN | POLLRDNORM; + + return 0; +} + +static ssize_t opal_prd_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_queue_item *item; + unsigned long flags; + ssize_t size, err; + int rc; + + /* we need at least a header's worth of data */ + if (count < sizeof(item->msg)) + return -EINVAL; + + if (*ppos) + return -ESPIPE; + + item = NULL; + + for (;;) { + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + if (!list_empty(&opal_prd_msg_queue)) { + item = list_first_entry(&opal_prd_msg_queue, + struct opal_prd_msg_queue_item, list); + list_del(&item->list); + } + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + if (item) + break; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(opal_prd_msg_wait, + !opal_msg_queue_empty()); + if (rc) + return -EINTR; + } + + size = be16_to_cpu(item->msg.size); + if (size > count) { + err = -EINVAL; + goto err_requeue; + } + + rc = copy_to_user(buf, &item->msg, size); + if (rc) { + err = -EFAULT; + goto err_requeue; + } + + kfree(item); + + return size; + +err_requeue: + /* eep! re-queue at the head of the list */ + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + return err; +} + +static ssize_t opal_prd_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_header hdr; + ssize_t size; + void *msg; + int rc; + + size = sizeof(hdr); + + if (count < size) + return -EINVAL; + + /* grab the header */ + rc = copy_from_user(&hdr, buf, sizeof(hdr)); + if (rc) + return -EFAULT; + + size = be16_to_cpu(hdr.size); + + msg = kmalloc(size, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = copy_from_user(msg, buf, size); + if (rc) { + size = -EFAULT; + goto out_free; + } + + rc = opal_prd_msg(msg); + if (rc) { + pr_warn("write: opal_prd_msg returned %d\n", rc); + size = -EIO; + } + +out_free: + kfree(msg); + + return size; +} + +static int opal_prd_release(struct inode *inode, struct file *file) +{ + struct opal_prd_msg_header msg; + + msg.size = cpu_to_be16(sizeof(msg)); + msg.type = OPAL_PRD_MSG_TYPE_FINI; + + opal_prd_msg((struct opal_prd_msg *)&msg); + + atomic_xchg(&prd_usage, 0); + + return 0; +} + +static long opal_prd_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct opal_prd_info info; + struct opal_prd_scom scom; + int rc = 0; + + switch (cmd) { + case OPAL_PRD_GET_INFO: + memset(&info, 0, sizeof(info)); + info.version = OPAL_PRD_KERNEL_VERSION; + rc = copy_to_user((void __user *)param, &info, sizeof(info)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_READ: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_read(scom.chip, scom.addr, + (__be64 *)&scom.data); + scom.data = be64_to_cpu(scom.data); + pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_WRITE: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data); + pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + default: + rc = -EINVAL; + } + + return rc; +} + +static const struct file_operations opal_prd_fops = { + .open = opal_prd_open, + .mmap = opal_prd_mmap, + .poll = opal_prd_poll, + .read = opal_prd_read, + .write = opal_prd_write, + .unlocked_ioctl = opal_prd_ioctl, + .release = opal_prd_release, + .owner = THIS_MODULE, +}; + +static struct miscdevice opal_prd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "opal-prd", + .fops = &opal_prd_fops, +}; + +/* opal interface */ +static int opal_prd_msg_notifier(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_prd_msg_queue_item *item; + struct opal_prd_msg_header *hdr; + struct opal_msg *msg = _msg; + unsigned long flags; + int size; + + if (msg_type != OPAL_MSG_PRD) + return 0; + + /* Calculate total size of the item we need to store. The 'size' field + * in the header includes the header itself. */ + hdr = (void *)msg->params; + size = (sizeof(*item) - sizeof(item->msg)) + be16_to_cpu(hdr->size); + + item = kzalloc(size, GFP_ATOMIC); + if (!item) + return -ENOMEM; + + memcpy(&item->msg, msg->params, size); + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add_tail(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + wake_up_interruptible(&opal_prd_msg_wait); + + return 0; +} + +static struct notifier_block opal_prd_event_nb = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + +static int opal_prd_probe(struct platform_device *pdev) +{ + int rc; + + if (!pdev || !pdev->dev.of_node) + return -ENODEV; + + /* We should only have one prd driver instance per machine; ensure + * that we only get a valid probe on a single OF node. + */ + if (prd_node) + return -EBUSY; + + prd_node = pdev->dev.of_node; + + rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb); + if (rc) { + pr_err("Couldn't register event notifier\n"); + return rc; + } + + rc = misc_register(&opal_prd_dev); + if (rc) { + pr_err("failed to register miscdev\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, + &opal_prd_event_nb); + return rc; + } + + return 0; +} + +static int opal_prd_remove(struct platform_device *pdev) +{ + misc_deregister(&opal_prd_dev); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + return 0; +} + +static const struct of_device_id opal_prd_match[] = { + { .compatible = "ibm,opal-prd" }, + { }, +}; + +static struct platform_driver opal_prd_driver = { + .driver = { + .name = "opal-prd", + .owner = THIS_MODULE, + .of_match_table = opal_prd_match, + }, + .probe = opal_prd_probe, + .remove = opal_prd_remove, +}; + +module_platform_driver(opal_prd_driver); + +MODULE_DEVICE_TABLE(of, opal_prd_match); +MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index bf15ead00e12..d6a7b8252e4d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -296,3 +296,4 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0379068e1c10..9e9c483eee4d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -711,9 +711,10 @@ static int __init opal_init(void) opal_msglog_init(); } - /* Initialize platform devices: IPMI backend & flash interface */ + /* Initialize platform devices: IPMI backend, PRD & flash interface */ opal_pdev_init(opal_node, "ibm,opal-ipmi"); opal_pdev_init(opal_node, "ibm,opal-flash"); + opal_pdev_init(opal_node, "ibm,opal-prd"); return 0; } @@ -752,6 +753,7 @@ EXPORT_SYMBOL_GPL(opal_ipmi_recv); EXPORT_SYMBOL_GPL(opal_flash_read); EXPORT_SYMBOL_GPL(opal_flash_write); EXPORT_SYMBOL_GPL(opal_flash_erase); +EXPORT_SYMBOL_GPL(opal_prd_msg); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, -- cgit v1.2.3 From f1b3b4450dcbfd0098b6fc3b00f4880701550c83 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 May 2015 12:55:51 +0200 Subject: powerpc/85xx: Replace CONFIG_USB_ISP1760_HCD by CONFIG_USB_ISP1760 Since commit 100832abf065bc18 ("usb: isp1760: Make HCD support optional"), CONFIG_USB_ISP1760_HCD is automatically selected when needed. Enabling that option in the defconfig is now a no-op, and no longer enables ISP1760 HCD support. Re-enable the ISP1760 driver in the defconfig by enabling USB_ISP1760_HOST_ROLE instead. Signed-off-by: Geert Uytterhoeven Signed-off-by: Scott Wood --- arch/powerpc/configs/85xx/xes_mpc85xx_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 34f3ea1729e0..858b539d004b 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -108,7 +108,7 @@ CONFIG_SENSORS_LM90=y CONFIG_WATCHDOG=y CONFIG_USB=y CONFIG_USB_MON=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_STORAGE=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -- cgit v1.2.3 From 502f159c0239863deebfc50e09c0892d0c157101 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 3 Jun 2015 14:52:59 +1000 Subject: powerpc/eeh: Fix trivial error in eeh_restore_dev_state() Commit 28158cd "powerpc/eeh: Enhance pcibios_set_pcie_reset_state()" introduced a fix for a problem where certain configurations could lead to pci_reset_function() destroying the state of PCI devices other than the one specified. Unfortunately, the fix has a trivial bug - it calls pci_save_state() again, when it should be calling pci_restore_state(). This corrects the problem. Cc: Gavin Shan Signed-off-by: David Gibson Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 23e0aa773643..51dcdf66e9e6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -717,7 +717,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata) /* The caller should restore state for the specified device */ if (pdev != dev) - pci_save_state(pdev); + pci_restore_state(pdev); return NULL; } -- cgit v1.2.3 From c952c1c48233b95f838c8743f09feace1f0e6cab Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:01 +0530 Subject: powerpc: Fix handling of DSCR related facility unavailable exception Currently DSCR (Data Stream Control Register) can be accessed with mfspr or mtspr instructions inside a thread via two different SPR numbers. One being the user accessible problem state SPR number 0x03 and the other being the privilege state SPR number 0x11. All access through the privilege state SPR number get emulated through illegal instruction exception. Any access through the problem state SPR number raises one facility unavailable exception which sets the thread based dscr_inherit bit and enables DSCR facility through FSCR register thus allowing direct access to DSCR without going through this exception in the future. We set the thread.dscr_inherit bit whether the access was with mfspr or mtspr instruction which is neither correct nor does it match the behaviour through the instruction emulation code path driven from privilege state SPR number. User currently observes two different kind of behaviour when accessing the DSCR through these two SPR numbers. This problem can be observed through these two test cases by replacing the privilege state SPR number with the problem state SPR number. (1) http://ozlabs.org/~anton/junkcode/dscr_default_test.c (2) http://ozlabs.org/~anton/junkcode/dscr_explicit_test.c This patch fixes the problem by making sure that the behaviour visible to the user remains the same irrespective of which SPR number is being used. Inside facility unavailable exception, we check whether it was cuased by a mfspr or a mtspr isntrucction. In case of mfspr instruction, just emulate the instruction. In case of mtspr instruction, set the thread based dscr_inherit bit and also enable the facility through FSCR. All user SPR based mfspr instruction will be emulated till one user SPR based mtspr has been executed. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 19e4744b6eba..6530f1b8874d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs) }; char *facility = "unknown"; u64 value; + u32 instword, rd; u8 status; bool hv; @@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs *regs) status = value >> 56; if (status == FSCR_DSCR_LG) { - /* User is acessing the DSCR. Set the inherit bit and allow - * the user to set it directly in future by setting via the - * FSCR DSCR bit. We always leave HFSCR DSCR set. + /* + * User is accessing the DSCR register using the problem + * state only SPR number (0x03) either through a mfspr or + * a mtspr instruction. If it is a write attempt through + * a mtspr, then we set the inherit bit. This also allows + * the user to write or read the register directly in the + * future by setting via the FSCR DSCR bit. But in case it + * is a read DSCR attempt through a mfspr instruction, we + * just emulate the instruction instead. This code path will + * always emulate all the mfspr instructions till the user + * has attempted atleast one mtspr instruction. This way it + * preserves the same behaviour when the user is accessing + * the DSCR through privilege level only SPR number (0x11) + * which is emulated through illegal instruction exception. + * We always leave HFSCR DSCR set. */ - current->thread.dscr_inherit = 1; - mtspr(SPRN_FSCR, value | FSCR_DSCR); + if (get_user(instword, (u32 __user *)(regs->nip))) { + pr_err("Failed to fetch the user instruction\n"); + return; + } + + /* Write into DSCR (mtspr 0x03, RS) */ + if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK) + == PPC_INST_MTSPR_DSCR_USER) { + rd = (instword >> 21) & 0x1f; + current->thread.dscr = regs->gpr[rd]; + current->thread.dscr_inherit = 1; + mtspr(SPRN_FSCR, value | FSCR_DSCR); + } + + /* Read from DSCR (mfspr RT, 0x03) */ + if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK) + == PPC_INST_MFSPR_DSCR_USER) { + if (emulate_instruction(regs)) { + pr_err("DSCR based mfspr emulation failed\n"); + return; + } + regs->nip += 4; + emulate_single_step(regs); + } return; } -- cgit v1.2.3 From 280e109992831ee16a0f74ae887c08fb26c021f1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:02 +0530 Subject: powerpc/kernel: Remove the unused extern dscr_default The process context switch code no longer uses dscr_default variable from the sysfs.c file. The variable became unused when we started storing the CPU specific DSCR value in the PACA structure instead. This patch just removes this extern declaration. It was originally added by the following commit. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febb50dd5328..8005e18d1b40 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) /* * Copy a thread.. */ -extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ /* * Copy architecture-specific thread state -- cgit v1.2.3 From 1db365258ad9c3624897f48c764f8c557f492b26 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:03 +0530 Subject: powerpc/kernel: Rename PACA_DSCR to PACA_DSCR_DEFAULT PACA_DSCR offset macro tracks dscr_default element in the paca structure. Better change the name of this macro to match that of the data element it tracks. Makes the code more readable. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/tm.S | 4 ++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0034b6b3556a..98230579d99c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -247,7 +247,7 @@ int main(void) #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default)); + DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index afbc20019c2e..278888e89acc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,7 +556,7 @@ BEGIN_FTR_SECTION ld r0,THREAD_DSCR(r4) cmpwi r6,0 bne 1f - ld r0,PACA_DSCR(r13) + ld r0,PACA_DSCR_DEFAULT(r13) 1: BEGIN_FTR_SECTION_NESTED(70) mfspr r8, SPRN_FSCR diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5754b226da7e..bf8f34a58670 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -293,7 +293,7 @@ dont_backup_fp: ld r2, STK_GOT(r1) /* Load CPU's default DSCR */ - ld r0, PACA_DSCR(r13) + ld r0, PACA_DSCR_DEFAULT(r13) mtspr SPRN_DSCR, r0 blr @@ -473,7 +473,7 @@ restore_gprs: ld r2, STK_GOT(r1) /* Load CPU's default DSCR */ - ld r0, PACA_DSCR(r13) + ld r0, PACA_DSCR_DEFAULT(r13) mtspr SPRN_DSCR, r0 blr diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4d70df26c402..faa86e9c0551 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -324,7 +324,7 @@ kvm_start_guest: kvm_secondary_got_guest: /* Set HSTATE_DSCR(r13) to something sensible */ - ld r6, PACA_DSCR(r13) + ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) /* Order load of vcore, ptid etc. after load of vcpu */ -- cgit v1.2.3 From d3cb06e0cdc9841b289a0d68acaffd2868504902 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:04 +0530 Subject: powerpc/dscr: Add some in-code documentation This patch adds some in-code documentation to the DSCR related code to make it more readable without having any functional change to it. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 9 +++++++++ arch/powerpc/kernel/sysfs.c | 38 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bf117d8fb45f..28ded5d9b579 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -295,6 +295,15 @@ struct thread_struct { #endif #ifdef CONFIG_PPC64 unsigned long dscr; + /* + * This member element dscr_inherit indicates that the process + * has explicitly attempted and changed the DSCR register value + * for itself. Hence kernel wont use the default CPU DSCR value + * contained in the PACA structure anymore during process context + * switch. Once this variable is set, this behaviour will also be + * inherited to all the children of this process from that point + * onwards. + */ int dscr_inherit; unsigned long ppr; /* used to save/restore SMT priority */ #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index fa1fd8a0c867..692873bff334 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); +/* + * This is the system wide DSCR register default value. Any + * change to this default value through the sysfs interface + * will update all per cpu DSCR default values across the + * system stored in their respective PACA structures. + */ static unsigned long dscr_default; +/** + * read_dscr() - Fetch the cpu specific DSCR default + * @val: Returned cpu specific DSCR default value + * + * This function returns the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ static void read_dscr(void *val) { *(unsigned long *)val = get_paca()->dscr_default; } + +/** + * write_dscr() - Update the cpu specific DSCR default + * @val: New cpu specific DSCR default value to update + * + * This function updates the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ static void write_dscr(void *val) { get_paca()->dscr_default = *(unsigned long *)val; @@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct device_attribute *attr) attr->attr.mode |= 0200; } +/** + * show_dscr_default() - Fetch the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * + * This function returns the system wide DSCR default value. + */ static ssize_t show_dscr_default(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } +/** + * store_dscr_default() - Update the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * @count: Size of the update + * + * This function updates the system wide DSCR default value. + */ static ssize_t __used store_dscr_default(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -- cgit v1.2.3 From cfcb3d80a28380ba027331eb548ba309c4b66559 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 14 Apr 2015 13:05:57 +0530 Subject: powerpc/mm: Add trace point for tracking hash pte fault This enables us to understand how many hash fault we are taking when running benchmarks. For ex: -bash-4.2# ./perf stat -e powerpc:hash_fault -e page-faults /tmp/ebizzy.ppc64 -S 30 -P -n 1000 ... Performance counter stats for '/tmp/ebizzy.ppc64 -S 30 -P -n 1000': 1,10,04,075 powerpc:hash_fault 1,10,03,429 page-faults 30.865978991 seconds time elapsed NOTE: The impact of the tracepoint was not noticeable when running test. It was within the run-time variance of the test. For ex: without-patch: -------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.236562 task-clock (msec) # 0.928 CPUs utilized 2,179,213 stalled-cycles-frontend # 0.00% frontend cycles idle 17,174,367 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007794658 seconds time elapsed And with-patch: --------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.233746 task-clock (msec) # 0.921 CPUs utilized 0 context-switches # 0.000 K/sec 0.007854876 seconds time elapsed Performance counter stats for './a.out 3000 300': 643 page-faults # 0.087 M/sec 649 powerpc:hash_fault # 0.087 M/sec 7.430376 task-clock (msec) # 0.938 CPUs utilized 2,347,174 stalled-cycles-frontend # 0.00% frontend cycles idle 17,524,282 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007920284 seconds time elapsed Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/trace.h | 20 ++++++++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 2 ++ 2 files changed, 22 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index c15da6073cb8..8e86b48d0369 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -144,6 +144,26 @@ TRACE_EVENT_FN(opal_exit, ); #endif +TRACE_EVENT(hash_fault, + + TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap), + TP_ARGS(addr, access, trap), + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, access) + __field(unsigned long, trap) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->access = access; + __entry->trap = trap; + ), + + TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx", + __entry->addr, __entry->access, __entry->trap) +); + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a04ca922f0db..5ec987f65b2c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); + trace_hash_fault(ea, access, trap); /* Get region & vsid */ switch (REGION_ID(ea)) { -- cgit v1.2.3 From ea30e99e8eccb684490f40d011ea534ecd937e98 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:53 +1000 Subject: powerpc/eeh/ioda2: Use device::iommu_group to check IOMMU group This relies on the fact that a PCI device always has an IOMMU table which may not be the case when we get dynamic DMA windows so let's use more reliable check for IOMMU group here. As we do not rely on the table presence here, remove the workaround from pnv_pci_ioda2_set_bypass(); also remove the @add_to_iommu_group parameter from pnv_ioda_setup_bus_dma(). Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 4 +--- arch/powerpc/platforms/powernv/pci-ioda.c | 27 +++++---------------------- 2 files changed, 6 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 51dcdf66e9e6..af9b597b10af 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1410,13 +1410,11 @@ static int dev_has_iommu_table(struct device *dev, void *data) { struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev **ppdev = data; - struct iommu_table *tbl; if (!dev) return 0; - tbl = get_iommu_table_base(dev); - if (tbl && tbl->it_group) { + if (dev->iommu_group) { *ppdev = pdev; return 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b2841853550a..5491d2f0e192 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1655,21 +1655,15 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus, - bool add_to_iommu_group) + struct pci_bus *bus) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - if (add_to_iommu_group) - set_iommu_table_base_and_group(&dev->dev, - pe->tce32_table); - else - set_iommu_table_base(&dev->dev, pe->tce32_table); + set_iommu_table_base_and_group(&dev->dev, pe->tce32_table); if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate, - add_to_iommu_group); + pnv_ioda_setup_bus_dma(pe, dev->subordinate); } } @@ -1846,7 +1840,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + pnv_ioda_setup_bus_dma(pe, pe->pbus); } else if (pe->flags & PNV_IODA_PE_VF) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); @@ -1883,17 +1877,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) window_id, pe->tce_bypass_base, 0); - - /* - * EEH needs the mapping between IOMMU table and group - * of those VFIO/KVM pass-through devices. We can postpone - * resetting DMA ops until the DMA mask is configured in - * host side. - */ - if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); - else - pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -1985,7 +1968,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + pnv_ioda_setup_bus_dma(pe, pe->pbus); } else if (pe->flags & PNV_IODA_PE_VF) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); -- cgit v1.2.3 From 4617082ec049d92a797e8be0b4ba2ba6b59d90d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:54 +1000 Subject: powerpc/iommu/powernv: Get rid of set_iommu_table_base_and_group The set_iommu_table_base_and_group() name suggests that the function sets table base and add a device to an IOMMU group. The actual purpose for table base setting is to put some reference into a device so later iommu_add_device() can get the IOMMU group reference and the device to the group. At the moment a group cannot be explicitly passed to iommu_add_device() as we want it to work from the bus notifier, we can fix it later and remove confusing calls of set_iommu_table_base(). This replaces set_iommu_table_base_and_group() with a couple of set_iommu_table_base() + iommu_add_device() which makes reading the code easier. This adds few comments why set_iommu_table_base() and iommu_add_device() are called where they are called. For IODA1/2, this essentially removes iommu_add_device() call from the pnv_pci_ioda_dma_dev_setup() as it will always fail at this particular place: - for physical PE, the device is already attached by iommu_add_device() in pnv_pci_ioda_setup_dma_pe(); - for virtual PE, the sysfs entries are not ready to create all symlinks so actual adding is happening in tce_iommu_bus_notifier. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 7 ------- arch/powerpc/platforms/powernv/pci-ioda.c | 27 +++++++++++++++++++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 ++- arch/powerpc/platforms/pseries/iommu.c | 15 ++++++++------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 1e27d6338565..8353c865db6f 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -140,13 +140,6 @@ static inline int __init tce_iommu_bus_notifier_init(void) } #endif /* !CONFIG_IOMMU_API */ -static inline void set_iommu_table_base_and_group(struct device *dev, - void *base) -{ - set_iommu_table_base(dev, base); - iommu_add_device(dev); -} - extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, unsigned long mask, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5491d2f0e192..fd594b28fce1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1598,7 +1598,13 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->tce32_table); + /* + * Note: iommu_add_device() will fail here as + * for physical PE: the device is already added by now; + * for virtual PE: sysfs entries are not ready yet and + * tce_iommu_bus_notifier will add the device to a group later. + */ } static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -1660,7 +1666,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base_and_group(&dev->dev, pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->tce32_table); + iommu_add_device(&dev->dev); if (dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); @@ -1836,7 +1843,13 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + set_iommu_table_base(&pe->pdev->dev, tbl); + iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); @@ -1964,7 +1977,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + set_iommu_table_base(&pe->pdev->dev, tbl); + iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 7f826e8cc1d4..61ae20aa9000 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,7 +92,8 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + iommu_add_device(&pdev->dev); } static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 61d5a17f45c0..05ab06dccb45 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -688,8 +688,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); + set_iommu_table_base(&dev->dev, tbl); + iommu_add_device(&dev->dev); return; } @@ -700,10 +700,10 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) dn = dn->parent; - if (dn && PCI_DN(dn)) - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); - else + if (dn && PCI_DN(dn)) { + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + iommu_add_device(&dev->dev); + } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); } @@ -1115,7 +1115,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->iommu_table); + iommu_add_device(&dev->dev); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) -- cgit v1.2.3 From c5773822c000bafd462ad02db1d61459acef0d52 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:55 +1000 Subject: powerpc/powernv/ioda: Clean up IOMMU group registration The existing code has 3 calls to iommu_register_group() and all 3 branches actually cover all possible cases. This replaces 3 calls with one and moves the registration earlier; the latter will make more sense when we add TCE table sharing. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd594b28fce1..2b829eb07f87 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1785,6 +1785,9 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + tbl = pe->tce32_table; + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + /* Grab a 32-bit TCE table */ pe->tce32_seg = base; pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", @@ -1819,7 +1822,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, base << 28, IOMMU_PAGE_SHIFT_4K); @@ -1841,8 +1843,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. @@ -1850,14 +1850,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, */ set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } return; fail: @@ -1924,6 +1918,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + tbl = pe->tce32_table; + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; end = (1 << ilog2(phb->ioda.m32_pci_base)); @@ -1955,7 +1952,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, IOMMU_PAGE_SHIFT_4K); @@ -1975,8 +1971,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. @@ -1984,14 +1978,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, */ set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) -- cgit v1.2.3 From ac9a58891a965216e9cb549a0a2012b97e3abcce Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:56 +1000 Subject: powerpc/iommu: Put IOMMU group explicitly So far an iommu_table lifetime was the same as PE. Dynamic DMA windows will change this and iommu_free_table() will not always require the group to be released. This moves iommu_group_put() out of iommu_free_table(). This adds a iommu_pseries_free_table() helper which does iommu_group_put() and iommu_free_table(). Later it will be changed to receive a table_group and we will have to change less lines then. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 7 ------- arch/powerpc/platforms/powernv/pci-ioda.c | 5 +++++ arch/powerpc/platforms/pseries/iommu.c | 16 +++++++++++++++- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index b054f33ab1fb..3d47eb3e15dd 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -726,13 +726,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); -#ifdef CONFIG_IOMMU_API - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); - } -#endif - /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b829eb07f87..8070dc9921f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1310,6 +1311,10 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); pe->tce32_table = NULL; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 05ab06dccb45..fe5117bac29a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,18 @@ #include "pseries.h" +static void iommu_pseries_free_table(struct iommu_table *tbl, + const char *node_name) +{ +#ifdef CONFIG_IOMMU_API + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } +#endif + iommu_free_table(tbl, node_name); +} + static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, __be64 *startp, __be64 *endp) { @@ -1271,7 +1284,8 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti */ remove_ddw(np, false); if (pci && pci->iommu_table) - iommu_free_table(pci->iommu_table, np->full_name); + iommu_pseries_free_table(pci->iommu_table, + np->full_name); spin_lock(&direct_window_list_lock); list_for_each_entry(window, &direct_window_list, list) { -- cgit v1.2.3 From 8aca92d82d9f259e0b6b0c81e5eb9baaf471e6ad Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:57 +1000 Subject: powerpc/iommu: Always release iommu_table in iommu_free_table() At the moment iommu_free_table() only releases memory if the table was initialized for the platform code use, i.e. it had it_map initialized (which purpose is to track DMA memory space use). With dynamic DMA windows, we will need to be able to release iommu_table even if it was used for VFIO in which case it_map is NULL so does the patch. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 3d47eb3e15dd..73eb39a7a9e1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -713,9 +713,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) unsigned long bitmap_sz; unsigned int order; - if (!tbl || !tbl->it_map) { - printk(KERN_ERR "%s: expected TCE map for %s\n", __func__, - node_name); + if (!tbl) + return; + + if (!tbl->it_map) { + kfree(tbl); return; } -- cgit v1.2.3 From 9b14a1ff8657d3ee844f8987482bc367a716848c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:58 +1000 Subject: vfio: powerpc/spapr: Move page pinning from arch code to VFIO IOMMU driver This moves page pinning (get_user_pages_fast()/put_page()) code out of the platform IOMMU code and puts it to VFIO IOMMU driver where it belongs to as the platform code does not deal with page pinning. This makes iommu_take_ownership()/iommu_release_ownership() deal with the IOMMU table bitmap only. This removes page unpinning from iommu_take_ownership() as the actual TCE table might contain garbage and doing put_page() on it is undefined behaviour. Besides the last part, the rest of the patch is mechanical. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 4 -- arch/powerpc/kernel/iommu.c | 55 ------------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 80 +++++++++++++++++++++++++++++++------ 3 files changed, 67 insertions(+), 72 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 8353c865db6f..e94a5e3c5a2f 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -194,10 +194,6 @@ extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, unsigned long hwaddr, enum dma_data_direction direction); extern unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry); -extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, - unsigned long entry, unsigned long pages); -extern int iommu_put_tce_user_mode(struct iommu_table *tbl, - unsigned long entry, unsigned long tce); extern void iommu_flush_tce(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 73eb39a7a9e1..0019c80aa81d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -986,30 +986,6 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) } EXPORT_SYMBOL_GPL(iommu_clear_tce); -int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, - unsigned long entry, unsigned long pages) -{ - unsigned long oldtce; - struct page *page; - - for ( ; pages; --pages, ++entry) { - oldtce = iommu_clear_tce(tbl, entry); - if (!oldtce) - continue; - - page = pfn_to_page(oldtce >> PAGE_SHIFT); - WARN_ON(!page); - if (page) { - if (oldtce & TCE_PCI_WRITE) - SetPageDirty(page); - put_page(page); - } - } - - return 0; -} -EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); - /* * hwaddr is a kernel virtual address here (0xc... bazillion), * tce_build converts it to a physical address. @@ -1039,35 +1015,6 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_build); -int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, - unsigned long tce) -{ - int ret; - struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; - enum dma_data_direction direction = iommu_tce_direction(tce); - - ret = get_user_pages_fast(tce & PAGE_MASK, 1, - direction != DMA_TO_DEVICE, &page); - if (unlikely(ret != 1)) { - /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << tbl->it_page_shift, ret); */ - return -EFAULT; - } - hwaddr = (unsigned long) page_address(page) + offset; - - ret = iommu_tce_build(tbl, entry, hwaddr, direction); - if (ret) - put_page(page); - - if (ret < 0) - pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << tbl->it_page_shift, tce, ret); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode); - int iommu_take_ownership(struct iommu_table *tbl) { unsigned long sz = (tbl->it_size + 7) >> 3; @@ -1081,7 +1028,6 @@ int iommu_take_ownership(struct iommu_table *tbl) } memset(tbl->it_map, 0xff, sz); - iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); /* * Disable iommu bypass, otherwise the user can DMA to all of @@ -1099,7 +1045,6 @@ void iommu_release_ownership(struct iommu_table *tbl) { unsigned long sz = (tbl->it_size + 7) >> 3; - iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); memset(tbl->it_map, 0, sz); /* Restore bit#0 set by iommu_init_table() */ diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 730b4ef3e0cc..b95fa2b64680 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -147,6 +147,67 @@ static void tce_iommu_release(void *iommu_data) kfree(container); } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldtce; + struct page *page; + + for ( ; pages; --pages, ++entry) { + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = pfn_to_page(oldtce >> PAGE_SHIFT); + WARN_ON(!page); + if (page) { + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + put_page(page); + } + } + + return 0; +} + +static long tce_iommu_build(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, unsigned long pages) +{ + long i, ret = 0; + struct page *page = NULL; + unsigned long hva; + enum dma_data_direction direction = iommu_tce_direction(tce); + + for (i = 0; i < pages; ++i) { + unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + + ret = get_user_pages_fast(tce & PAGE_MASK, 1, + direction != DMA_TO_DEVICE, &page); + if (unlikely(ret != 1)) { + ret = -EFAULT; + break; + } + hva = (unsigned long) page_address(page) + offset; + + ret = iommu_tce_build(tbl, entry + i, hva, direction); + if (ret) { + put_page(page); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + tce += IOMMU_PAGE_SIZE_4K; + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -195,7 +256,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; struct iommu_table *tbl = container->tbl; - unsigned long tce, i; + unsigned long tce; if (!tbl) return -ENXIO; @@ -229,17 +290,9 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { - ret = iommu_put_tce_user_mode(tbl, - (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, - tce); - if (ret) - break; - tce += IOMMU_PAGE_SIZE_4K; - } - if (ret) - iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT_4K, i); + ret = tce_iommu_build(container, tbl, + param.iova >> IOMMU_PAGE_SHIFT_4K, + tce, param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); @@ -273,7 +326,7 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - ret = iommu_clear_tces_and_put_pages(tbl, + ret = tce_iommu_clear(container, tbl, param.iova >> IOMMU_PAGE_SHIFT_4K, param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); @@ -357,6 +410,7 @@ static void tce_iommu_detach_group(void *iommu_data, /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ container->tbl = NULL; + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); iommu_release_ownership(tbl); } mutex_unlock(&container->lock); -- cgit v1.2.3 From 10b35b2b7485c342334a48cf199063eed8b8748e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:05 +1000 Subject: powerpc/powernv: Do not set "read" flag if direction==DMA_NONE Normally a bitmap from the iommu_table is used to track what TCE entry is in use. Since we are going to use iommu_table without its locks and do xchg() instead, it becomes essential not to put bits which are not implied in the direction flag as the old TCE value (more precisely - the permission bits) will be used to decide whether to put the page or not. This adds iommu_direction_to_tce_perm() (its counterpart is there already) and uses it for powernv's pnv_tce_build(). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/iommu.c | 15 +++++++++++++++ arch/powerpc/platforms/powernv/pci.c | 7 +------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e94a5e3c5a2f..d91bd69d3196 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -200,6 +200,7 @@ extern int iommu_take_ownership(struct iommu_table *tbl); extern void iommu_release_ownership(struct iommu_table *tbl); extern enum dma_data_direction iommu_tce_direction(unsigned long tce); +extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir); #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0019c80aa81d..ac2f959adf9a 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -866,6 +866,21 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, } } +unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_BIDIRECTIONAL: + return TCE_PCI_READ | TCE_PCI_WRITE; + case DMA_FROM_DEVICE: + return TCE_PCI_WRITE; + case DMA_TO_DEVICE: + return TCE_PCI_READ; + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); + #ifdef CONFIG_IOMMU_API /* * SPAPR TCE API diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 3c35487e9778..1477422b907a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -576,15 +576,10 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs, bool rm) { - u64 proto_tce; + u64 proto_tce = iommu_direction_to_tce_perm(direction); __be64 *tcep, *tces; u64 rpn; - proto_tce = TCE_PCI_READ; // Read allowed - - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; rpn = __pa(uaddr) >> tbl->it_page_shift; -- cgit v1.2.3 From da004c3600f52e4f05017f60970e5010978006bc Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:06 +1000 Subject: powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table This adds a iommu_table_ops struct and puts pointer to it into the iommu_table struct. This moves tce_build/tce_free/tce_get/tce_flush callbacks from ppc_md to the new struct where they really belong to. This adds the requirement for @it_ops to be initialized before calling iommu_init_table() to make sure that we do not leave any IOMMU table with iommu_table_ops uninitialized. This is not a parameter of iommu_init_table() though as there will be cases when iommu_init_table() will not be called on TCE tables, for example - VFIO. This does s/tce_build/set/, s/tce_free/clear/ and removes "tce_" redundant prefixes. This removes tce_xxx_rm handlers from ppc_md but does not add them to iommu_table_ops as this will be done later if we decide to support TCE hypercalls in real mode. This removes _vm callbacks as only virtual mode is supported by now so this also removes @rm parameter. For pSeries, this always uses tce_buildmulti_pSeriesLP/ tce_buildmulti_pSeriesLP. This changes multi callback to fall back to tce_build_pSeriesLP/tce_free_pSeriesLP if FW_FEATURE_MULTITCE is not present. The reason for this is we still have to support "multitce=off" boot parameter in disable_multitce() and we do not want to walk through all IOMMU tables in the system and replace "multi" callbacks with single ones. For powernv, this defines _ops per PHB type which are P5IOC2/IODA1/IODA2. This makes the callbacks for them public. Later patches will extend callbacks for IODA1/2. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 17 +++++++++++ arch/powerpc/include/asm/machdep.h | 25 --------------- arch/powerpc/kernel/iommu.c | 46 ++++++++++++++-------------- arch/powerpc/kernel/vio.c | 5 +++ arch/powerpc/platforms/cell/iommu.c | 8 +++-- arch/powerpc/platforms/pasemi/iommu.c | 7 +++-- arch/powerpc/platforms/powernv/pci-ioda.c | 14 +++++++++ arch/powerpc/platforms/powernv/pci-p5ioc2.c | 7 +++++ arch/powerpc/platforms/powernv/pci.c | 47 +++++------------------------ arch/powerpc/platforms/powernv/pci.h | 5 +++ arch/powerpc/platforms/pseries/iommu.c | 34 ++++++++++++--------- arch/powerpc/sysdev/dart_iommu.c | 12 +++++--- 12 files changed, 116 insertions(+), 111 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index d91bd69d3196..e2a45c37dba8 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -44,6 +44,22 @@ extern int iommu_is_off; extern int iommu_force_on; +struct iommu_table_ops { + int (*set)(struct iommu_table *tbl, + long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs); + void (*clear)(struct iommu_table *tbl, + long index, long npages); + unsigned long (*get)(struct iommu_table *tbl, long index); + void (*flush)(struct iommu_table *tbl); +}; + +/* These are used by VIO */ +extern struct iommu_table_ops iommu_table_lpar_multi_ops; +extern struct iommu_table_ops iommu_table_pseries_ops; + /* * IOMAP_MAX_ORDER defines the largest contiguous block * of dma space we can get. IOMAP_MAX_ORDER = 13 @@ -78,6 +94,7 @@ struct iommu_table { #ifdef CONFIG_IOMMU_API struct iommu_group *it_group; #endif + struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); #ifdef CONFIG_PPC_POWERNV void *data; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 0d387d0570cd..952579f5e79a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -65,31 +65,6 @@ struct machdep_calls { * destroyed as well */ void (*hpte_clear_all)(void); - int (*tce_build)(struct iommu_table *tbl, - long index, - long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs); - void (*tce_free)(struct iommu_table *tbl, - long index, - long npages); - unsigned long (*tce_get)(struct iommu_table *tbl, - long index); - void (*tce_flush)(struct iommu_table *tbl); - - /* _rm versions are for real mode use only */ - int (*tce_build_rm)(struct iommu_table *tbl, - long index, - long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs); - void (*tce_free_rm)(struct iommu_table *tbl, - long index, - long npages); - void (*tce_flush_rm)(struct iommu_table *tbl); - void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, unsigned long flags, void *caller); void (*iounmap)(volatile void __iomem *token); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ac2f959adf9a..c0e67e945c95 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, ret = entry << tbl->it_page_shift; /* Set the return dma address */ /* Put the TCEs in the HW table */ - build_fail = ppc_md.tce_build(tbl, entry, npages, + build_fail = tbl->it_ops->set(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK(tbl), direction, attrs); - /* ppc_md.tce_build() only returns non-zero for transient errors. + /* tbl->it_ops->set() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return * DMA_ERROR_CODE. For all other errors the functionality is * not altered. @@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, } /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); /* Make sure updates are seen by hardware */ mb(); @@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, if (!iommu_free_check(tbl, dma_addr, npages)) return; - ppc_md.tce_free(tbl, entry, npages); + tbl->it_ops->clear(tbl, entry, npages); spin_lock_irqsave(&(pool->lock), flags); bitmap_clear(tbl->it_map, free_entry, npages); @@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, * not do an mb() here on purpose, it is not needed on any of * the current platforms. */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); } int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, @@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages, entry, dma_addr); /* Insert into HW table */ - build_fail = ppc_md.tce_build(tbl, entry, npages, + build_fail = tbl->it_ops->set(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK(tbl), direction, attrs); if(unlikely(build_fail)) @@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); DBG("mapped %d elements:\n", outcount); @@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, * do not do an mb() here, the affected platforms do not need it * when freeing. */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); } static void iommu_table_clear(struct iommu_table *tbl) @@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl) */ if (!is_kdump_kernel() || is_fadump_active()) { /* Clear the table in case firmware left allocations in it */ - ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); + tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size); return; } #ifdef CONFIG_CRASH_DUMP - if (ppc_md.tce_get) { + if (tbl->it_ops->get) { unsigned long index, tceval, tcecount = 0; /* Reserve the existing mappings left by the first kernel. */ for (index = 0; index < tbl->it_size; index++) { - tceval = ppc_md.tce_get(tbl, index + tbl->it_offset); + tceval = tbl->it_ops->get(tbl, index + tbl->it_offset); /* * Freed TCE entry contains 0x7fffffffffffffff on JS20 */ @@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) unsigned int i; struct iommu_pool *p; + BUG_ON(!tbl->it_ops); + /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); @@ -929,8 +931,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction); void iommu_flush_tce(struct iommu_table *tbl) { /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); /* Make sure updates are seen by hardware */ mb(); @@ -941,7 +943,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce_value, unsigned long npages) { - /* ppc_md.tce_free() does not support any value but 0 */ + /* tbl->it_ops->clear() does not support any value but 0 */ if (tce_value) return -EINVAL; @@ -989,9 +991,9 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) spin_lock(&(pool->lock)); - oldtce = ppc_md.tce_get(tbl, entry); + oldtce = tbl->it_ops->get(tbl, entry); if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) - ppc_md.tce_free(tbl, entry, 1); + tbl->it_ops->clear(tbl, entry, 1); else oldtce = 0; @@ -1014,10 +1016,10 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, spin_lock(&(pool->lock)); - oldtce = ppc_md.tce_get(tbl, entry); + oldtce = tbl->it_ops->get(tbl, entry); /* Add new entry if it is not busy */ if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) - ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL); + ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL); spin_unlock(&(pool->lock)); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 5bfdab9047be..b41426c60ef6 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1196,6 +1196,11 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) tbl->it_type = TCE_VB; tbl->it_blocksize = 16; + if (firmware_has_feature(FW_FEATURE_LPAR)) + tbl->it_ops = &iommu_table_lpar_multi_ops; + else + tbl->it_ops = &iommu_table_pseries_ops; + return iommu_init_table(tbl, -1); } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 21b502398bf3..14a582b21274 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np) return *ioid; } +static struct iommu_table_ops cell_iommu_ops = { + .set = tce_build_cell, + .clear = tce_free_cell +}; + static struct iommu_window * __init cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, unsigned long offset, unsigned long size, @@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_offset = (offset >> window->table.it_page_shift) + pte_offset; window->table.it_size = size >> window->table.it_page_shift; + window->table.it_ops = &cell_iommu_ops; iommu_init_table(&window->table, iommu->nid); @@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void) /* Setup various callbacks */ cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; ppc_md.dma_get_required_mask = cell_dma_get_required_mask; - ppc_md.tce_build = tce_build_cell; - ppc_md.tce_free = tce_free_cell; if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) goto bail; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index b8f567b2ea19..c929644e74a6 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index, } } +static struct iommu_table_ops iommu_table_iobmap_ops = { + .set = iobmap_build, + .clear = iobmap_free +}; static void iommu_table_iobmap_setup(void) { @@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void) * Should probably be 8 (64 bytes) */ iommu_table_iobmap.it_blocksize = 4; + iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; iommu_init_table(&iommu_table_iobmap, 0); pr_debug(" <- %s\n", __func__); } @@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void) pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi; pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; - ppc_md.tce_build = iobmap_build; - ppc_md.tce_free = iobmap_free; set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8070dc9921f2..b0a14c0e7774 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1726,6 +1726,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, */ } +static struct iommu_table_ops pnv_ioda1_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm) @@ -1770,6 +1776,12 @@ void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); } +static struct iommu_table_ops pnv_ioda2_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe, unsigned int base, unsigned int segs) @@ -1845,6 +1857,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); } + tbl->it_ops = &pnv_ioda1_iommu_ops; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { @@ -1973,6 +1986,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 8); tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); } + tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 61ae20aa9000..5a387a9132ff 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -83,10 +83,17 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } #endif /* CONFIG_PCI_MSI */ +static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { if (phb->p5ioc2.iommu_table.it_map == NULL) { + phb->p5ioc2.iommu_table.it_ops = &pnv_p5ioc2_iommu_ops; iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); iommu_register_group(&phb->p5ioc2.iommu_table, pci_domain_nr(phb->hose->bus), phb->opal_id); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 1477422b907a..2793b9d576e3 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -572,9 +572,9 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; -static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs, bool rm) +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); __be64 *tcep, *tces; @@ -592,22 +592,12 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, * of flags if that becomes the case */ if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); return 0; } -static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, - false); -} - -static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, - bool rm) +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { __be64 *tcep, *tces; @@ -617,32 +607,14 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, *(tcep++) = cpu_to_be64(0); if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); -} - -static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) -{ - pnv_tce_free(tbl, index, npages, false); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); } -static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } -static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true); -} - -static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) -{ - pnv_tce_free(tbl, index, npages, true); -} - void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift) @@ -744,11 +716,6 @@ void __init pnv_pci_init(void) pci_devs_phb_init(); /* Configure IOMMU DMA hooks */ - ppc_md.tce_build = pnv_tce_build_vm; - ppc_md.tce_free = pnv_tce_free_vm; - ppc_md.tce_build_rm = pnv_tce_build_rm; - ppc_md.tce_free_rm = pnv_tce_free_rm; - ppc_md.tce_get = pnv_tce_get; set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 714ee3c19854..45a756007ec3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -197,6 +197,11 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fe5117bac29a..33f3a855bfd9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -206,7 +206,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; unsigned long flags; - if (npages == 1) { + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } @@ -298,6 +298,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n { u64 rc; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) + return tce_free_pSeriesLP(tbl, tcenum, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { @@ -473,7 +476,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } - #ifdef CONFIG_PCI static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, @@ -559,6 +561,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_size = size >> tbl->it_page_shift; } +struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, + .get = tce_get_pseries +}; + static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) { struct device_node *dn; @@ -627,6 +635,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pci->phb->node); iommu_table_setparms(pci->phb, dn, tbl); + tbl->it_ops = &iommu_table_pseries_ops; pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); @@ -638,6 +647,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); } +struct iommu_table_ops iommu_table_lpar_multi_ops = { + .set = tce_buildmulti_pSeriesLP, + .clear = tce_freemulti_pSeriesLP, + .get = tce_get_pSeriesLP +}; static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { @@ -672,6 +686,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, ppci->phb->node); iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + tbl->it_ops = &iommu_table_lpar_multi_ops; ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->iommu_table); @@ -699,6 +714,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, phb->node); iommu_table_setparms(phb, dn, tbl); + tbl->it_ops = &iommu_table_pseries_ops; PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, tbl); @@ -1121,6 +1137,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + tbl->it_ops = &iommu_table_lpar_multi_ops; pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); pr_debug(" created table: %p\n", pci->iommu_table); @@ -1315,22 +1332,11 @@ void iommu_init_early_pSeries(void) return; if (firmware_has_feature(FW_FEATURE_LPAR)) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { - ppc_md.tce_build = tce_buildmulti_pSeriesLP; - ppc_md.tce_free = tce_freemulti_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; - } - ppc_md.tce_get = tce_get_pSeriesLP; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; } else { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; - ppc_md.tce_get = tce_get_pseries; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; } @@ -1348,8 +1354,6 @@ static int __init disable_multitce(char *str) firmware_has_feature(FW_FEATURE_LPAR) && firmware_has_feature(FW_FEATURE_MULTITCE)) { printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; } return 1; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index d00a5663e312..90bcdfeedf48 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -286,6 +286,12 @@ static int __init dart_init(struct device_node *dart_node) return 0; } +static struct iommu_table_ops iommu_dart_ops = { + .set = dart_build, + .clear = dart_free, + .flush = dart_flush, +}; + static void iommu_table_dart_setup(void) { iommu_table_dart.it_busno = 0; @@ -298,6 +304,7 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_base = (unsigned long)dart_vbase; iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; + iommu_table_dart.it_ops = &iommu_dart_ops; iommu_init_table(&iommu_table_dart, -1); /* Reserve the last page of the DART to avoid possible prefetch @@ -386,11 +393,6 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) if (dart_init(dn) != 0) goto bail; - /* Setup low level TCE operations for the core IOMMU code */ - ppc_md.tce_build = dart_build; - ppc_md.tce_free = dart_free; - ppc_md.tce_flush = dart_flush; - /* Setup bypass if supported */ if (dart_is_u4) ppc_md.dma_set_mask = dart_dma_set_mask; -- cgit v1.2.3 From decbda25728ddfbb28b77749d2545028e892ca99 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:07 +1000 Subject: powerpc/powernv/ioda/ioda2: Rework TCE invalidation in tce_build()/tce_free() The pnv_pci_ioda_tce_invalidate() helper invalidates TCE cache. It is supposed to be called on IODA1/2 and not called on p5ioc2. It receives start and end host addresses of TCE table. IODA2 actually needs PCI addresses to invalidate the cache. Those can be calculated from host addresses but since we are going to implement multi-level TCE tables, calculating PCI address from a host address might get either tricky or ugly as TCE table remains flat on PCI bus but not in RAM. This moves pnv_pci_ioda_tce_invalidate() from generic pnv_tce_build/ pnt_tce_free and defines IODA1/2-specific callbacks which call generic ones and do PHB-model-specific TCE cache invalidation. P5IOC2 keeps using generic callbacks as before. This changes pnv_pci_ioda2_tce_invalidate() to receives TCE index and number of pages which are PCI addresses shifted by IOMMU page shift. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 81 ++++++++++++++++++++++--------- arch/powerpc/platforms/powernv/pci.c | 17 ++----- 2 files changed, 61 insertions(+), 37 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b0a14c0e7774..f710729a4a35 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1679,18 +1679,19 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, } } -static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { + struct pnv_ioda_pe *pe = tbl->data; __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; const unsigned shift = tbl->it_page_shift; - start = __pa(startp); - end = __pa(endp); + start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); + end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset + + npages - 1); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { @@ -1726,16 +1727,39 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, */ } +static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); + + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); +} + static struct iommu_table_ops pnv_ioda1_iommu_ops = { - .set = pnv_tce_build, - .clear = pnv_tce_free, + .set = pnv_ioda1_tce_build, + .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, }; -static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { + struct pnv_ioda_pe *pe = tbl->data; unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1748,10 +1772,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, end = start; /* Figure out the start, end and step */ - inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << shift); - inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << shift); + start |= (index << shift); + end |= ((index + npages - 1) << shift); inc = (0x1ull << shift); mb(); @@ -1764,21 +1786,32 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, } } -void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) { - struct pnv_ioda_pe *pe = tbl->data; - struct pnv_phb *phb = pe->phb; + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); - if (phb->type == PNV_PHB_IODA1) - pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); - else - pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } static struct iommu_table_ops pnv_ioda2_iommu_ops = { - .set = pnv_tce_build, - .clear = pnv_tce_free, + .set = pnv_ioda2_tce_build, + .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2793b9d576e3..f72fc6e7d63d 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -577,37 +577,28 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); - __be64 *tcep, *tces; + __be64 *tcep; u64 rpn; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; rpn = __pa(uaddr) >> tbl->it_page_shift; while (npages--) *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << tbl->it_page_shift)); - /* Some implementations won't cache invalid TCEs and thus may not - * need that flush. We'll probably turn it_type into a bit mask - * of flags if that becomes the case - */ - if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); return 0; } void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep, *tces; + __be64 *tcep; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; while (npages--) *(tcep++) = cpu_to_be64(0); - - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) -- cgit v1.2.3 From b348aa65297659c310943221ac1d3f4b4491ea44 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:08 +1000 Subject: powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 19 ++--- arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/kernel/iommu.c | 17 ++--- arch/powerpc/platforms/powernv/pci-ioda.c | 55 +++++++------- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 18 +++-- arch/powerpc/platforms/powernv/pci.h | 3 +- arch/powerpc/platforms/pseries/iommu.c | 107 +++++++++++++++++++--------- drivers/vfio/vfio_iommu_spapr_tce.c | 23 +++--- 8 files changed, 152 insertions(+), 92 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e2a45c37dba8..5a7267f47bdb 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,14 +91,9 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ -#ifdef CONFIG_IOMMU_API - struct iommu_group *it_group; -#endif + struct iommu_table_group *it_table_group; struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); -#ifdef CONFIG_PPC_POWERNV - void *data; -#endif }; /* Pure 2^n version of get_order */ @@ -129,14 +124,22 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#define IOMMU_TABLE_GROUP_MAX_TABLES 1 + +struct iommu_table_group { + struct iommu_group *group; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; +}; + #ifdef CONFIG_IOMMU_API -extern void iommu_register_group(struct iommu_table *tbl, + +extern void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num); extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); #else -static inline void iommu_register_group(struct iommu_table *tbl, +static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num) { diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b76cd5682a8c..712add590445 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -199,7 +199,7 @@ struct pci_dn { struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ - struct iommu_table *iommu_table; /* for phb's or bridges */ + struct iommu_table_group *table_group; /* for phb's or bridges */ struct device_node *node; /* back-pointer to the device_node */ int pci_ext_config_space; /* for pci devices */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0e67e945c95..719f0485ec79 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -889,11 +889,12 @@ EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); */ static void group_release(void *iommu_data) { - struct iommu_table *tbl = iommu_data; - tbl->it_group = NULL; + struct iommu_table_group *table_group = iommu_data; + + table_group->group = NULL; } -void iommu_register_group(struct iommu_table *tbl, +void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num) { struct iommu_group *grp; @@ -905,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl, PTR_ERR(grp)); return; } - tbl->it_group = grp; - iommu_group_set_iommudata(grp, tbl, group_release); + table_group->group = grp; + iommu_group_set_iommudata(grp, table_group, group_release); name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", pci_domain_number, pe_num); if (!name) @@ -1094,7 +1095,7 @@ int iommu_add_device(struct device *dev) } tbl = get_iommu_table_base(dev); - if (!tbl || !tbl->it_group) { + if (!tbl || !tbl->it_table_group || !tbl->it_table_group->group) { pr_debug("%s: Skipping device %s with no tbl\n", __func__, dev_name(dev)); return 0; @@ -1102,7 +1103,7 @@ int iommu_add_device(struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", @@ -1111,7 +1112,7 @@ int iommu_add_device(struct device *dev) return -EINVAL; } - return iommu_group_add_device(tbl->it_group, dev); + return iommu_group_add_device(tbl->it_table_group->group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f710729a4a35..279dadf43d5a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1087,10 +1087,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) return; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Associate it with all child devices */ pnv_ioda_setup_same_PE(bus, pe); @@ -1292,11 +1288,12 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe struct iommu_table *tbl; unsigned long addr; int64_t rc; + struct iommu_table_group *table_group; bus = dev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; - tbl = pe->tce32_table; + tbl = pe->table_group.tables[0]; addr = tbl->it_base; opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, @@ -1311,13 +1308,14 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); + table_group = tbl->it_table_group; + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->tce32_table = NULL; + pe->table_group.tables[0] = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1465,10 +1463,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) continue; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Put PE to the list */ mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1603,7 +1597,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); /* * Note: iommu_add_device() will fail here as * for physical PE: the device is already added by now; @@ -1637,7 +1631,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); } *pdev->dev.dma_mask = dma_mask; return 0; @@ -1671,7 +1665,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); iommu_add_device(&dev->dev); if (dev->subordinate) @@ -1682,7 +1676,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; @@ -1759,7 +1754,8 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1835,8 +1831,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = pe->tce32_table; - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + phb->hose->node); + tbl->it_table_group = &pe->table_group; + pe->table_group.tables[0] = tbl; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); /* Grab a 32-bit TCE table */ pe->tce32_seg = base; @@ -1915,7 +1915,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1949,10 +1950,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, pe->tce_bypass_base = 1ull << 59; /* Install set_bypass callback for VFIO */ - pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass; + pe->table_group.tables[0]->set_bypass = pnv_pci_ioda2_set_bypass; /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(pe->tce32_table, true); + pnv_pci_ioda2_set_bypass(pe->table_group.tables[0], true); } static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, @@ -1969,8 +1970,12 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = pe->tce32_table; - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + phb->hose->node); + tbl->it_table_group = &pe->table_group; + pe->table_group.tables[0] = tbl; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 5a387a9132ff..6e00104093d6 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,14 +92,16 @@ static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) { - phb->p5ioc2.iommu_table.it_ops = &pnv_p5ioc2_iommu_ops; - iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); - iommu_register_group(&phb->p5ioc2.iommu_table, + struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; + + if (!tbl->it_map) { + tbl->it_ops = &pnv_p5ioc2_iommu_ops; + iommu_init_table(tbl, phb->hose->node); + iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base(&pdev->dev, tbl); iommu_add_device(&pdev->dev); } @@ -188,6 +190,12 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, tce_mem, tce_size, 0, IOMMU_PAGE_SHIFT_4K); + /* + * We do not allocate iommu_table as we do not support + * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() + * should not be called for phb->p5ioc2.table_group.tables[0] ever. + */ + phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 45a756007ec3..d8ba34d7af1b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -57,7 +57,7 @@ struct pnv_ioda_pe { /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ int tce32_seg; int tce32_segcount; - struct iommu_table *tce32_table; + struct iommu_table_group table_group; phys_addr_t tce_inval_reg_phys; /* 64-bit TCE bypass region */ @@ -120,6 +120,7 @@ struct pnv_phb { union { struct { struct iommu_table iommu_table; + struct iommu_table_group table_group; } p5ioc2; struct { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33f3a855bfd9..307d704ffaa3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -52,16 +52,51 @@ #include "pseries.h" -static void iommu_pseries_free_table(struct iommu_table *tbl, +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl = NULL; + + table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, + node); + if (!table_group) + goto fail_exit; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); + if (!tbl) + goto fail_exit; + + tbl->it_table_group = table_group; + table_group->tables[0] = tbl; + + return table_group; + +fail_exit: + kfree(table_group); + kfree(tbl); + + return NULL; +} + +static void iommu_pseries_free_group(struct iommu_table_group *table_group, const char *node_name) { + struct iommu_table *tbl; + + if (!table_group) + return; + #ifdef CONFIG_IOMMU_API - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); } #endif + + tbl = table_group->tables[0]; iommu_free_table(tbl, node_name); + + kfree(table_group); } static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, @@ -631,13 +666,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pci->phb->dma_window_size = 0x8000000ul; pci->phb->dma_window_base_cur = 0x8000000ul; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -680,16 +715,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci = PCI_DN(pdn); pr_debug(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pdn->full_name, ppci->table_group); - if (!ppci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - ppci->phb->node); + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); - pr_debug(" created table: %p\n", ppci->iommu_table); + iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); } } @@ -711,12 +747,13 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) struct pci_controller *phb = PCI_DN(dn)->phb; pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->node); + PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); + tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); + iommu_init_table(tbl, phb->node); + iommu_register_group(PCI_DN(dn)->table_group, + pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, tbl); iommu_add_device(&dev->dev); return; @@ -726,11 +763,12 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) * an already allocated iommu table is found and use that. */ - while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) + while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) dn = dn->parent; if (dn && PCI_DN(dn)) { - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base(&dev->dev, + PCI_DN(dn)->table_group->tables[0]); iommu_add_device(&dev->dev); } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", @@ -1117,7 +1155,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %s\n", dn->full_name); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1133,19 +1171,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" parent is %s\n", pdn->full_name); pci = PCI_DN(pdn); - if (!pci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); - pr_debug(" created table: %p\n", pci->iommu_table); + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, + pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->table_group); } else { - pr_debug(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->table_group); } - set_iommu_table_base(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); iommu_add_device(&dev->dev); } @@ -1176,7 +1215,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1220,7 +1259,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) dn = pci_device_to_OF_node(pdev); /* search upwards for ibm,dma-window */ - for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table; + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group; dn = dn->parent) if (of_get_property(dn, "ibm,dma-window", NULL)) break; @@ -1300,8 +1339,8 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * the device node. */ remove_ddw(np, false); - if (pci && pci->iommu_table) - iommu_pseries_free_table(pci->iommu_table, + if (pci && pci->table_group) + iommu_pseries_free_group(pci->table_group, np->full_name); spin_lock(&direct_window_list_lock); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index e65bc73cc8a8..c4bc345d64d7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -190,10 +190,11 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; - WARN_ON(container->tbl && !container->tbl->it_group); + WARN_ON(container->tbl && !container->tbl->it_table_group->group); - if (container->tbl && container->tbl->it_group) - tce_iommu_detach_group(iommu_data, container->tbl->it_group); + if (container->tbl && container->tbl->it_table_group->group) + tce_iommu_detach_group(iommu_data, + container->tbl->it_table_group->group); tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -345,7 +346,7 @@ static long tce_iommu_ioctl(void *iommu_data, if (!tbl) return -ENXIO; - BUG_ON(!tbl->it_group); + BUG_ON(!tbl->it_table_group->group); minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); @@ -433,11 +434,12 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_unlock(&container->lock); return 0; case VFIO_EEH_PE_OP: - if (!container->tbl || !container->tbl->it_group) + if (!container->tbl || !container->tbl->it_table_group->group) return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, - cmd, arg); + return vfio_spapr_iommu_eeh_ioctl( + container->tbl->it_table_group->group, + cmd, arg); } return -ENOTTY; @@ -457,7 +459,8 @@ static int tce_iommu_attach_group(void *iommu_data, iommu_group_id(iommu_group), iommu_group); */ if (container->tbl) { pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->tbl->it_group), + iommu_group_id(container->tbl-> + it_table_group->group), iommu_group_id(iommu_group)); ret = -EBUSY; goto unlock_exit; @@ -491,13 +494,13 @@ static void tce_iommu_detach_group(void *iommu_data, if (tbl != container->tbl) { pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", iommu_group_id(iommu_group), - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); goto unlock_exit; } if (container->enabled) { pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); tce_iommu_disable(container); } -- cgit v1.2.3 From 0eaf4defc7c44ed5dd33a03cab12a5f88c9b4b86 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:09 +1000 Subject: powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group So far one TCE table could only be used by one IOMMU group. However IODA2 hardware allows programming the same TCE table address to multiple PE allowing sharing tables. This replaces a single pointer to a group in a iommu_table struct with a linked list of groups which provides the way of invalidating TCE cache for every PE when an actual TCE table is updated. This adds pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group() helpers to manage the list. However without VFIO, it is still going to be a single IOMMU group per iommu_table. This changes iommu_add_device() to add a device to a first group from the group list of a table as it is only called from the platform init code or PCI bus notifier and at these moments there is only one group per table. This does not change TCE invalidation code to loop through all attached groups in order to simplify this patch and because it is not really needed in most cases. IODA2 is fixed in a later patch. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 8 +- arch/powerpc/kernel/iommu.c | 14 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 45 ++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 + arch/powerpc/platforms/powernv/pci.c | 75 +++++++++++++++++ arch/powerpc/platforms/powernv/pci.h | 7 ++ arch/powerpc/platforms/pseries/iommu.c | 27 +++++- drivers/vfio/vfio_iommu_spapr_tce.c | 122 ++++++++++++++++++++-------- 8 files changed, 241 insertions(+), 60 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 5a7267f47bdb..44a20ccf06b4 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,7 +91,7 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ - struct iommu_table_group *it_table_group; + struct list_head it_group_list;/* List of iommu_table_group_link */ struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); }; @@ -126,6 +126,12 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); #define IOMMU_TABLE_GROUP_MAX_TABLES 1 +struct iommu_table_group_link { + struct list_head next; + struct rcu_head rcu; + struct iommu_table_group *table_group; +}; + struct iommu_table_group { struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 719f0485ec79..be258b2ecb10 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1078,6 +1078,7 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership); int iommu_add_device(struct device *dev) { struct iommu_table *tbl; + struct iommu_table_group_link *tgl; /* * The sysfs entries should be populated before @@ -1095,15 +1096,22 @@ int iommu_add_device(struct device *dev) } tbl = get_iommu_table_base(dev); - if (!tbl || !tbl->it_table_group || !tbl->it_table_group->group) { + if (!tbl) { pr_debug("%s: Skipping device %s with no tbl\n", __func__, dev_name(dev)); return 0; } + tgl = list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + if (!tgl) { + pr_debug("%s: Skipping device %s with no group\n", + __func__, dev_name(dev)); + return 0; + } pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(tgl->table_group->group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", @@ -1112,7 +1120,7 @@ int iommu_add_device(struct device *dev) return -EINVAL; } - return iommu_group_add_device(tbl->it_table_group->group, dev); + return iommu_group_add_device(tgl->table_group->group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 279dadf43d5a..3b4130697b05 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1288,7 +1288,6 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe struct iommu_table *tbl; unsigned long addr; int64_t rc; - struct iommu_table_group *table_group; bus = dev->bus; hose = pci_bus_to_host(bus); @@ -1308,14 +1307,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - table_group = tbl->it_table_group; - if (table_group->group) { - iommu_group_put(table_group->group); - BUG_ON(table_group->group); + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + BUG_ON(pe->table_group.group); } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->table_group.tables[0] = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1676,7 +1674,10 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1754,7 +1755,10 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? @@ -1831,12 +1835,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* Grab a 32-bit TCE table */ pe->tce32_seg = base; @@ -1911,11 +1913,18 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1970,12 +1979,10 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; @@ -2048,6 +2055,10 @@ fail: pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(tce_table_size)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_ioda_setup_dma(struct pnv_phb *phb) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 6e00104093d6..f80e5a1d6117 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -99,6 +99,9 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + pnv_pci_link_table_and_group(phb->hose->node, 0, + tbl, &phb->p5ioc2.table_group); } set_iommu_table_base(&pdev->dev, tbl); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f72fc6e7d63d..00f1abd34379 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -606,6 +606,81 @@ unsigned long pnv_tce_get(struct iommu_table *tbl, long index) return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } +struct iommu_table *pnv_pci_table_alloc(int nid) +{ + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + + return tbl; +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; +} + +static void pnv_iommu_table_group_link_free(struct rcu_head *head) +{ + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d8ba34d7af1b..d37cb4da409f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -210,6 +210,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); +extern struct iommu_table *pnv_pci_table_alloc(int nid); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 307d704ffaa3..10510dea16b3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group = NULL; struct iommu_table *tbl = NULL; + struct iommu_table_group_link *tgl = NULL; table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, node); @@ -66,12 +68,21 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) if (!tbl) goto fail_exit; - tbl->it_table_group = table_group; + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + goto fail_exit; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + table_group->tables[0] = tbl; return table_group; fail_exit: + kfree(tgl); kfree(table_group); kfree(tbl); @@ -82,18 +93,28 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, const char *node_name) { struct iommu_table *tbl; +#ifdef CONFIG_IOMMU_API + struct iommu_table_group_link *tgl; +#endif if (!table_group) return; + tbl = table_group->tables[0]; #ifdef CONFIG_IOMMU_API + tgl = list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + + WARN_ON_ONCE(!tgl); + if (tgl) { + list_del_rcu(&tgl->next); + kfree(tgl); + } if (table_group->group) { iommu_group_put(table_group->group); BUG_ON(table_group->group); } #endif - - tbl = table_group->tables[0]; iommu_free_table(tbl, node_name); kfree(table_group); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index c4bc345d64d7..ffc634a75dba 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -88,7 +88,7 @@ static void decrement_locked_vm(long npages) */ struct tce_container { struct mutex lock; - struct iommu_table *tbl; + struct iommu_group *grp; bool enabled; unsigned long locked_pages; }; @@ -103,13 +103,42 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift) return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; } +static long tce_iommu_find_table(struct tce_container *container, + phys_addr_t ioba, struct iommu_table **ptbl) +{ + long i; + struct iommu_table_group *table_group; + + table_group = iommu_group_get_iommudata(container->grp); + if (!table_group) + return -1; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (tbl) { + unsigned long entry = ioba >> tbl->it_page_shift; + unsigned long start = tbl->it_offset; + unsigned long end = start + tbl->it_size; + + if ((start <= entry) && (entry < end)) { + *ptbl = tbl; + return i; + } + } + } + + return -1; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; unsigned long locked; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl; + struct iommu_table_group *table_group; - if (!container->tbl) + if (!container->grp) return -ENXIO; if (!current->mm) @@ -143,6 +172,11 @@ static int tce_iommu_enable(struct tce_container *container) * as this information is only available from KVM and VFIO is * KVM agnostic. */ + table_group = iommu_group_get_iommudata(container->grp); + if (!table_group) + return -ENODEV; + + tbl = table_group->tables[0]; locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT; ret = try_increment_locked_vm(locked); if (ret) @@ -190,11 +224,10 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; - WARN_ON(container->tbl && !container->tbl->it_table_group->group); + WARN_ON(container->grp); - if (container->tbl && container->tbl->it_table_group->group) - tce_iommu_detach_group(iommu_data, - container->tbl->it_table_group->group); + if (container->grp) + tce_iommu_detach_group(iommu_data, container->grp); tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -312,9 +345,16 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl; + struct iommu_table_group *table_group; + + if (WARN_ON(!container->grp)) + return -ENXIO; + + table_group = iommu_group_get_iommudata(container->grp); - if (WARN_ON(!tbl)) + tbl = table_group->tables[0]; + if (WARN_ON_ONCE(!tbl)) return -ENXIO; minsz = offsetofend(struct vfio_iommu_spapr_tce_info, @@ -337,17 +377,13 @@ static long tce_iommu_ioctl(void *iommu_data, } case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl = NULL; unsigned long tce; + long num; if (!container->enabled) return -EPERM; - if (!tbl) - return -ENXIO; - - BUG_ON(!tbl->it_table_group->group); - minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); if (copy_from_user(¶m, (void __user *)arg, minsz)) @@ -360,6 +396,10 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + if ((param.size & ~IOMMU_PAGE_MASK(tbl)) || (param.vaddr & ~IOMMU_PAGE_MASK(tbl))) return -EINVAL; @@ -385,14 +425,12 @@ static long tce_iommu_ioctl(void *iommu_data, } case VFIO_IOMMU_UNMAP_DMA: { struct vfio_iommu_type1_dma_unmap param; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl = NULL; + long num; if (!container->enabled) return -EPERM; - if (WARN_ON(!tbl)) - return -ENXIO; - minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); @@ -406,6 +444,10 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + if (param.size & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; @@ -434,12 +476,11 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_unlock(&container->lock); return 0; case VFIO_EEH_PE_OP: - if (!container->tbl || !container->tbl->it_table_group->group) + if (!container->grp) return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl( - container->tbl->it_table_group->group, - cmd, arg); + return vfio_spapr_iommu_eeh_ioctl(container->grp, + cmd, arg); } return -ENOTTY; @@ -450,17 +491,15 @@ static int tce_iommu_attach_group(void *iommu_data, { int ret; struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; - BUG_ON(!tbl); mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->tbl) { + if (container->grp) { pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->tbl-> - it_table_group->group), + iommu_group_id(container->grp), iommu_group_id(iommu_group)); ret = -EBUSY; goto unlock_exit; @@ -473,9 +512,15 @@ static int tce_iommu_attach_group(void *iommu_data, goto unlock_exit; } - ret = iommu_take_ownership(tbl); + table_group = iommu_group_get_iommudata(iommu_group); + if (!table_group) { + ret = -ENXIO; + goto unlock_exit; + } + + ret = iommu_take_ownership(table_group->tables[0]); if (!ret) - container->tbl = tbl; + container->grp = iommu_group; unlock_exit: mutex_unlock(&container->lock); @@ -487,26 +532,31 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group) { struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; + struct iommu_table *tbl; - BUG_ON(!tbl); mutex_lock(&container->lock); - if (tbl != container->tbl) { + if (iommu_group != container->grp) { pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", iommu_group_id(iommu_group), - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(container->grp)); goto unlock_exit; } if (container->enabled) { pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(container->grp)); tce_iommu_disable(container); } /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - container->tbl = NULL; + container->grp = NULL; + + table_group = iommu_group_get_iommudata(iommu_group); + BUG_ON(!table_group); + + tbl = table_group->tables[0]; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); iommu_release_ownership(tbl); -- cgit v1.2.3 From f87a88642e660edd8912ad39fe77848c6f9927a2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:10 +1000 Subject: vfio: powerpc/spapr/iommu/powernv/ioda2: Rework IOMMU ownership control This adds tce_iommu_take_ownership() and tce_iommu_release_ownership which call in a loop iommu_take_ownership()/iommu_release_ownership() for every table on the group. As there is just one now, no change in behaviour is expected. At the moment the iommu_table struct has a set_bypass() which enables/ disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code which calls this callback when external IOMMU users such as VFIO are about to get over a PHB. The set_bypass() callback is not really an iommu_table function but IOMMU/PE function. This introduces a iommu_table_group_ops struct and adds take_ownership()/release_ownership() callbacks to it which are called when an external user takes/releases control over the IOMMU. This replaces set_bypass() with ownership callbacks as it is not necessarily just bypass enabling, it can be something else/more so let's give it more generic name. The callbacks is implemented for IODA2 only. Other platforms (P5IOC2, IODA1) will use the old iommu_take_ownership/iommu_release_ownership API. The following patches will replace iommu_take_ownership/ iommu_release_ownership calls in IODA2 with full IOMMU table release/ create. As we here and touching bypass control, this removes pnv_pci_ioda2_setup_bypass_pe() as it does not do much more compared to pnv_pci_ioda2_set_bypass. This moves tce_bypass_base initialization to pnv_pci_ioda2_setup_dma_pe. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 11 ++++- arch/powerpc/kernel/iommu.c | 12 ------ arch/powerpc/platforms/powernv/pci-ioda.c | 43 ++++++++++++------- drivers/vfio/vfio_iommu_spapr_tce.c | 70 ++++++++++++++++++++++++++++--- 4 files changed, 103 insertions(+), 33 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 44a20ccf06b4..489133cf7c5e 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -93,7 +93,6 @@ struct iommu_table { unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ struct iommu_table_ops *it_ops; - void (*set_bypass)(struct iommu_table *tbl, bool enable); }; /* Pure 2^n version of get_order */ @@ -126,6 +125,15 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); #define IOMMU_TABLE_GROUP_MAX_TABLES 1 +struct iommu_table_group; + +struct iommu_table_group_ops { + /* Switch ownership from platform code to external user (e.g. VFIO) */ + void (*take_ownership)(struct iommu_table_group *table_group); + /* Switch ownership from external user (e.g. VFIO) back to core */ + void (*release_ownership)(struct iommu_table_group *table_group); +}; + struct iommu_table_group_link { struct list_head next; struct rcu_head rcu; @@ -135,6 +143,7 @@ struct iommu_table_group_link { struct iommu_table_group { struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct iommu_table_group_ops *ops; }; #ifdef CONFIG_IOMMU_API diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index be258b2ecb10..e7f81b7399ba 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1047,14 +1047,6 @@ int iommu_take_ownership(struct iommu_table *tbl) memset(tbl->it_map, 0xff, sz); - /* - * Disable iommu bypass, otherwise the user can DMA to all of - * our physical memory via the bypass window instead of just - * the pages that has been explicitly mapped into the iommu - */ - if (tbl->set_bypass) - tbl->set_bypass(tbl, false); - return 0; } EXPORT_SYMBOL_GPL(iommu_take_ownership); @@ -1068,10 +1060,6 @@ void iommu_release_ownership(struct iommu_table *tbl) /* Restore bit#0 set by iommu_init_table() */ if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - - /* The kernel owns the device now, we can restore the iommu bypass */ - if (tbl->set_bypass) - tbl->set_bypass(tbl, true); } EXPORT_SYMBOL_GPL(iommu_release_ownership); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3b4130697b05..17a77522ea0d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1919,13 +1919,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } } -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - &tbl->it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl->table_group, - struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1952,19 +1947,31 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) pe->tce_bypass_enabled = enable; } -static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +#ifdef CONFIG_IOMMU_API +static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { - /* TVE #1 is selected by PCI address bit 59 */ - pe->tce_bypass_base = 1ull << 59; + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); - /* Install set_bypass callback for VFIO */ - pe->table_group.tables[0]->set_bypass = pnv_pci_ioda2_set_bypass; + iommu_take_ownership(table_group->tables[0]); + pnv_pci_ioda2_set_bypass(pe, false); +} - /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(pe->table_group.tables[0], true); +static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + + iommu_release_ownership(table_group->tables[0]); + pnv_pci_ioda2_set_bypass(pe, true); } +static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .take_ownership = pnv_ioda2_take_ownership, + .release_ownership = pnv_ioda2_release_ownership, +}; +#endif + static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { @@ -1979,6 +1986,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + /* TVE #1 is selected by PCI address bit 59 */ + pe->tce_bypass_base = 1ull << 59; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); @@ -2033,6 +2043,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; +#endif if (pe->flags & PNV_IODA_PE_DEV) { /* @@ -2047,7 +2060,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) - pnv_pci_ioda2_setup_bypass_pe(phb, pe); + pnv_pci_ioda2_set_bypass(pe, true); return; fail: diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index ffc634a75dba..9c720de46c33 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -486,6 +486,61 @@ static long tce_iommu_ioctl(void *iommu_data, return -ENOTTY; } +static void tce_iommu_release_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + if (tbl->it_map) + iommu_release_ownership(tbl); + } +} + +static int tce_iommu_take_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i, j, rc = 0; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl || !tbl->it_map) + continue; + + rc = iommu_take_ownership(tbl); + if (rc) { + for (j = 0; j < i; ++j) + iommu_release_ownership( + table_group->tables[j]); + + return rc; + } + } + + return 0; +} + +static void tce_iommu_release_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + table_group->ops->release_ownership(table_group); +} + +static long tce_iommu_take_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + table_group->ops->take_ownership(table_group); + + return 0; +} + static int tce_iommu_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -518,7 +573,12 @@ static int tce_iommu_attach_group(void *iommu_data, goto unlock_exit; } - ret = iommu_take_ownership(table_group->tables[0]); + if (!table_group->ops || !table_group->ops->take_ownership || + !table_group->ops->release_ownership) + ret = tce_iommu_take_ownership(container, table_group); + else + ret = tce_iommu_take_ownership_ddw(container, table_group); + if (!ret) container->grp = iommu_group; @@ -533,7 +593,6 @@ static void tce_iommu_detach_group(void *iommu_data, { struct tce_container *container = iommu_data; struct iommu_table_group *table_group; - struct iommu_table *tbl; mutex_lock(&container->lock); if (iommu_group != container->grp) { @@ -556,9 +615,10 @@ static void tce_iommu_detach_group(void *iommu_data, table_group = iommu_group_get_iommudata(iommu_group); BUG_ON(!table_group); - tbl = table_group->tables[0]; - tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - iommu_release_ownership(tbl); + if (!table_group->ops || !table_group->ops->release_ownership) + tce_iommu_release_ownership(container, table_group); + else + tce_iommu_release_ownership_ddw(container, table_group); unlock_exit: mutex_unlock(&container->lock); -- cgit v1.2.3 From b82c75bfbeb5b4c7c23ee09dd6f295c7f06aeb25 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:11 +1000 Subject: powerpc/iommu: Fix IOMMU ownership control functions This adds missing locks in iommu_take_ownership()/ iommu_release_ownership(). This marks all pages busy in iommu_table::it_map in order to catch errors if there is an attempt to use this table while ownership over it is taken. This only clears TCE content if there is no page marked busy in it_map. Clearing must be done outside of the table locks as iommu_clear_tce() called from iommu_clear_tces_and_put_pages() does this. In order to use bitmap_empty(), the existing code clears bit#0 which is set even in an empty table if it is bus-mapped at 0 as iommu_init_table() reserves page#0 to prevent buggy drivers from crashing when allocated page is bus-mapped at zero (which is correct). This restores the bit in the case of failure to bring the it_map to the state it was in when we called iommu_take_ownership(). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index e7f81b7399ba..0fb88005c3c5 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1035,31 +1035,51 @@ EXPORT_SYMBOL_GPL(iommu_tce_build); int iommu_take_ownership(struct iommu_table *tbl) { - unsigned long sz = (tbl->it_size + 7) >> 3; + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + int ret = 0; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock(&tbl->pools[i].lock); if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); if (!bitmap_empty(tbl->it_map, tbl->it_size)) { pr_err("iommu_tce: it_map is not empty"); - return -EBUSY; + ret = -EBUSY; + /* Restore bit#0 set by iommu_init_table() */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); + } else { + memset(tbl->it_map, 0xff, sz); } - memset(tbl->it_map, 0xff, sz); + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - return 0; + return ret; } EXPORT_SYMBOL_GPL(iommu_take_ownership); void iommu_release_ownership(struct iommu_table *tbl) { - unsigned long sz = (tbl->it_size + 7) >> 3; + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock(&tbl->pools[i].lock); memset(tbl->it_map, 0, sz); /* Restore bit#0 set by iommu_init_table() */ if (tbl->it_offset == 0) set_bit(0, tbl->it_map); + + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); } EXPORT_SYMBOL_GPL(iommu_release_ownership); -- cgit v1.2.3 From 5780fb04263c16fdb9affd1012d5eb956e0cbcd9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:12 +1000 Subject: powerpc/powernv/ioda2: Move TCE kill register address to PE At the moment the DMA setup code looks for the "ibm,opal-tce-kill" property which contains the TCE kill register address. Writing to this register invalidates TCE cache on IODA/IODA2 hub. This moves the register address from iommu_table to pnv_pnb as this register belongs to PHB and invalidates TCE cache for all tables of all attached PEs. This moves the property reading/remapping code to a helper which is called when DMA is being configured for PE and which does DMA setup for both IODA1 and IODA2. This adds a new pnv_pci_ioda2_tce_invalidate_entire() helper which invalidates cache for the entire table. It should be called after every call to opal_pci_map_pe_dma_window(). It was not required before because there was just a single TCE table and 64bit DMA was handled via bypass window (which has no table so no cache was used) but this is going to change with Dynamic DMA windows (DDW). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 66 ++++++++++++++++++------------- arch/powerpc/platforms/powernv/pci.h | 7 +++- 2 files changed, 44 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 17a77522ea0d..6bbb2bb35581 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1680,8 +1680,8 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; unsigned long start, end, inc; const unsigned shift = tbl->it_page_shift; @@ -1752,6 +1752,19 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; +static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); + struct pnv_phb *phb = pe->phb; + + if (!phb->ioda.tce_inval_reg) + return; + + mb(); /* Ensure above stores are visible */ + __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); +} + static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1762,8 +1775,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ @@ -1821,7 +1834,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, { struct page *tce_mem = NULL; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int i; int64_t rc; @@ -1878,20 +1890,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. - */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); + if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); - } + tbl->it_ops = &pnv_ioda1_iommu_ops; iommu_init_table(tbl, phb->hose->node); @@ -1972,12 +1975,24 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { }; #endif +static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) +{ + const __be64 *swinvp; + + /* OPAL variant of PHB3 invalidated TCEs */ + swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); + if (!swinvp) + return; + + phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp); + phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); +} + static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct page *tce_mem = NULL; void *addr; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int tce_table_size, end; int64_t rc; @@ -2024,23 +2039,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, goto fail; } + pnv_pci_ioda2_tce_invalidate_entire(pe); + /* Setup linux iommu table */ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of PHB3 invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. - */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); + if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - } + tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); #ifdef CONFIG_IOMMU_API @@ -2096,6 +2104,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pr_info("PCI: %d PE# for a total weight of %d\n", phb->ioda.dma_pe_count, phb->ioda.dma_weight); + pnv_pci_ioda_setup_opal_tce_kill(phb); + /* Walk our PE list and configure their DMA segments, hand them * out one base segment plus any residual segments based on * weight diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d37cb4da409f..792a723c36ec 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -58,7 +58,6 @@ struct pnv_ioda_pe { int tce32_seg; int tce32_segcount; struct iommu_table_group table_group; - phys_addr_t tce_inval_reg_phys; /* 64-bit TCE bypass region */ bool tce_bypass_enabled; @@ -184,6 +183,12 @@ struct pnv_phb { * boot for resource allocation purposes */ struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; } ioda; }; -- cgit v1.2.3 From e57080f17da5d50f7b3b9d6c045f28632d71309d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:13 +1000 Subject: powerpc/powernv/ioda2: Add TCE invalidation for all attached groups The iommu_table struct keeps a list of IOMMU groups it is used for. At the moment there is just a single group attached but further patches will add TCE table sharing. When sharing is enabled, TCE cache in each PE needs to be invalidated so does the patch. This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan to enable TCE table sharing on PHBs older than IODA2. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6bbb2bb35581..390863608569 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1765,23 +1766,15 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); } -static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, - unsigned long index, unsigned long npages, bool rm) +static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, + __be64 __iomem *invalidate, unsigned shift, + unsigned long index, unsigned long npages) { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - &tbl->it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl->table_group, - struct pnv_ioda_pe, table_group); unsigned long start, end, inc; - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; - const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ start = 0x2ull << 60; - start |= (pe->pe_number & 0xFF); + start |= (pe_number & 0xFF); end = start; /* Figure out the start, end and step */ @@ -1799,6 +1792,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) +{ + struct iommu_table_group_link *tgl; + + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl->table_group, + struct pnv_ioda_pe, table_group); + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; + + pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, + invalidate, tbl->it_page_shift, + index, npages); + } +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, -- cgit v1.2.3 From c5bb44edee19b2c19221a0b5a68add37ea5733c5 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:14 +1000 Subject: powerpc/powernv: Implement accessor to TCE entry This replaces direct accesses to TCE table with a helper which returns an TCE entry address. This does not make difference now but will when multi-level TCE tables get introduces. No change in behavior is expected. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 00f1abd34379..a07b83283ccc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -572,38 +572,46 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; +static __be64 *pnv_tce(struct iommu_table *tbl, long idx) +{ + __be64 *tmp = ((__be64 *)tbl->it_base); + + return tmp + idx; +} + int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); - __be64 *tcep; - u64 rpn; - - tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> tbl->it_page_shift; + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; - while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | - (rpn++ << tbl->it_page_shift)); + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; + *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); + } return 0; } void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep; + long i; - tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; - while (npages--) - *(tcep++) = cpu_to_be64(0); + *(pnv_tce(tbl, idx)) = cpu_to_be64(0); + } } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { - return ((u64 *)tbl->it_base)[index - tbl->it_offset]; + return *(pnv_tce(tbl, index - tbl->it_offset)); } struct iommu_table *pnv_pci_table_alloc(int nid) -- cgit v1.2.3 From 05c6cfb9dce0d13d37e9d007ee6a4af36f1c0a58 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:15 +1000 Subject: powerpc/iommu/powernv: Release replaced TCE At the moment writing new TCE value to the IOMMU table fails with EBUSY if there is a valid entry already. However PAPR specification allows the guest to write new TCE value without clearing it first. Another problem this patch is addressing is the use of pool locks for external IOMMU users such as VFIO. The pool locks are to protect DMA page allocator rather than entries and since the host kernel does not control what pages are in use, there is no point in pool locks and exchange()+put_page(oldtce) is sufficient to avoid possible races. This adds an exchange() callback to iommu_table_ops which does the same thing as set() plus it returns replaced TCE and DMA direction so the caller can release the pages afterwards. The exchange() receives a physical address unlike set() which receives linear mapping address; and returns a physical address as the clear() does. This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement for a platform to have exchange() implemented in order to support VFIO. This replaces iommu_tce_build() and iommu_clear_tce() with a single iommu_tce_xchg(). This makes sure that TCE permission bits are not set in TCE passed to IOMMU API as those are to be calculated by platform code from DMA direction. This moves SetPageDirty() to the IOMMU code to make it work for both VFIO ioctl interface in in-kernel TCE acceleration (when it becomes available later). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 22 ++++++++-- arch/powerpc/kernel/iommu.c | 59 +++++++++------------------ arch/powerpc/platforms/powernv/pci-ioda.c | 34 ++++++++++++++++ arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 ++ arch/powerpc/platforms/powernv/pci.c | 18 +++++++++ arch/powerpc/platforms/powernv/pci.h | 2 + drivers/vfio/vfio_iommu_spapr_tce.c | 63 +++++++++++++++++------------ 7 files changed, 132 insertions(+), 69 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 489133cf7c5e..4636734604d7 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -45,13 +45,29 @@ extern int iommu_is_off; extern int iommu_force_on; struct iommu_table_ops { + /* + * When called with direction==DMA_NONE, it is equal to clear(). + * uaddr is a linear map address. + */ int (*set)(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs); +#ifdef CONFIG_IOMMU_API + /* + * Exchanges existing TCE with new TCE plus direction bits; + * returns old TCE and DMA direction mask. + * @tce is a physical address. + */ + int (*exchange)(struct iommu_table *tbl, + long index, + unsigned long *hpa, + enum dma_data_direction *direction); +#endif void (*clear)(struct iommu_table *tbl, long index, long npages); + /* get() returns a physical address */ unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); }; @@ -153,6 +169,8 @@ extern void iommu_register_group(struct iommu_table_group *table_group, extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); +extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, @@ -225,10 +243,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl, unsigned long npages); extern int iommu_tce_put_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce); -extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, - unsigned long hwaddr, enum dma_data_direction direction); -extern unsigned long iommu_clear_tce(struct iommu_table *tbl, - unsigned long entry); extern void iommu_flush_tce(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0fb88005c3c5..a8e3490b54e3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -965,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); int iommu_tce_put_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce) { - if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) - return -EINVAL; - - if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; if (ioba & ~IOMMU_PAGE_MASK(tbl)) @@ -985,44 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); -unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) { - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); + long ret; - spin_lock(&(pool->lock)); + ret = tbl->it_ops->exchange(tbl, entry, hpa, direction); - oldtce = tbl->it_ops->get(tbl, entry); - if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) - tbl->it_ops->clear(tbl, entry, 1); - else - oldtce = 0; - - spin_unlock(&(pool->lock)); - - return oldtce; -} -EXPORT_SYMBOL_GPL(iommu_clear_tce); - -/* - * hwaddr is a kernel virtual address here (0xc... bazillion), - * tce_build converts it to a physical address. - */ -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, - unsigned long hwaddr, enum dma_data_direction direction) -{ - int ret = -EBUSY; - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); - - spin_lock(&(pool->lock)); - - oldtce = tbl->it_ops->get(tbl, entry); - /* Add new entry if it is not busy */ - if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) - ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL); - - spin_unlock(&(pool->lock)); + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) + SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT)); /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", @@ -1031,13 +1000,23 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, return ret; } -EXPORT_SYMBOL_GPL(iommu_tce_build); +EXPORT_SYMBOL_GPL(iommu_tce_xchg); int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; int ret = 0; + /* + * VFIO does not control TCE entries allocation and the guest + * can write new TCEs on top of existing ones so iommu_tce_build() + * must be able to release old pages. This functionality + * requires exchange() callback defined so if it is not + * implemented, we disallow taking ownership over the table. + */ + if (!tbl->it_ops->exchange) + return -EINVAL; + spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) spin_lock(&tbl->pools[i].lock); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 390863608569..ecbc071a143e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1738,6 +1738,20 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, return ret; } +#ifdef CONFIG_IOMMU_API +static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, long npages) { @@ -1749,6 +1763,9 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda1_tce_xchg, +#endif .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, }; @@ -1824,6 +1841,20 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, return ret; } +#ifdef CONFIG_IOMMU_API +static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, long npages) { @@ -1835,6 +1866,9 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda2_tce_xchg, +#endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index f80e5a1d6117..eaec85b63b34 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -85,6 +85,9 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { .set = pnv_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_tce_xchg, +#endif .clear = pnv_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index a07b83283ccc..d465b9c32388 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -598,6 +598,24 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, return 0; } +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); + *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; +} +#endif + void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { long i; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 792a723c36ec..8ef2d28aded0 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -207,6 +207,8 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs); extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction); extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 9c720de46c33..a9e2d13c03c0 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data) } static void tce_iommu_unuse_page(struct tce_container *container, - unsigned long oldtce) + unsigned long hpa) { struct page *page; - if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE))) - return; - - page = pfn_to_page(oldtce >> PAGE_SHIFT); - - if (oldtce & TCE_PCI_WRITE) - SetPageDirty(page); - + page = pfn_to_page(hpa >> PAGE_SHIFT); put_page(page); } @@ -255,14 +248,21 @@ static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages) { - unsigned long oldtce; + unsigned long oldhpa; + long ret; + enum dma_data_direction direction; for ( ; pages; --pages, ++entry) { - oldtce = iommu_clear_tce(tbl, entry); - if (!oldtce) + direction = DMA_NONE; + oldhpa = 0; + ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction); + if (ret) + continue; + + if (direction == DMA_NONE) continue; - tce_iommu_unuse_page(container, oldtce); + tce_iommu_unuse_page(container, oldhpa); } return 0; @@ -284,12 +284,13 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) static long tce_iommu_build(struct tce_container *container, struct iommu_table *tbl, - unsigned long entry, unsigned long tce, unsigned long pages) + unsigned long entry, unsigned long tce, unsigned long pages, + enum dma_data_direction direction) { long i, ret = 0; struct page *page; unsigned long hpa; - enum dma_data_direction direction = iommu_tce_direction(tce); + enum dma_data_direction dirtmp; for (i = 0; i < pages; ++i) { unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; @@ -305,8 +306,8 @@ static long tce_iommu_build(struct tce_container *container, } hpa |= offset; - ret = iommu_tce_build(tbl, entry + i, (unsigned long) __va(hpa), - direction); + dirtmp = direction; + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); if (ret) { tce_iommu_unuse_page(container, hpa); pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", @@ -314,6 +315,10 @@ static long tce_iommu_build(struct tce_container *container, tce, ret); break; } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page(container, hpa); + tce += IOMMU_PAGE_SIZE(tbl); } @@ -378,8 +383,8 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; struct iommu_table *tbl = NULL; - unsigned long tce; long num; + enum dma_data_direction direction; if (!container->enabled) return -EPERM; @@ -405,19 +410,27 @@ static long tce_iommu_ioctl(void *iommu_data, return -EINVAL; /* iova is checked by the IOMMU API */ - tce = param.vaddr; - if (param.flags & VFIO_DMA_MAP_FLAG_READ) - tce |= TCE_PCI_READ; - if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) - tce |= TCE_PCI_WRITE; + if (param.flags & VFIO_DMA_MAP_FLAG_READ) { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_BIDIRECTIONAL; + else + direction = DMA_TO_DEVICE; + } else { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_FROM_DEVICE; + else + return -EINVAL; + } - ret = iommu_tce_put_param_check(tbl, param.iova, tce); + ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr); if (ret) return ret; ret = tce_iommu_build(container, tbl, param.iova >> tbl->it_page_shift, - tce, param.size >> tbl->it_page_shift); + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); -- cgit v1.2.3 From e5aad1e678746af663e3e3acc7f3501309997f51 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:16 +1000 Subject: powerpc/powernv/ioda2: Rework iommu_table creation This moves iommu_table creation to the beginning to make following changes easier to review. This starts using table parameters from the iommu_table struct. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ecbc071a143e..feaba5ecfa97 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2071,13 +2071,23 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, addr = page_address(tce_mem); memset(addr, 0, tce_table_size); + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); + + tbl->it_ops = &pnv_ioda2_iommu_ops; + iommu_init_table(tbl, phb->hose->node); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; +#endif + /* * Map TCE table through TVT. The TVE index is the PE number * shifted by 1 bit for 32-bits DMA space. */ rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - tce_table_size, 0x1000); + pe->pe_number << 1, 1, __pa(tbl->it_base), + tbl->it_size << 3, 1ULL << tbl->it_page_shift); if (rc) { pe_err(pe, "Failed to configure 32-bit TCE table," " err %ld\n", rc); @@ -2086,20 +2096,10 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_pci_ioda2_tce_invalidate_entire(pe); - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - IOMMU_PAGE_SHIFT_4K); - /* OPAL variant of PHB3 invalidated TCEs */ if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - tbl->it_ops = &pnv_ioda2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); -#ifdef CONFIG_IOMMU_API - pe->table_group.ops = &pnv_pci_ioda2_ops; -#endif - if (pe->flags & PNV_IODA_PE_DEV) { /* * Setting table base here only for carrying iommu_group -- cgit v1.2.3 From aca6913f555176363cda21518ff79da7469ebd64 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:17 +1000 Subject: powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages This is a part of moving TCE table allocation into an iommu_ops callback to support multiple IOMMU groups per one VFIO container. This moves the code which allocates the actual TCE tables to helpers: pnv_pci_ioda2_table_alloc_pages() and pnv_pci_ioda2_table_free_pages(). These do not allocate/free the iommu_table struct. This enforces window size to be a power of two. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 83 +++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index feaba5ecfa97..e61610cccf6e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -49,6 +50,8 @@ /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); + static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { @@ -1313,8 +1316,8 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_group_put(pe->table_group.group); BUG_ON(pe->table_group.group); } + pnv_pci_ioda2_table_free_pages(tbl); iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); - free_pages(addr, get_order(TCE32_TABLE_SIZE)); } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -2033,13 +2036,62 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); } -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) { struct page *tce_mem = NULL; + __be64 *addr; + unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, order=%d\n", order); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, 1UL << (order + PAGE_SHIFT)); + + return addr; +} + +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, struct iommu_table *tbl) +{ void *addr; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + + if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) + return -EINVAL; + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, table_shift); + if (!addr) + return -ENOMEM; + + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + + pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", + window_size, tce_table_size, bus_offset); + + return 0; +} + +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + if (!tbl->it_size) + return; + + free_pages(tbl->it_base, get_order(tbl->it_size << 3)); +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ struct iommu_table *tbl; - unsigned int tce_table_size, end; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2056,24 +2108,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; - end = (1 << ilog2(phb->ioda.m32_pci_base)); - tce_table_size = (end / 0x1000) * 8; pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", - end); + phb->ioda.m32_pci_base); - /* Allocate TCE table */ - tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, - get_order(tce_table_size)); - if (!tce_mem) { - pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); + /* Setup linux iommu table */ + rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, + 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; } - addr = page_address(tce_mem); - memset(addr, 0, tce_table_size); - - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - IOMMU_PAGE_SHIFT_4K); tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); @@ -2119,9 +2163,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, fail: if (pe->tce32_seg >= 0) pe->tce32_seg = -1; - if (tce_mem) - __free_pages(tce_mem, get_order(tce_table_size)); if (tbl) { + pnv_pci_ioda2_table_free_pages(tbl); pnv_pci_unlink_table_and_group(tbl, &pe->table_group); iommu_free_table(tbl, "pnv"); } -- cgit v1.2.3 From 43cb60ab7f8c91b9bf5988edc318dee99ec93b9b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:18 +1000 Subject: powerpc/powernv/ioda2: Introduce pnv_pci_ioda2_set_window This is a part of moving DMA window programming to an iommu_ops callback. pnv_pci_ioda2_set_window() takes an iommu_table_group as a first parameter (not pnv_ioda_pe) as it is going to be used as a callback for VFIO DDW code. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e61610cccf6e..552f6ac52560 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1970,6 +1970,43 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } } +static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + int64_t rc; + const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; + const __u64 win_size = tbl->it_size << tbl->it_page_shift; + + pe_info(pe, "Setting up window %llx..%llx pg=%x\n", + start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); + + /* + * Map TCE table through TVT. The TVE index is the PE number + * shifted by 1 bit for 32-bits DMA space. + */ + rc = opal_pci_map_pe_dma_window(phb->opal_id, + pe->pe_number, + pe->pe_number << 1, + 1, + __pa(tbl->it_base), + tbl->it_size << 3, + IOMMU_PAGE_SIZE(tbl)); + if (rc) { + pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); + return rc; + } + + pnv_pci_link_table_and_group(phb->hose->node, num, + tbl, &pe->table_group); + pnv_pci_ioda2_tce_invalidate_entire(pe); + + return 0; +} + static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; @@ -2125,21 +2162,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pe->table_group.ops = &pnv_pci_ioda2_ops; #endif - /* - * Map TCE table through TVT. The TVE index is the PE number - * shifted by 1 bit for 32-bits DMA space. - */ - rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(tbl->it_base), - tbl->it_size << 3, 1ULL << tbl->it_page_shift); + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); if (rc) { pe_err(pe, "Failed to configure 32-bit TCE table," " err %ld\n", rc); goto fail; } - pnv_pci_ioda2_tce_invalidate_entire(pe); - /* OPAL variant of PHB3 invalidated TCEs */ if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); -- cgit v1.2.3 From bbb845c4bac88d8feffa8945dd28b50849984e30 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:19 +1000 Subject: powerpc/powernv: Implement multilevel TCE tables TCE tables might get too big in case of 4K IOMMU pages and DDW enabled on huge guests (hundreds of GB of RAM) so the kernel might be unable to allocate contiguous chunk of physical memory to store the TCE table. To address this, POWER8 CPU (actually, IODA2) supports multi-level TCE tables, up to 5 levels which splits the table into a tree of smaller subtables. This adds multi-level TCE tables support to pnv_pci_ioda2_table_alloc_pages() and pnv_pci_ioda2_table_free_pages() helpers. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 2 + arch/powerpc/platforms/powernv/pci-ioda.c | 105 +++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/pci.c | 13 ++++ 3 files changed, 111 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 4636734604d7..706cfc0b1190 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -96,6 +96,8 @@ struct iommu_pool { struct iommu_table { unsigned long it_busno; /* Bus number this table belongs to */ unsigned long it_size; /* Size of iommu table in entries */ + unsigned long it_indirect_levels; + unsigned long it_level_size; unsigned long it_offset; /* Offset into global table */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_index; /* which iommu table this is */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 552f6ac52560..21d8c61f0b03 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -50,6 +50,9 @@ /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 + static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -1977,6 +1980,8 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, table_group); struct pnv_phb *phb = pe->phb; int64_t rc; + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; @@ -1991,9 +1996,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, pe->pe_number << 1, - 1, + tbl->it_indirect_levels + 1, __pa(tbl->it_base), - tbl->it_size << 3, + size << 3, IOMMU_PAGE_SIZE(tbl)); if (rc) { pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); @@ -2073,11 +2078,16 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); } -static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, + unsigned levels, unsigned long limit, + unsigned long *current_offset) { struct page *tce_mem = NULL; - __be64 *addr; + __be64 *addr, *tmp; unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; + unsigned long allocated = 1UL << (order + PAGE_SHIFT); + unsigned entries = 1UL << (shift - 3); + long i; tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); if (!tce_mem) { @@ -2085,31 +2095,79 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) return NULL; } addr = page_address(tce_mem); - memset(addr, 0, 1UL << (order + PAGE_SHIFT)); + memset(addr, 0, allocated); + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } return addr; } +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level); + static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, struct iommu_table *tbl) + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl) { void *addr; + unsigned long offset = 0, level_shift; const unsigned window_shift = ilog2(window_size); unsigned entries_shift = window_shift - page_shift; unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); const unsigned long tce_table_size = 1UL << table_shift; + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return -EINVAL; + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + /* Allocate TCE table */ - addr = pnv_pci_ioda2_table_do_alloc_pages(nid, table_shift); + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset); + + /* addr==NULL means that the first level allocation failed */ if (!addr) return -ENOMEM; + /* + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. + */ + if (offset < tce_table_size) { + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + return -ENOMEM; + } + /* Setup linux iommu table */ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); @@ -2117,12 +2175,40 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, return 0; } +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (level) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + level - 1); + } + } + + free_pages(addr_ul, get_order(size << 3)); +} + static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) { + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + if (!tbl->it_size) return; - free_pages(tbl->it_base, get_order(tbl->it_size << 3)); + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); } static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, @@ -2150,7 +2236,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, - 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl); + 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, + POWERNV_IOMMU_DEFAULT_LEVELS, tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index d465b9c32388..765d8ed558d0 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -575,6 +575,19 @@ struct pci_ops pnv_pci_ops = { static __be64 *pnv_tce(struct iommu_table *tbl, long idx) { __be64 *tmp = ((__be64 *)tbl->it_base); + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } return tmp + idx; } -- cgit v1.2.3 From 4793d65d1ac056d92b594d05c6aab3c040d913dd Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:20 +1000 Subject: vfio: powerpc/spapr: powerpc/powernv/ioda: Define and implement DMA windows API This extends iommu_table_group_ops by a set of callbacks to support dynamic DMA windows management. create_table() creates a TCE table with specific parameters. it receives iommu_table_group to know nodeid in order to allocate TCE table memory closer to the PHB. The exact format of allocated multi-level table might be also specific to the PHB model (not the case now though). This callback calculated the DMA window offset on a PCI bus from @num and stores it in a just created table. set_window() sets the window at specified TVT index + @num on PHB. unset_window() unsets the window from specified TVT. This adds a free() callback to iommu_table_ops to free the memory (potentially a tree of tables) allocated for the TCE table. create_table() and free() are supposed to be called once per VFIO container and set_window()/unset_window() are supposed to be called for every group in a container. This adds IOMMU capabilities to iommu_table_group such as default 32bit window parameters and others. This makes use of new values in vfio_iommu_spapr_tce. IODA1/P5IOC2 do not support DDW so they do not advertise pagemasks to the userspace. Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 19 ++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 96 ++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 7 ++- drivers/vfio/vfio_iommu_spapr_tce.c | 19 +++--- 4 files changed, 124 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 706cfc0b1190..e5541759a37d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -70,6 +70,7 @@ struct iommu_table_ops { /* get() returns a physical address */ unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); + void (*free)(struct iommu_table *tbl); }; /* These are used by VIO */ @@ -146,6 +147,17 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, struct iommu_table_group; struct iommu_table_group_ops { + long (*create_table)(struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table **ptbl); + long (*set_window)(struct iommu_table_group *table_group, + int num, + struct iommu_table *tblnew); + long (*unset_window)(struct iommu_table_group *table_group, + int num); /* Switch ownership from platform code to external user (e.g. VFIO) */ void (*take_ownership)(struct iommu_table_group *table_group); /* Switch ownership from external user (e.g. VFIO) back to core */ @@ -159,6 +171,13 @@ struct iommu_table_group_link { }; struct iommu_table_group { + /* IOMMU properties */ + __u32 tce32_start; + __u32 tce32_size; + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 max_levels; + struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct iommu_table_group_ops *ops; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 21d8c61f0b03..ee3e643c16bb 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -1870,6 +1871,12 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } +static void pnv_ioda2_table_free(struct iommu_table *tbl) +{ + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, "pnv"); +} + static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API @@ -1877,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, + .free = pnv_ioda2_table_free, }; static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, @@ -1947,6 +1955,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); tbl->it_ops = &pnv_ioda1_iommu_ops; + pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; + pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { @@ -1985,7 +1995,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; - pe_info(pe, "Setting up window %llx..%llx pg=%x\n", + pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, start_addr, start_addr + win_size - 1, IOMMU_PAGE_SIZE(tbl)); @@ -1995,7 +2005,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, */ rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, + (pe->pe_number << 1) + num, tbl->it_indirect_levels + 1, __pa(tbl->it_base), size << 3, @@ -2040,7 +2050,67 @@ static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) pe->tce_bypass_enabled = enable; } +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl); + +static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + int nid = pe->phb->hose->node; + __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start; + long ret; + struct iommu_table *tbl; + + tbl = pnv_pci_table_alloc(nid); + if (!tbl) + return -ENOMEM; + + ret = pnv_pci_ioda2_table_alloc_pages(nid, + bus_offset, page_shift, window_size, + levels, tbl); + if (ret) { + iommu_free_table(tbl, "pnv"); + return ret; + } + + tbl->it_ops = &pnv_ioda2_iommu_ops; + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + + *ptbl = tbl; + + return 0; +} + #ifdef CONFIG_IOMMU_API +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} + static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -2060,6 +2130,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .create_table = pnv_pci_ioda2_create_table, + .set_window = pnv_pci_ioda2_set_window, + .unset_window = pnv_pci_ioda2_unset_window, .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; @@ -2214,7 +2287,7 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct iommu_table *tbl; + struct iommu_table *tbl = NULL; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2224,10 +2297,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; - tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); - pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; @@ -2235,13 +2306,22 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, phb->ioda.m32_pci_base); /* Setup linux iommu table */ - rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, - 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, - POWERNV_IOMMU_DEFAULT_LEVELS, tbl); + pe->table_group.tce32_start = 0; + pe->table_group.tce32_size = phb->ioda.m32_pci_base; + pe->table_group.max_dynamic_windows_supported = + IOMMU_TABLE_GROUP_MAX_TABLES; + pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; + pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, + IOMMU_PAGE_SHIFT_4K, + pe->table_group.tce32_size, + POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; } + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index eaec85b63b34..f2bdfea3b68d 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -127,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, u64 phb_id; int64_t rc; static int primary = 1; + struct iommu_table_group *table_group; + struct iommu_table *tbl; pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); @@ -201,7 +203,10 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() * should not be called for phb->p5ioc2.table_group.tables[0] ever. */ - phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; + tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; + table_group = &phb->p5ioc2.table_group; + table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; + table_group->tce32_size = tbl->it_size << tbl->it_page_shift; } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a9e2d13c03c0..6d919eb4251f 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -135,7 +135,6 @@ static int tce_iommu_enable(struct tce_container *container) { int ret = 0; unsigned long locked; - struct iommu_table *tbl; struct iommu_table_group *table_group; if (!container->grp) @@ -171,13 +170,19 @@ static int tce_iommu_enable(struct tce_container *container) * this is that we cannot tell here the amount of RAM used by the guest * as this information is only available from KVM and VFIO is * KVM agnostic. + * + * So we do not allow enabling a container without a group attached + * as there is no way to know how much we should increment + * the locked_vm counter. */ table_group = iommu_group_get_iommudata(container->grp); if (!table_group) return -ENODEV; - tbl = table_group->tables[0]; - locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT; + if (!table_group->tce32_size) + return -EPERM; + + locked = table_group->tce32_size >> PAGE_SHIFT; ret = try_increment_locked_vm(locked); if (ret) return ret; @@ -350,7 +355,6 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; - struct iommu_table *tbl; struct iommu_table_group *table_group; if (WARN_ON(!container->grp)) @@ -358,8 +362,7 @@ static long tce_iommu_ioctl(void *iommu_data, table_group = iommu_group_get_iommudata(container->grp); - tbl = table_group->tables[0]; - if (WARN_ON_ONCE(!tbl)) + if (!table_group) return -ENXIO; minsz = offsetofend(struct vfio_iommu_spapr_tce_info, @@ -371,8 +374,8 @@ static long tce_iommu_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.dma32_window_start = tbl->it_offset << tbl->it_page_shift; - info.dma32_window_size = tbl->it_size << tbl->it_page_shift; + info.dma32_window_start = table_group->tce32_start; + info.dma32_window_size = table_group->tce32_size; info.flags = 0; if (copy_to_user((void __user *)arg, &info, minsz)) -- cgit v1.2.3 From c035e37b58c75ca216bfd1d5de3c1080ac0022b9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:21 +1000 Subject: powerpc/powernv/ioda2: Use new helpers to do proper cleanup on PE release The existing code programmed TVT#0 with some address and then immediately released that memory. This makes use of pnv_pci_ioda2_unset_window() and pnv_pci_ioda2_set_bypass() which do correct resource release and TVT update. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ee3e643c16bb..510bb9c8a9e1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1288,34 +1288,21 @@ m64_failed: return -EBUSY; } +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num); +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); + static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) { - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; struct iommu_table *tbl; - unsigned long addr; int64_t rc; - bus = dev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; tbl = pe->table_group.tables[0]; - addr = tbl->it_base; - - opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - 0, 0x1000); - - rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, - pe->pe_number, - (pe->pe_number << 1) + 1, - pe->tce_bypass_base, - 0); + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { iommu_group_put(pe->table_group.group); BUG_ON(pe->table_group.group); -- cgit v1.2.3 From 0054719386d96984153ad31d714a8be4ec7eba80 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:22 +1000 Subject: powerpc/iommu/ioda2: Add get_table_size() to calculate the size of future table This adds a way for the IOMMU user to know how much a new table will use so it can be accounted in the locked_vm limit before allocation happens. This stores the allocated table size in pnv_pci_ioda2_get_table_size() so the locked_vm counter can be updated correctly when a table is being disposed. This defines an iommu_table_group_ops callback to let VFIO know how much memory will be locked if a table is created. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 5 +++++ arch/powerpc/platforms/powernv/pci-ioda.c | 34 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e5541759a37d..9d3749287689 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -99,6 +99,7 @@ struct iommu_table { unsigned long it_size; /* Size of iommu table in entries */ unsigned long it_indirect_levels; unsigned long it_level_size; + unsigned long it_allocated_size; unsigned long it_offset; /* Offset into global table */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_index; /* which iommu table this is */ @@ -147,6 +148,10 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, struct iommu_table_group; struct iommu_table_group_ops { + unsigned long (*get_table_size)( + __u32 page_shift, + __u64 window_size, + __u32 levels); long (*create_table)(struct iommu_table_group *table_group, int num, __u32 page_shift, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 510bb9c8a9e1..a7e098dba23d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2074,6 +2074,38 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, } #ifdef CONFIG_IOMMU_API +static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long bytes = 0; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = entries_shift + 3; + unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift); + unsigned long direct_table_size; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || + (window_size > memory_hotplug_max()) || + !is_power_of_2(window_size)) + return 0; + + /* Calculate a direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + table_shift = entries_shift + 3; + table_shift = max_t(unsigned, table_shift, PAGE_SHIFT); + direct_table_size = 1UL << table_shift; + + for ( ; levels; --levels) { + bytes += _ALIGN_UP(tce_table_size, direct_table_size); + + tce_table_size /= direct_table_size; + tce_table_size <<= 3; + tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size); + } + + return bytes; +} + static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num) { @@ -2117,6 +2149,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .get_table_size = pnv_pci_ioda2_get_table_size, .create_table = pnv_pci_ioda2_create_table, .set_window = pnv_pci_ioda2_set_window, .unset_window = pnv_pci_ioda2_unset_window, @@ -2228,6 +2261,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, page_shift); tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; + tbl->it_allocated_size = offset; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); -- cgit v1.2.3 From 46d3e1e16294c587a74093b1f5474c1b33b72381 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:23 +1000 Subject: vfio: powerpc/spapr: powerpc/powernv/ioda2: Use DMA windows API in ownership control Before the IOMMU user (VFIO) would take control over the IOMMU table belonging to a specific IOMMU group. This approach did not allow sharing tables between IOMMU groups attached to the same container. This introduces a new IOMMU ownership flavour when the user can not just control the existing IOMMU table but remove/create tables on demand. If an IOMMU implements take/release_ownership() callbacks, this lets the user have full control over the IOMMU group. When the ownership is taken, the platform code removes all the windows so the caller must create them. Before returning the ownership back to the platform code, VFIO unprograms and removes all the tables it created. This changes IODA2's onwership handler to remove the existing table rather than manipulating with the existing one. From now on, iommu_take_ownership() and iommu_release_ownership() are only called from the vfio_iommu_spapr_tce driver. Old-style ownership is still supported allowing VFIO to run on older P5IOC2 and IODA IO controllers. No change in userspace-visible behaviour is expected. Since it recreates TCE tables on each ownership change, related kernel traces will appear more often. This adds a pnv_pci_ioda2_setup_default_config() which is called when PE is being configured at boot time and when the ownership is passed from VFIO to the platform code. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 101 ++++++++++++++++-------------- drivers/vfio/vfio_iommu_spapr_tce.c | 88 +++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 48 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index a7e098dba23d..b9f0f430e249 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2073,6 +2073,49 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, return 0; } +static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = NULL; + long rc; + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, + IOMMU_PAGE_SHIFT_4K, + pe->table_group.tce32_size, + POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", + rc); + return rc; + } + + iommu_init_table(tbl, pe->phb->hose->node); + + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (rc) { + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", + rc); + pnv_ioda2_table_free(tbl); + return rc; + } + + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_set_bypass(pe, true); + + /* OPAL variant of PHB3 invalidated TCEs */ + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + if (pe->flags & PNV_IODA_PE_DEV) + set_iommu_table_base(&pe->pdev->dev, tbl); + + return 0; +} + #ifdef CONFIG_IOMMU_API static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels) @@ -2134,9 +2177,12 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); + /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ + struct iommu_table *tbl = pe->table_group.tables[0]; - iommu_take_ownership(table_group->tables[0]); pnv_pci_ioda2_set_bypass(pe, false); + pnv_pci_ioda2_unset_window(&pe->table_group, 0); + pnv_ioda2_table_free(tbl); } static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) @@ -2144,8 +2190,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); - iommu_release_ownership(table_group->tables[0]); - pnv_pci_ioda2_set_bypass(pe, true); + pnv_pci_ioda2_setup_default_config(pe); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2308,7 +2353,6 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct iommu_table *tbl = NULL; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2333,58 +2377,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, IOMMU_TABLE_GROUP_MAX_TABLES; pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; - - rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, - IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, - POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); - if (rc) { - pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); - goto fail; - } - pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); - - tbl->it_ops = &pnv_ioda2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); #ifdef CONFIG_IOMMU_API pe->table_group.ops = &pnv_pci_ioda2_ops; #endif - rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + rc = pnv_pci_ioda2_setup_default_config(pe); if (rc) { - pe_err(pe, "Failed to configure 32-bit TCE table," - " err %ld\n", rc); - goto fail; + if (pe->tce32_seg >= 0) + pe->tce32_seg = -1; + return; } - /* OPAL variant of PHB3 invalidated TCEs */ - if (phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - - if (pe->flags & PNV_IODA_PE_DEV) { - /* - * Setting table base here only for carrying iommu_group - * further down to let iommu_add_device() do the job. - * pnv_pci_ioda_dma_dev_setup will override it later anyway. - */ - set_iommu_table_base(&pe->pdev->dev, tbl); + if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - - /* Also create a bypass window */ - if (!pnv_iommu_bypass_disabled) - pnv_pci_ioda2_set_bypass(pe, true); - - return; -fail: - if (pe->tce32_seg >= 0) - pe->tce32_seg = -1; - if (tbl) { - pnv_pci_ioda2_table_free_pages(tbl); - pnv_pci_unlink_table_and_group(tbl, &pe->table_group); - iommu_free_table(tbl, "pnv"); - } } static void pnv_ioda_setup_dma(struct pnv_phb *phb) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 6d919eb4251f..203caacf2242 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -333,6 +333,45 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_create_table(struct tce_container *container, + struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table **ptbl) +{ + long ret, table_size; + + table_size = table_group->ops->get_table_size(page_shift, window_size, + levels); + if (!table_size) + return -EINVAL; + + ret = try_increment_locked_vm(table_size >> PAGE_SHIFT); + if (ret) + return ret; + + ret = table_group->ops->create_table(table_group, num, + page_shift, window_size, levels, ptbl); + + WARN_ON(!ret && !(*ptbl)->it_ops->free); + WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + + if (ret) + decrement_locked_vm(table_size >> PAGE_SHIFT); + + return ret; +} + +static void tce_iommu_free_table(struct iommu_table *tbl) +{ + unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + + tbl->it_ops->free(tbl); + decrement_locked_vm(pages); +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -546,15 +585,62 @@ static int tce_iommu_take_ownership(struct tce_container *container, static void tce_iommu_release_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { + long i; + + if (!table_group->ops->unset_window) { + WARN_ON_ONCE(1); + return; + } + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + /* Store table pointer as unset_window resets it */ + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + table_group->ops->unset_window(table_group, i); + tce_iommu_clear(container, tbl, + tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } + table_group->ops->release_ownership(table_group); } static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { + long ret; + struct iommu_table *tbl = NULL; + + if (!table_group->ops->create_table || !table_group->ops->set_window || + !table_group->ops->release_ownership) { + WARN_ON_ONCE(1); + return -EFAULT; + } + table_group->ops->take_ownership(table_group); - return 0; + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); + if (!ret) { + ret = table_group->ops->set_window(table_group, 0, tbl); + if (ret) + tce_iommu_free_table(tbl); + else + table_group->tables[0] = tbl; + } + + if (ret) + table_group->ops->release_ownership(table_group); + + return ret; } static int tce_iommu_attach_group(void *iommu_data, -- cgit v1.2.3 From 15b244a88e1b2895605be4300b40b575345bcf50 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:24 +1000 Subject: powerpc/mmu: Add userspace-to-physical addresses translation cache We are adding support for DMA memory pre-registration to be used in conjunction with VFIO. The idea is that the userspace which is going to run a guest may want to pre-register a user space memory region so it all gets pinned once and never goes away. Having this done, a hypervisor will not have to pin/unpin pages on every DMA map/unmap request. This is going to help with multiple pinning of the same memory. Another use of it is in-kernel real mode (mmu off) acceleration of DMA requests where real time translation of guest physical to host physical addresses is non-trivial and may fail as linux ptes may be temporarily invalid. Also, having cached host physical addresses (compared to just pinning at the start and then walking the page table again on every H_PUT_TCE), we can be sure that the addresses which we put into TCE table are the ones we already pinned. This adds a list of memory regions to mm_context_t. Each region consists of a header and a list of physical addresses. This adds API to: 1. register/unregister memory regions; 2. do final cleanup (which puts all pre-registered pages); 3. do userspace to physical address translation; 4. manage usage counters; multiple registration of the same memory is allowed (once per container). This implements 2 counters per registered memory region: - @mapped: incremented on every DMA mapping; decremented on unmapping; initialized to 1 when a region is just registered; once it becomes zero, no more mappings allowe; - @used: incremented on every "register" ioctl; decremented on "unregister"; unregistration is allowed for DMA mapped regions unless it is the very last reference. For the very last reference this checks that the region is still mapped and returns -EBUSY so the userspace gets to know that memory is still pinned and unregistration needs to be retried; @used remains 1. Host physical addresses are stored in vmalloc'ed array. In order to access these in the real mode (mmu off), there is a real_vmalloc_addr() helper. In-kernel acceleration patchset will move it from KVM to MMU code. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-hash64.h | 3 + arch/powerpc/include/asm/mmu_context.h | 18 ++ arch/powerpc/kernel/setup_64.c | 3 + arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/mmu_context_hash64.c | 6 + arch/powerpc/mm/mmu_context_iommu.c | 316 +++++++++++++++++++++++++++++++++ 6 files changed, 347 insertions(+) create mode 100644 arch/powerpc/mm/mmu_context_iommu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 1da6a81ce541..a82f5347540a 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -536,6 +536,9 @@ typedef struct { /* for 4K PTE fragment support */ void *pte_frag; #endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + struct list_head iommu_group_mem_list; +#endif } mm_context_t; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 73382eba02dc..3e5184210d9b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -16,6 +16,24 @@ */ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct mm_iommu_table_group_mem_t; + +extern bool mm_iommu_preregistered(void); +extern long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem); +extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_init(mm_context_t *ctx); +extern void mm_iommu_cleanup(mm_context_t *ctx); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries); +extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa); +extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); +#endif extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9fe3dcdbfca7..bdcbb716f4d6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -686,6 +686,9 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = klimit; #ifdef CONFIG_PPC_64K_PAGES init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 9c8770b5f96f..3eb73a38220d 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 178876aef40f..4e4efbc2658e 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -88,6 +88,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&mm->context); #endif return 0; } @@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_cleanup(&mm->context); +#endif #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c new file mode 100644 index 000000000000..da6a2168ae9e --- /dev/null +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -0,0 +1,316 @@ +/* + * IOMMU helpers in MMU context. + * + * Copyright (C) 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(mem_list_mutex); + +struct mm_iommu_table_group_mem_t { + struct list_head next; + struct rcu_head rcu; + unsigned long used; + atomic64_t mapped; + u64 ua; /* userspace address */ + u64 entries; /* number of entries in hpas[] */ + u64 *hpas; /* vmalloc'ed */ +}; + +static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, + unsigned long npages, bool incr) +{ + long ret = 0, locked, lock_limit; + + if (!npages) + return 0; + + down_write(&mm->mmap_sem); + + if (incr) { + locked = mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + mm->locked_vm += npages; + } else { + if (WARN_ON_ONCE(npages > mm->locked_vm)) + npages = mm->locked_vm; + mm->locked_vm -= npages; + } + + pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", + current->pid, + incr ? '+' : '-', + npages << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK)); + up_write(&mm->mmap_sem); + + return ret; +} + +bool mm_iommu_preregistered(void) +{ + if (!current || !current->mm) + return false; + + return !list_empty(¤t->mm->context.iommu_group_mem_list); +} +EXPORT_SYMBOL_GPL(mm_iommu_preregistered); + +long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem) +{ + struct mm_iommu_table_group_mem_t *mem; + long i, j, ret = 0, locked_entries = 0; + struct page *page = NULL; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ++mem->used; + *pmem = mem; + goto unlock_exit; + } + + /* Overlap? */ + if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem->ua + + (mem->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + goto unlock_exit; + } + + } + + ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + if (ret) + goto unlock_exit; + + locked_entries = entries; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) { + ret = -ENOMEM; + goto unlock_exit; + } + + mem->hpas = vzalloc(entries * sizeof(mem->hpas[0])); + if (!mem->hpas) { + kfree(mem); + ret = -ENOMEM; + goto unlock_exit; + } + + for (i = 0; i < entries; ++i) { + if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + 1/* pages */, 1/* iswrite */, &page)) { + for (j = 0; j < i; ++j) + put_page(pfn_to_page( + mem->hpas[j] >> PAGE_SHIFT)); + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } + + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + + atomic64_set(&mem->mapped, 1); + mem->used = 1; + mem->ua = ua; + mem->entries = entries; + *pmem = mem; + + list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + +unlock_exit: + if (locked_entries && ret) + mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_get); + +static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) +{ + long i; + struct page *page = NULL; + + for (i = 0; i < mem->entries; ++i) { + if (!mem->hpas[i]) + continue; + + page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT); + if (!page) + continue; + + put_page(page); + mem->hpas[i] = 0; + } +} + +static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem) +{ + + mm_iommu_unpin(mem); + vfree(mem->hpas); + kfree(mem); +} + +static void mm_iommu_free(struct rcu_head *head) +{ + struct mm_iommu_table_group_mem_t *mem = container_of(head, + struct mm_iommu_table_group_mem_t, rcu); + + mm_iommu_do_free(mem); +} + +static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) +{ + list_del_rcu(&mem->next); + mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); + call_rcu(&mem->rcu, mm_iommu_free); +} + +long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +{ + long ret = 0; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + if (mem->used == 0) { + ret = -ENOENT; + goto unlock_exit; + } + + --mem->used; + /* There are still users, exit */ + if (mem->used) + goto unlock_exit; + + /* Are there still mappings? */ + if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + ++mem->used; + ret = -EBUSY; + goto unlock_exit; + } + + /* @mapped became 0 so now mappings are disabled, release the region */ + mm_iommu_release(mem); + +unlock_exit: + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_put); + +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup); + +struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_find); + +long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + u64 *va = &mem->hpas[entry]; + + if (entry >= mem->entries) + return -EFAULT; + + *hpa = *va | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); + +long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) +{ + if (atomic64_inc_not_zero(&mem->mapped)) + return 0; + + /* Last mm_iommu_put() has been called, no more mappings allowed() */ + return -ENXIO; +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc); + +void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) +{ + atomic64_add_unless(&mem->mapped, -1, 1); +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); + +void mm_iommu_init(mm_context_t *ctx) +{ + INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); +} + +void mm_iommu_cleanup(mm_context_t *ctx) +{ + struct mm_iommu_table_group_mem_t *mem, *tmp; + + list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_del_rcu(&mem->next); + mm_iommu_do_free(mem); + } +} -- cgit v1.2.3 From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:25 +1000 Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2 The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would requite additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. It requires: 1) guest pages to be registered first 2) consequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support DDW API), this creates a default DMA window for IODA2 for consistency. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 31 ++- arch/powerpc/include/asm/iommu.h | 6 + drivers/vfio/vfio_iommu_spapr_tce.c | 501 ++++++++++++++++++++++++++++++------ include/uapi/linux/vfio.h | 27 ++ 4 files changed, 482 insertions(+), 83 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 4c746a7e717a..dcc37e109c68 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -289,10 +289,12 @@ PPC64 sPAPR implementation note This implementation has some specifics: -1) Only one IOMMU group per container is supported as an IOMMU group -represents the minimal entity which isolation can be guaranteed for and -groups are allocated statically, one per a Partitionable Endpoint (PE) +1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per +container is supported as an IOMMU table is allocated at the boot time, +one table per a IOMMU group which is a Partitionable Endpoint (PE) (PE is often a PCI domain but not always). +Newer systems (POWER8 with IODA2) have improved hardware design which allows +to remove this limitation and have multiple IOMMU groups per a VFIO container. 2) The hardware supports so called DMA windows - the PCI address range within which DMA transfer is allowed, any attempt to access address space @@ -439,6 +441,29 @@ The code flow from the example above should be slightly changed: .... +5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/ +VFIO_IOMMU_DISABLE and implements 2 new ioctls: +VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY +(which are unsupported in v1 IOMMU). + +PPC64 paravirtualized guests generate a lot of map/unmap requests, +and the handling of those includes pinning/unpinning pages and updating +mm::locked_vm counter to make sure we do not exceed the rlimit. +The v2 IOMMU splits accounting and pinning into separate operations: + +- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls +receive a user space address and size of the block to be pinned. +Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to +be called with the exact address and size used for registering +the memory block. The userspace is not expected to call these often. +The ranges are stored in a linked list in a VFIO container. + +- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual +IOMMU table and do not do pinning; instead these check that the userspace +address is from pre-registered range. + +This separation helps in optimizing DMA for guests. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 9d3749287689..f9957eb4c659 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -112,9 +112,15 @@ struct iommu_table { unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ + unsigned long *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; }; +#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ + ((tbl)->it_userspace ? \ + &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \ + NULL) + /* Pure 2^n version of get_order */ static inline __attribute_const__ int get_iommu_order(unsigned long size, struct iommu_table *tbl) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 203caacf2242..91a32239bd0a 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #define DRIVER_VERSION "0.1" #define DRIVER_AUTHOR "aik@ozlabs.ru" @@ -81,6 +83,11 @@ static void decrement_locked_vm(long npages) * into DMA'ble space using the IOMMU */ +struct tce_iommu_group { + struct list_head next; + struct iommu_group *grp; +}; + /* * The container descriptor supports only a single group per container. * Required by the API as the container is not supplied with the IOMMU group @@ -88,11 +95,84 @@ static void decrement_locked_vm(long npages) */ struct tce_container { struct mutex lock; - struct iommu_group *grp; bool enabled; + bool v2; unsigned long locked_pages; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct list_head group_list; }; +static long tce_iommu_unregister_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + struct mm_iommu_table_group_mem_t *mem; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) + return -EINVAL; + + mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + if (!mem) + return -ENOENT; + + return mm_iommu_put(mem); +} + +static long tce_iommu_register_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long entries = size >> PAGE_SHIFT; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || + ((vaddr + size) < vaddr)) + return -EINVAL; + + ret = mm_iommu_get(vaddr, entries, &mem); + if (ret) + return ret; + + container->enabled = true; + + return 0; +} + +static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + unsigned long *uas; + long ret; + + BUG_ON(tbl->it_userspace); + + ret = try_increment_locked_vm(cb >> PAGE_SHIFT); + if (ret) + return ret; + + uas = vzalloc(cb); + if (!uas) { + decrement_locked_vm(cb >> PAGE_SHIFT); + return -ENOMEM; + } + tbl->it_userspace = uas; + + return 0; +} + +static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + + if (!tbl->it_userspace) + return; + + vfree(tbl->it_userspace); + tbl->it_userspace = NULL; + decrement_locked_vm(cb >> PAGE_SHIFT); +} + static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -103,18 +183,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift) return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; } +static inline bool tce_groups_attached(struct tce_container *container) +{ + return !list_empty(&container->group_list); +} + static long tce_iommu_find_table(struct tce_container *container, phys_addr_t ioba, struct iommu_table **ptbl) { long i; - struct iommu_table_group *table_group; - - table_group = iommu_group_get_iommudata(container->grp); - if (!table_group) - return -1; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (tbl) { unsigned long entry = ioba >> tbl->it_page_shift; @@ -136,9 +216,7 @@ static int tce_iommu_enable(struct tce_container *container) int ret = 0; unsigned long locked; struct iommu_table_group *table_group; - - if (!container->grp) - return -ENXIO; + struct tce_iommu_group *tcegrp; if (!current->mm) return -ESRCH; /* process exited */ @@ -175,7 +253,12 @@ static int tce_iommu_enable(struct tce_container *container) * as there is no way to know how much we should increment * the locked_vm counter. */ - table_group = iommu_group_get_iommudata(container->grp); + if (!tce_groups_attached(container)) + return -ENODEV; + + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENODEV; @@ -211,7 +294,7 @@ static void *tce_iommu_open(unsigned long arg) { struct tce_container *container; - if (arg != VFIO_SPAPR_TCE_IOMMU) { + if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { pr_err("tce_vfio: Wrong IOMMU type\n"); return ERR_PTR(-EINVAL); } @@ -221,18 +304,45 @@ static void *tce_iommu_open(unsigned long arg) return ERR_PTR(-ENOMEM); mutex_init(&container->lock); + INIT_LIST_HEAD_RCU(&container->group_list); + + container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; return container; } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages); +static void tce_iommu_free_table(struct iommu_table *tbl); + static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; + long i; - WARN_ON(container->grp); + while (tce_groups_attached(container)) { + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + tce_iommu_detach_group(iommu_data, tcegrp->grp); + } - if (container->grp) - tce_iommu_detach_group(iommu_data, container->grp); + /* + * If VFIO created a table, it was not disposed + * by tce_iommu_detach_group() so do it now. + */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -249,6 +359,47 @@ static void tce_iommu_unuse_page(struct tce_container *container, put_page(page); } +static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, + unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem; + + mem = mm_iommu_lookup(tce, size); + if (!mem) + return -EINVAL; + + ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + if (ret) + return -EINVAL; + + *pmem = mem; + + return 0; +} + +static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, + unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + int ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua || !current || !current->mm) + return; + + ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", + __func__, *pua, entry, ret); + if (mem) + mm_iommu_mapped_dec(mem); + + *pua = 0; +} + static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages) @@ -267,6 +418,11 @@ static int tce_iommu_clear(struct tce_container *container, if (direction == DMA_NONE) continue; + if (container->v2) { + tce_iommu_unuse_page_v2(tbl, entry); + continue; + } + tce_iommu_unuse_page(container, oldhpa); } @@ -333,6 +489,64 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_build_v2(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, unsigned long pages, + enum dma_data_direction direction) +{ + long i, ret = 0; + struct page *page; + unsigned long hpa; + enum dma_data_direction dirtmp; + + for (i = 0; i < pages; ++i) { + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, + entry + i); + + ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + break; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + if (!tce_page_is_contained(page, tbl->it_page_shift)) { + ret = -EPERM; + break; + } + + /* Preserve offset within IOMMU page */ + hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + dirtmp = direction; + + /* The registered region is being unregistered */ + if (mm_iommu_mapped_inc(mem)) + break; + + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); + if (ret) { + /* dirtmp cannot be DMA_NONE here */ + tce_iommu_unuse_page_v2(tbl, entry + i); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page_v2(tbl, entry + i); + + *pua = tce; + + tce += IOMMU_PAGE_SIZE(tbl); + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + static long tce_iommu_create_table(struct tce_container *container, struct iommu_table_group *table_group, int num, @@ -358,6 +572,12 @@ static long tce_iommu_create_table(struct tce_container *container, WARN_ON(!ret && !(*ptbl)->it_ops->free); WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + if (!ret && container->v2) { + ret = tce_iommu_userspace_view_alloc(*ptbl); + if (ret) + (*ptbl)->it_ops->free(*ptbl); + } + if (ret) decrement_locked_vm(table_size >> PAGE_SHIFT); @@ -368,6 +588,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl) { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + tce_iommu_userspace_view_free(tbl); tbl->it_ops->free(tbl); decrement_locked_vm(pages); } @@ -383,6 +604,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_CHECK_EXTENSION: switch (arg) { case VFIO_SPAPR_TCE_IOMMU: + case VFIO_SPAPR_TCE_v2_IOMMU: ret = 1; break; default: @@ -394,12 +616,15 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; + struct tce_iommu_group *tcegrp; struct iommu_table_group *table_group; - if (WARN_ON(!container->grp)) + if (!tce_groups_attached(container)) return -ENXIO; - table_group = iommu_group_get_iommudata(container->grp); + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENXIO; @@ -468,11 +693,18 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - ret = tce_iommu_build(container, tbl, - param.iova >> tbl->it_page_shift, - param.vaddr, - param.size >> tbl->it_page_shift, - direction); + if (container->v2) + ret = tce_iommu_build_v2(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); + else + ret = tce_iommu_build(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); @@ -518,7 +750,62 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_register_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } + case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_unregister_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } case VFIO_IOMMU_ENABLE: + if (container->v2) + break; + mutex_lock(&container->lock); ret = tce_iommu_enable(container); mutex_unlock(&container->lock); @@ -526,16 +813,27 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_DISABLE: + if (container->v2) + break; + mutex_lock(&container->lock); tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; - case VFIO_EEH_PE_OP: - if (!container->grp) - return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->grp, - cmd, arg); + case VFIO_EEH_PE_OP: { + struct tce_iommu_group *tcegrp; + + ret = 0; + list_for_each_entry(tcegrp, &container->group_list, next) { + ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, + cmd, arg); + if (ret) + return ret; + } + return ret; + } + } return -ENOTTY; @@ -547,14 +845,17 @@ static void tce_iommu_release_ownership(struct tce_container *container, int i; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (!tbl) continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_userspace_view_free(tbl); if (tbl->it_map) iommu_release_ownership(tbl); + + container->tables[i] = NULL; } } @@ -569,7 +870,10 @@ static int tce_iommu_take_ownership(struct tce_container *container, if (!tbl || !tbl->it_map) continue; - rc = iommu_take_ownership(tbl); + rc = tce_iommu_userspace_view_alloc(tbl); + if (!rc) + rc = iommu_take_ownership(tbl); + if (rc) { for (j = 0; j < i; ++j) iommu_release_ownership( @@ -579,6 +883,9 @@ static int tce_iommu_take_ownership(struct tce_container *container, } } + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + container->tables[i] = table_group->tables[i]; + return 0; } @@ -592,18 +899,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, return; } - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - /* Store table pointer as unset_window resets it */ - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl) - continue; - + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) table_group->ops->unset_window(table_group, i); - tce_iommu_clear(container, tbl, - tbl->it_offset, tbl->it_size); - tce_iommu_free_table(tbl); - } table_group->ops->release_ownership(table_group); } @@ -611,7 +908,7 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { - long ret; + long i, ret = 0; struct iommu_table *tbl = NULL; if (!table_group->ops->create_table || !table_group->ops->set_window || @@ -622,23 +919,45 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, table_group->ops->take_ownership(table_group); - ret = tce_iommu_create_table(container, - table_group, - 0, /* window number */ - IOMMU_PAGE_SHIFT_4K, - table_group->tce32_size, - 1, /* default levels */ - &tbl); - if (!ret) { - ret = table_group->ops->set_window(table_group, 0, tbl); + /* + * If it the first group attached, check if there is + * a default DMA window and create one if none as + * the userspace expects it to exist. + */ + if (!tce_groups_attached(container) && !container->tables[0]) { + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); if (ret) - tce_iommu_free_table(tbl); + goto release_exit; else - table_group->tables[0] = tbl; + container->tables[0] = tbl; } - if (ret) - table_group->ops->release_ownership(table_group); + /* Set all windows to the new group */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + tbl = container->tables[i]; + + if (!tbl) + continue; + + /* Set the default window to a new group */ + ret = table_group->ops->set_window(table_group, i, tbl); + if (ret) + goto release_exit; + } + + return 0; + +release_exit: + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); return ret; } @@ -649,29 +968,44 @@ static int tce_iommu_attach_group(void *iommu_data, int ret; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp = NULL; mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->grp) { - pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->grp), - iommu_group_id(iommu_group)); + table_group = iommu_group_get_iommudata(iommu_group); + + if (tce_groups_attached(container) && (!table_group->ops || + !table_group->ops->take_ownership || + !table_group->ops->release_ownership)) { ret = -EBUSY; goto unlock_exit; } - if (container->enabled) { - pr_err("tce_vfio: attaching group #%u to enabled container\n", - iommu_group_id(iommu_group)); - ret = -EBUSY; - goto unlock_exit; + /* Check if new group has the same iommu_ops (i.e. compatible) */ + list_for_each_entry(tcegrp, &container->group_list, next) { + struct iommu_table_group *table_group_tmp; + + if (tcegrp->grp == iommu_group) { + pr_warn("tce_vfio: Group %d is already attached\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + goto unlock_exit; + } + table_group_tmp = iommu_group_get_iommudata(tcegrp->grp); + if (table_group_tmp->ops != table_group->ops) { + pr_warn("tce_vfio: Group %d is incompatible with group %d\n", + iommu_group_id(iommu_group), + iommu_group_id(tcegrp->grp)); + ret = -EPERM; + goto unlock_exit; + } } - table_group = iommu_group_get_iommudata(iommu_group); - if (!table_group) { - ret = -ENXIO; + tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL); + if (!tcegrp) { + ret = -ENOMEM; goto unlock_exit; } @@ -681,10 +1015,15 @@ static int tce_iommu_attach_group(void *iommu_data, else ret = tce_iommu_take_ownership_ddw(container, table_group); - if (!ret) - container->grp = iommu_group; + if (!ret) { + tcegrp->grp = iommu_group; + list_add(&tcegrp->next, &container->group_list); + } unlock_exit: + if (ret && tcegrp) + kfree(tcegrp); + mutex_unlock(&container->lock); return ret; @@ -695,24 +1034,26 @@ static void tce_iommu_detach_group(void *iommu_data, { struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + bool found = false; + struct tce_iommu_group *tcegrp; mutex_lock(&container->lock); - if (iommu_group != container->grp) { - pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", - iommu_group_id(iommu_group), - iommu_group_id(container->grp)); - goto unlock_exit; + + list_for_each_entry(tcegrp, &container->group_list, next) { + if (tcegrp->grp == iommu_group) { + found = true; + break; + } } - if (container->enabled) { - pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(container->grp)); - tce_iommu_disable(container); + if (!found) { + pr_warn("tce_vfio: detaching unattached group #%u\n", + iommu_group_id(iommu_group)); + goto unlock_exit; } - /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", - iommu_group_id(iommu_group), iommu_group); */ - container->grp = NULL; + list_del(&tcegrp->next); + kfree(tcegrp); table_group = iommu_group_get_iommudata(iommu_group); BUG_ON(!table_group); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e4fa1995f613..fa84391a0d00 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -507,6 +509,31 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:26 +1000 Subject: vfio: powerpc/spapr: Support Dynamic DMA windows This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature as we still want to support v2 on platforms which cannot do DDW for the sake of TCE acceleration in KVM (coming soon). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 19 ++++ arch/powerpc/include/asm/iommu.h | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 196 +++++++++++++++++++++++++++++++++++- include/uapi/linux/vfio.h | 61 ++++++++++- 4 files changed, 273 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index dcc37e109c68..1dd3fddfd3a1 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -464,6 +464,25 @@ address is from pre-registered range. This separation helps in optimizing DMA for guests. +6) sPAPR specification allows guests to have an additional DMA window(s) on +a PCI bus with a variable page size. Two ioctls have been added to support +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. +The platform has to support the functionality or error will be returned to +the userspace. The existing hardware supports up to 2 DMA windows, one is +2GB long, uses 4K pages and called "default 32bit window"; the other can +be as big as entire RAM, use different page size, it is optional - guests +create those in run-time if the guest driver supports 64bit DMA. + +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and +a number of TCE table levels (if a TCE table is going to be big enough and +the kernel may not be able to allocate enough of physically contiguous memory). +It creates a new window in the available slot and returns the bus address where +the new window starts. Due to hardware limitation, the user space cannot choose +the location of DMA windows. + +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window +and removes it. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index f9957eb4c659..ca18cff90900 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -149,7 +149,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -#define IOMMU_TABLE_GROUP_MAX_TABLES 1 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 91a32239bd0a..0582b72ef377 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,6 +211,18 @@ static long tce_iommu_find_table(struct tce_container *container, return -1; } +static int tce_iommu_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (!container->tables[i]) + return i; + } + + return -ENOSPC; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; @@ -593,11 +605,115 @@ static void tce_iommu_free_table(struct iommu_table *tbl) decrement_locked_vm(pages); } +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64 window_size, __u32 levels, + __u64 *start_addr) +{ + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + struct iommu_table *tbl = NULL; + long ret, num; + + num = tce_iommu_find_free_table(container); + if (num < 0) + return num; + + /* Get the first group for ops::create_table */ + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -EFAULT; + + if (!(table_group->pgsizes & (1ULL << page_shift))) + return -EINVAL; + + if (!table_group->ops->set_window || !table_group->ops->unset_window || + !table_group->ops->get_table_size || + !table_group->ops->create_table) + return -EPERM; + + /* Create TCE table */ + ret = tce_iommu_create_table(container, table_group, num, + page_shift, window_size, levels, &tbl); + if (ret) + return ret; + + BUG_ON(!tbl->it_ops->free); + + /* + * Program the table to every group. + * Groups have been tested for compatibility at the attach time. + */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + ret = table_group->ops->set_window(table_group, num, tbl); + if (ret) + goto unset_exit; + } + + container->tables[num] = tbl; + + /* Return start address assigned by platform in create_table() */ + *start_addr = tbl->it_offset << tbl->it_page_shift; + + return 0; + +unset_exit: + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + table_group->ops->unset_window(table_group, num); + } + tce_iommu_free_table(tbl); + + return ret; +} + +static long tce_iommu_remove_window(struct tce_container *container, + __u64 start_addr) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl; + struct tce_iommu_group *tcegrp; + int num; + + num = tce_iommu_find_table(container, start_addr, &tbl); + if (num < 0) + return -EINVAL; + + BUG_ON(!tbl->it_size); + + /* Detach groups from IOMMUs */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + /* + * SPAPR TCE IOMMU exposes the default DMA window to + * the guest via dma32_window_start/size of + * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow + * the userspace to remove this window, some do not so + * here we check for the platform capability. + */ + if (!table_group->ops || !table_group->ops->unset_window) + return -EPERM; + + table_group->ops->unset_window(table_group, num); + } + + /* Free table */ + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + container->tables[num] = NULL; + + return 0; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { struct tce_container *container = iommu_data; - unsigned long minsz; + unsigned long minsz, ddwsz; long ret; switch (cmd) { @@ -641,6 +757,21 @@ static long tce_iommu_ioctl(void *iommu_data, info.dma32_window_start = table_group->tce32_start; info.dma32_window_size = table_group->tce32_size; info.flags = 0; + memset(&info.ddw, 0, sizeof(info.ddw)); + + if (table_group->max_dynamic_windows_supported && + container->v2) { + info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW; + info.ddw.pgsizes = table_group->pgsizes; + info.ddw.max_dynamic_windows_supported = + table_group->max_dynamic_windows_supported; + info.ddw.levels = table_group->max_levels; + } + + ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw); + + if (info.argsz >= ddwsz) + minsz = ddwsz; if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; @@ -834,6 +965,69 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_TCE_CREATE: { + struct vfio_iommu_spapr_tce_create create; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_create, + start_addr); + + if (copy_from_user(&create, (void __user *)arg, minsz)) + return -EFAULT; + + if (create.argsz < minsz) + return -EINVAL; + + if (create.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_create_window(container, create.page_shift, + create.window_size, create.levels, + &create.start_addr); + + mutex_unlock(&container->lock); + + if (!ret && copy_to_user((void __user *)arg, &create, minsz)) + ret = -EFAULT; + + return ret; + } + case VFIO_IOMMU_SPAPR_TCE_REMOVE: { + struct vfio_iommu_spapr_tce_remove remove; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_remove, + start_addr); + + if (copy_from_user(&remove, (void __user *)arg, minsz)) + return -EFAULT; + + if (remove.argsz < minsz) + return -EINVAL; + + if (remove.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_remove_window(container, remove.start_addr); + + mutex_unlock(&container->lock); + + return ret; + } } return -ENOTTY; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa84391a0d00..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + /* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into @@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory { */ #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. + */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From b91c1e3e7a6f22a6b898e345b745b6a43273c973 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:25 +1000 Subject: powerpc: Fix duplicate const clang warning in user access code We see a large number of duplicate const errors in the user access code when building with llvm/clang: include/linux/pagemap.h:576:8: warning: duplicate 'const' declaration specifier [-Wduplicate-decl-specifier] ret = __get_user(c, uaddr); The problem is we are doing const __typeof__(*(ptr)), which will hit the warning if ptr is marked const. Removing const does not seem to have any effect on GCC code generation. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a0c071d24e0e..2a8ebae0936b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -265,7 +265,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -279,7 +279,7 @@ do { \ ({ \ long __gu_err; \ long long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -293,7 +293,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ @@ -305,7 +305,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ -- cgit v1.2.3 From 1fb3f5a7ca599f322e6bf21272ad215301159aa0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:26 +1000 Subject: powerpc: Only use -mabi=altivec if toolchain supports it The -mabi=altivec option is not recognised on LLVM, so use call cc-option to check for support. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 2 +- lib/raid6/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 7902802a19a5..a47e14277fd8 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -33,6 +33,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o -CFLAGS_xor_vmx.o += -maltivec -mabi=altivec +CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index c7dab0645554..3b10a48fa040 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -15,7 +15,7 @@ quiet_cmd_unroll = UNROLL $@ < $< > $@ || ( rm -f $@ && exit 1 ) ifeq ($(CONFIG_ALTIVEC),y) -altivec_flags := -maltivec -mabi=altivec +altivec_flags := -maltivec $(call cc-option,-mabi=altivec) endif # The GCC option -ffreestanding is required in order to compile code containing -- cgit v1.2.3 From a50a862e74a9d7b447df30c33aa3d534ce3a977a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:27 +1000 Subject: powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it These options are not recognised on LLVM, so use call cc-option to check for support. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index edf39cebfed8..799e67d54c4d 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -113,7 +113,7 @@ else endif endif -CFLAGS-$(CONFIG_PPC64) := -mtraceback=no +CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) @@ -160,7 +160,8 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr) KBUILD_AFLAGS += -Iarch/$(ARCH) $(AFLAGS-y) -KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y) +KBUILD_CFLAGS += $(call cc-option,-msoft-float) +KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__ @@ -192,7 +193,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) # Never use string load/store instructions as they are # often slow when they are implemented at all -KBUILD_CFLAGS += -mno-string +KBUILD_CFLAGS += $(call cc-option,-mno-string) ifeq ($(CONFIG_6xx),y) KBUILD_CFLAGS += -mcpu=powerpc -- cgit v1.2.3 From 92d6cf2dab83cfb00b3d80fd38a38a77c6873277 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:28 +1000 Subject: powerpc: Don't use -mno-strict-align on clang We added -mno-strict-align in commit f036b3681962 (powerpc: Work around little endian gcc bug) to fix gcc bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57134 Clang doesn't understand it. We need to use a conditional because we can't use the simpler call cc-option here. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 799e67d54c4d..1eec1a0462b6 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -66,7 +66,10 @@ endif UTS_MACHINE := $(OLDARCH) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override CC += -mlittle-endian -mno-strict-align +override CC += -mlittle-endian +ifneq ($(COMPILER),clang) +override CC += -mno-strict-align +endif override AS += -mlittle-endian override LD += -EL override CROSS32CC += -mlittle-endian -- cgit v1.2.3 From 238abecde8ad43f914e095fcf23e0bd35dc7a7f2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:29 +1000 Subject: powerpc: Don't use gcc specific options on clang We have code to choose between several options, eg. -mabi=elfv2 vs -mcall-aixdesc, and -mcmodel=medium vs -mminimal-toc. But these are all GCC specific, so use cc-option on all of them. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1eec1a0462b6..05f464eb6952 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -118,12 +118,12 @@ endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc) +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else -CFLAGS-$(CONFIG_PPC64) += -mcall-aixdesc +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) endif -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc) +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) -- cgit v1.2.3 From 4bece972fce6e597cb513bdcae4a04e14fc0dd81 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 15 Jun 2015 15:01:32 +1000 Subject: powerpc/powernv: pnv_init_idle_states() should only run on powernv Although this init call checks for device tree properties before doing anything, it should still only run on powernv machines. Reviewed-by: Shreyas B Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index bd39a120bd60..59d735d2e5c0 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -289,5 +290,4 @@ out_free: out: return 0; } - -subsys_initcall(pnv_init_idle_states); +machine_subsys_initcall(powernv, pnv_init_idle_states); -- cgit v1.2.3 From 3609d819a36c65857816ca1278d80767d6d9b990 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Wed, 20 May 2015 00:30:14 +0530 Subject: powerpc: Make doorbell check preemption safe Doorbell can be used to cause ipi on cpus which are sibling threads on the same core. So icp_native_cause_ipi checks if the destination cpu is a sibling thread of the current cpu and uses doorbell in such cases. But while running with CONFIG_PREEMPT=y, since this section is preemtible, we can run into issues if after we check if the destination cpu is a sibling cpu, the task gets migrated from a sibling cpu to a cpu on another core. Fix this by using get_cpu()/ put_cpu() Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-native.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 2fc4cf1b7557..eae32654bdf2 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -147,12 +147,16 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) { kvmppc_set_host_ipi(cpu, 1); #ifdef CONFIG_PPC_DOORBELL - if (cpu_has_feature(CPU_FTR_DBELL) && - (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))) - doorbell_cause_ipi(cpu, data); - else + if (cpu_has_feature(CPU_FTR_DBELL)) { + if (cpumask_test_cpu(cpu, cpu_sibling_mask(get_cpu()))) { + doorbell_cause_ipi(cpu, data); + put_cpu(); + return; + } + put_cpu(); + } #endif - icp_native_set_qirr(cpu, IPI_PRIORITY); + icp_native_set_qirr(cpu, IPI_PRIORITY); } /* -- cgit v1.2.3 From 02b6505c8f84cbb4be459c9bf8ebbb7b0754e764 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 17 Jun 2015 11:36:57 +1000 Subject: powerpc/powernv: Increase opal-irqchip initcall priority The eeh subsystem for powernv requires the opal event irqchip to be initialised prior to initialisation or the following errors are produced (and eeh doesn't work as expected): irq: XICS didn't like hwirq-0x9 to VIRQ17 mapping (rc=-22) pnv_eeh_post_init: Can't request OPAL event interrupt (0) On powernv eeh is initialised from a subsys_initcall due to a check for machine_is(powernv) in eeh_init(). This patch increases the initcall priority of opal_event_init() to an arch_initcall to ensure the opal event interface is initialised prior to any users of it. Signed-off-by: Alistair Popple Reported-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-irqchip.c | 4 ++++ arch/powerpc/platforms/powernv/opal.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 841135f48981..e2e7d75f52f3 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -231,6 +231,7 @@ out: of_node_put(opal_node); return rc; } +machine_arch_initcall(powernv, opal_event_init); /** * opal_event_request(unsigned int opal_event_nr) - Request an event @@ -244,6 +245,9 @@ out: */ int opal_event_request(unsigned int opal_event_nr) { + if (WARN_ON_ONCE(!opal_event_irqchip.domain)) + return NO_IRQ; + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); } EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 9e9c483eee4d..f084afa0e3ba 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -657,9 +657,6 @@ static int __init opal_init(void) return -ENODEV; } - /* Initialise OPAL events */ - opal_event_init(); - /* Register OPAL consoles if any ports */ if (firmware_has_feature(FW_FEATURE_OPALv2)) consoles = of_find_node_by_path("/ibm,opal/consoles"); -- cgit v1.2.3 From 7185795a62589292015484985635b4a38029a2b9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 15 Jun 2015 17:25:34 +0800 Subject: powerpc/powernv: fix construction of opal PRD messages We currently have a bug in the PRD code, where the contents of an incoming message (beyond the header) will be overwritten by the list item manipulations when adding to to the prd_msg_queue. This change reorders struct opal_prd_msg_queue_item, so that the message body doesn't overlap the list_head. We also clarify the memcpy of the message, as we're copying unnecessary bytes at the end of the message data. Signed-off-by: Jeremy Kerr Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index d9e72bde79f5..46cb3feb0a13 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -32,9 +32,13 @@ #include +/** + * The msg member must be at the end of the struct, as it's followed by the + * message data. + */ struct opal_prd_msg_queue_item { - struct opal_prd_msg_header msg; struct list_head list; + struct opal_prd_msg_header msg; }; static struct device_node *prd_node; @@ -351,22 +355,23 @@ static int opal_prd_msg_notifier(struct notifier_block *nb, struct opal_prd_msg_queue_item *item; struct opal_prd_msg_header *hdr; struct opal_msg *msg = _msg; + int msg_size, item_size; unsigned long flags; - int size; if (msg_type != OPAL_MSG_PRD) return 0; - /* Calculate total size of the item we need to store. The 'size' field - * in the header includes the header itself. */ + /* Calculate total size of the message and item we need to store. The + * 'size' field in the header includes the header itself. */ hdr = (void *)msg->params; - size = (sizeof(*item) - sizeof(item->msg)) + be16_to_cpu(hdr->size); + msg_size = be16_to_cpu(hdr->size); + item_size = msg_size + sizeof(*item) - sizeof(item->msg); - item = kzalloc(size, GFP_ATOMIC); + item = kzalloc(item_size, GFP_ATOMIC); if (!item) return -ENOMEM; - memcpy(&item->msg, msg->params, size); + memcpy(&item->msg, msg->params, msg_size); spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); list_add_tail(&item->list, &opal_prd_msg_queue); -- cgit v1.2.3 From cdf2bc1c283066d6f0834cc5dd4fcf5d01e44af9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 17 Jun 2015 08:35:39 +0800 Subject: powerpc/include: Add opal-prd to installed uapi headers We'll want to build the opal-prd daemon with the prd headers, so include this in the uapi headers list. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/include/uapi/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 79c4068be278..f44a027818af 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -18,6 +18,7 @@ header-y += kvm_para.h header-y += mman.h header-y += msgbuf.h header-y += nvram.h +header-y += opal-prd.h header-y += param.h header-y += perf_event.h header-y += poll.h -- cgit v1.2.3 From b5926430dfa07d17e5d768c16b0d81c13a793f7c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 15 Jun 2015 17:49:59 +1000 Subject: powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off The pnv_pci_ioda2_unset_window() function is used to do the final cleanup of a DMA window being released: - via VFIO ioctl by the guest request; - via unplugging a virtual PCI function. However the function was under #ifdef CONFIG_IOMMU_API and was missing. This moves the helper outside of IOMMU_API block and enables it for either or both IOMMU_API and PCI_IOV. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 50 ++++++++++++++++--------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b9f0f430e249..8424f5cdf0b1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2116,6 +2116,32 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) return 0; } +#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} +#endif + #ifdef CONFIG_IOMMU_API static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels) @@ -2149,30 +2175,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, return bytes; } -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, - int num) -{ - struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pnv_phb *phb = pe->phb; - long ret; - - pe_info(pe, "Removing DMA window #%d\n", num); - - ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - (pe->pe_number << 1) + num, - 0/* levels */, 0/* table address */, - 0/* table size */, 0/* page size */); - if (ret) - pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); - else - pnv_pci_ioda2_tce_invalidate_entire(pe); - - pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); - - return ret; -} - static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, -- cgit v1.2.3 From b4b56f9ecab40f3b4ef53e130c9f6663be491894 Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Fri, 12 Jun 2015 11:06:32 +1000 Subject: powerpc/tm: Abort syscalls in active transactions This patch changes the syscall handler to doom (tabort) active transactions when a syscall is made and return very early without performing the syscall and keeping side effects to a minimum (no CPU accounting or system call tracing is performed). Also included is a new HWCAP2 bit, PPC_FEATURE2_HTM_NOSC, to indicate this behaviour to userspace. Currently, the system call instruction automatically suspends an active transaction which causes side effects to persist when an active transaction fails. This does change the kernel's behaviour, but in a way that was documented as unsupported. It doesn't reduce functionality as syscalls will still be performed after tsuspend; it just requires that the transaction be explicitly suspended. It also provides a consistent interface and makes the behaviour of user code substantially the same across powerpc and platforms that do not support suspended transactions (e.g. x86 and s390). Performance measurements using http://ozlabs.org/~anton/junkcode/null_syscall.c indicate the cost of a normal (non-aborted) system call increases by about 0.25%. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- Documentation/powerpc/transactional_memory.txt | 32 +++++++++++----------- arch/powerpc/include/asm/cputable.h | 10 ++++--- arch/powerpc/include/uapi/asm/cputable.h | 1 + arch/powerpc/include/uapi/asm/tm.h | 2 +- arch/powerpc/kernel/cputable.c | 4 ++- arch/powerpc/kernel/entry_64.S | 35 +++++++++++++++++++++++++ tools/testing/selftests/powerpc/tm/Makefile | 4 +-- tools/testing/selftests/powerpc/tm/tm-syscall.c | 3 ++- 8 files changed, 66 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index ded69794a5c0..ba0a2a4a54ba 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -74,22 +74,23 @@ Causes of transaction aborts Syscalls ======== -Performing syscalls from within transaction is not recommended, and can lead -to unpredictable results. +Syscalls made from within an active transaction will not be performed and the +transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL +| TM_CAUSE_PERSISTENT. -Syscalls do not by design abort transactions, but beware: The kernel code will -not be running in transactional state. The effect of syscalls will always -remain visible, but depending on the call they may abort your transaction as a -side-effect, read soon-to-be-aborted transactional data that should not remain -invisible, etc. If you constantly retry a transaction that constantly aborts -itself by calling a syscall, you'll have a livelock & make no progress. +Syscalls made from within a suspended transaction are performed as normal and +the transaction is not explicitly doomed by the kernel. However, what the +kernel does to perform the syscall may result in the transaction being doomed +by the hardware. The syscall is performed in suspended mode so any side +effects will be persistent, independent of transaction success or failure. No +guarantees are provided by the kernel about which syscalls will affect +transaction success. -Simple syscalls (e.g. sigprocmask()) "could" be OK. Even things like write() -from, say, printf() should be OK as long as the kernel does not access any -memory that was accessed transactionally. - -Consider any syscalls that happen to work as debug-only -- not recommended for -production use. Best to queue them up till after the transaction is over. +Care must be taken when relying on syscalls to abort during active transactions +if the calls are made via a library. Libraries may cache values (which may +give the appearance of success) or perform operations that cause transaction +failure before entering the kernel (which may produce different failure codes). +Examples are glibc's getpid() and lazy symbol resolution. Signals @@ -176,8 +177,7 @@ kernel aborted a transaction: TM_CAUSE_RESCHED Thread was rescheduled. TM_CAUSE_TLBI Software TLB invalid. TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. - TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort - transactions for consistency will use this. + TM_CAUSE_SYSCALL Syscall from active transaction. TM_CAUSE_SIGNAL Signal delivered. TM_CAUSE_MISC Currently unused. TM_CAUSE_ALIGNMENT Alignment fault. diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 6367b8347dad..4994648b9265 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -242,11 +242,13 @@ enum { /* We only set the TM feature if the kernel was compiled with TM supprt */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -#define CPU_FTR_TM_COMP CPU_FTR_TM -#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM +#define CPU_FTR_TM_COMP CPU_FTR_TM +#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM +#define PPC_FEATURE2_HTM_NOSC_COMP PPC_FEATURE2_HTM_NOSC #else -#define CPU_FTR_TM_COMP 0 -#define PPC_FEATURE2_HTM_COMP 0 +#define CPU_FTR_TM_COMP 0 +#define PPC_FEATURE2_HTM_COMP 0 +#define PPC_FEATURE2_HTM_NOSC_COMP 0 #endif /* We need to mark all pages as being coherent if we're SMP or we have a diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index de2c0e4ee1aa..43686043e297 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -42,5 +42,6 @@ #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h index 5d836b7c1176..5047659815a5 100644 --- a/arch/powerpc/include/uapi/asm/tm.h +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -11,7 +11,7 @@ #define TM_CAUSE_RESCHED 0xde #define TM_CAUSE_TLBI 0xdc #define TM_CAUSE_FAC_UNAV 0xda -#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_SYSCALL 0xd8 #define TM_CAUSE_MISC 0xd6 /* future use */ #define TM_CAUSE_SIGNAL 0xd4 #define TM_CAUSE_ALIGNMENT 0xd2 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 60262fdf35ba..7d80bfdfb15e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -108,7 +108,9 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_HTM_COMP | \ + PPC_FEATURE2_HTM_NOSC_COMP | \ + PPC_FEATURE2_DSCR | \ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 278888e89acc..579e0f9a2d57 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -34,6 +34,7 @@ #include #include #include +#include /* * System calls. @@ -51,6 +52,12 @@ exception_marker: .globl system_call_common system_call_common: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif andi. r10,r12,MSR_PR mr r10,r1 addi r1,r1,-INT_FRAME_SIZE @@ -311,6 +318,34 @@ syscall_exit_work: bl do_syscall_trace_leave b ret_from_except +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tabort_syscall: + /* Firstly we need to enable TM in the kernel */ + mfmsr r10 + li r13, 1 + rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r10, 0 + + /* tabort, this dooms the transaction, nothing else */ + li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R13) + + /* + * Return directly to userspace. We have corrupted user register state, + * but userspace will never see that register state. Execution will + * resume after the tbegin of the aborted transaction with the + * checkpointed register state. + */ + li r13, MSR_RI + andc r10, r10, r13 + mtmsrd r10, 1 + mtspr SPRN_SRR0, r11 + mtspr SPRN_SRR1, r12 + + rfid + b . /* prevent speculative execution */ +#endif + /* Save non-volatile GPRs, if not already saved. */ _GLOBAL(save_nvgprs) ld r11,_TRAP(r1) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 6bff955e1d55..4bea62a319dc 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,11 +1,11 @@ -TEST_PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr tm-syscall all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c tm-syscall: tm-syscall-asm.S -tm-syscall: CFLAGS += -mhtm +tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 3ed8d4b252fa..1276e23da63b 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -82,7 +82,8 @@ int tm_syscall(void) unsigned count = 0; struct timeval end, now; - SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM)); + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) + & PPC_FEATURE2_HTM_NOSC)); setbuf(stdout, NULL); printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); -- cgit v1.2.3 From 02505cebc1db06707d31635e9c9b342d58f887b7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Jun 2015 08:13:40 +0530 Subject: powerpc/mm: PTE_RPN_MAX is not used, remove the same Remove the unused #define Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pte-common.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index c5a755ef7011..b7c8d079c121 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -85,10 +85,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); * 64-bit PTEs */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) -#define PTE_RPN_MAX (1ULL << (64 - PTE_RPN_SHIFT)) #define PTE_RPN_MASK (~((1ULL< Date: Wed, 17 Jun 2015 08:13:41 +0530 Subject: powerpc/mm: Change the swap encoding in pte. Current swap encoding in pte can't support large pfns above 4TB. Change the swap encoding such that we put the swap type in the PTE bits. Also add build checks to make sure we don't overlap with HPTEFLAGS. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable-ppc64.h | 26 +++++++++++++++++++++----- arch/powerpc/include/asm/pte-book3e.h | 1 + arch/powerpc/include/asm/pte-hash64.h | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index f951d9cf358a..f890f7ce1593 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -347,11 +347,27 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ -#define __swp_type(entry) (((entry).val >> 1) & 0x3f) -#define __swp_offset(entry) ((entry).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)}) -#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT}) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT }) +#define MAX_SWAPFILES_CHECK() do { \ + BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ + /* \ + * Don't have overlapping bits with _PAGE_HPTEFLAGS \ + * We filter HPTEFLAGS on set_pte. \ + */ \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ + } while (0) +/* + * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; + */ +#define SWP_TYPE_BITS 5 +#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ + & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | ((offset) << PTE_RPN_SHIFT) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __swp_entry_to_pte(x) __pte((x).val) void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h index 91a704952ca1..8d8473278d91 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/pte-book3e.h @@ -11,6 +11,7 @@ /* Architected bits */ #define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */ #define _PAGE_SW1 0x000002 +#define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_BAP_SR 0x000004 #define _PAGE_BAP_UR 0x000008 #define _PAGE_BAP_SW 0x000010 diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index fc852f7e7b3a..ef612c160da7 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -16,6 +16,7 @@ */ #define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ #define _PAGE_USER 0x0002 /* matches one of the PP bits */ +#define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ #define _PAGE_GUARDED 0x0008 /* We can derive Memory coherence from _PAGE_NO_CACHE */ -- cgit v1.2.3 From 5c89a87d13d168eacb8110810544a98ce0f4319d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 18 Jun 2015 11:41:36 +1000 Subject: powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma() When pnv_pci_ioda_fixup() is called during PHB fixup time, each PE in the sorted list of PEs (phb::pe_dma_list) is iterated to setup the PE's DMA32 space by pnv_ioda_setup_bus_dma() if the PE's DMA32 weight is bigger than zero. The function also assigns all the subordinate PCI devices of the PE's primary bus with the PE's DMA32 IOMMU table. It causes the PCI devicess in the child PEs, which don't have DMA weight, receives wrong IOMMU table and then IOMMU group. The patch fixes above issue by more check on the PE's coverage and don't assign IOMMU table to those PCI devices, which belong to the child PEs. The problem was found on Firestone platform initially. Suggested-by: Gavin Shan Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8424f5cdf0b1..5738d315248b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1661,7 +1661,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); iommu_add_device(&dev->dev); - if (dev->subordinate) + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); } } -- cgit v1.2.3