From 3872731187141d5d0a5c4fb30007b8b9ec36a44d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 6 Oct 2021 15:47:35 +1100 Subject: powerps/pseries/dma: Add support for 2M IOMMU page size The upcoming PAPR spec adds a 2M page size, bit 23 right after 16G page size in the "ibm,query-pe-dma-window" call. This adds support for the new page size. Since the new page size is out of sorted order, this changes the loop to not assume that shift[] is sorted. This has now been tested and is known to work on a pre-release version of phyp. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Leonardo Bras Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211006044735.1114669-1-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index dab5c56ffd0e..269f61d519c2 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1159,14 +1159,15 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ static int iommu_get_page_shift(u32 query_page_size) { - /* Supported IO page-sizes according to LoPAR */ + /* Supported IO page-sizes according to LoPAR, note that 2M is out of order */ const int shift[] = { __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), - __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M) }; int i = ARRAY_SIZE(shift) - 1; + int ret = 0; /* * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: @@ -1176,11 +1177,10 @@ static int iommu_get_page_shift(u32 query_page_size) */ for (; i >= 0 ; i--) { if (query_page_size & (1 << i)) - return shift[i]; + ret = max(ret, shift[i]); } - /* No valid page size found. */ - return 0; + return ret; } static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, -- cgit v1.2.3 From 3c2172c1c47b4079c29f0e6637d764a99355ebcd Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 29 Sep 2021 11:36:45 +0800 Subject: powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found When the field described in mpc85xx_smp_guts_ids[] is not configured in dtb, the mpc85xx_setup_pmc() does not assign a value to the "guts" variable. As a result, the oops is triggered when mpc85xx_freeze_time_base() is executed. Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Xiaoming Ni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210929033646.39630-2-nixiaoming@huawei.com --- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d0..ffa8a7a6a2db 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = &mpc85xx_pm_ops; } - qoriq_pm_ops = &mpc85xx_pm_ops; - return 0; } -- cgit v1.2.3 From c45361abb9185b1e172bd75eff51ad5f601ccae4 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 29 Sep 2021 11:36:46 +0800 Subject: powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n When CONFIG_SMP=y, timebase synchronization is required when the second kernel is started. arch/powerpc/kernel/smp.c: int __cpu_up(unsigned int cpu, struct task_struct *tidle) { ... if (smp_ops->give_timebase) smp_ops->give_timebase(); ... } void start_secondary(void *unused) { ... if (smp_ops->take_timebase) smp_ops->take_timebase(); ... } When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, smp_85xx_ops.give_timebase is NULL, smp_85xx_ops.take_timebase is NULL, As a result, the timebase is not synchronized. Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU. Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Xiaoming Ni Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210929033646.39630-3-nixiaoming@huawei.com --- arch/powerpc/platforms/85xx/Makefile | 4 +++- arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 ++++ arch/powerpc/platforms/85xx/smp.c | 12 ++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 60e4e97a929d..260fbad7967b 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -3,7 +3,9 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o +ifneq ($(CONFIG_FSL_CORENET_RCPM),y) +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o +endif obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index ffa8a7a6a2db..4a8af80011a6 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -17,6 +17,7 @@ static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_FSL_PMC static void mpc85xx_irq_mask(int cpu) { @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu) { } +#endif static void mpc85xx_freeze_time_base(bool freeze) { @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = { static const struct fsl_pm_ops mpc85xx_pm_ops = { .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC .irq_mask = mpc85xx_irq_mask, .irq_unmask = mpc85xx_irq_unmask, .cpu_die = mpc85xx_cpu_die, .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif }; int __init mpc85xx_setup_pmc(void) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c6df294054fe..83f4a6389a28 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -40,7 +40,6 @@ struct epapr_spin_table { u32 pir; }; -#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } +#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.probe = NULL; } -#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_FSL_CORENET_RCPM + /* Assign a value to qoriq_pm_ops on PPC_E500MC */ fsl_rcpm_init(); -#endif - -#ifdef CONFIG_FSL_PMC +#else + /* Assign a value to qoriq_pm_ops on !PPC_E500MC */ mpc85xx_setup_pmc(); #endif if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; - } #endif + } smp_ops = &smp_85xx_ops; #ifdef CONFIG_KEXEC_CORE -- cgit v1.2.3 From 494f238a3861863d908af7b98a369f6d8a986c85 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 18 Sep 2021 11:22:32 +0200 Subject: powerpc/476: Fix sparse report Fix sparse errors: arch/powerpc/platforms/44x/ppc476.c:236:17: warning: cast removes address space '__iomem' of expression arch/powerpc/platforms/44x/ppc476.c:241:34: warning: incorrect type in argument 1 (different address spaces) arch/powerpc/platforms/44x/ppc476.c:241:34: expected void const volatile [noderef] __iomem *addr arch/powerpc/platforms/44x/ppc476.c:241:34: got unsigned char [usertype] * arch/powerpc/platforms/44x/ppc476.c:243:17: warning: incorrect type in argument 1 (different address spaces) arch/powerpc/platforms/44x/ppc476.c:243:17: expected void volatile [noderef] __iomem *addr arch/powerpc/platforms/44x/ppc476.c:243:17: got unsigned char [usertype] *[assigned] fpga Mark 'fpga' pointer as __iomem. Fixes: ab9a4183fddf ("powerpc: Update currituck pci/usb fixup for new board revision") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Reviewed-by: Alistair Popple Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aa6055769b92a5d8685b8d0adab99c48a0b0ef4b.1631956926.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/44x/ppc476.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 07f7e3ce67b5..fb7db5cedd4e 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -219,7 +219,7 @@ static int board_rev = -1; static int __init ppc47x_get_board_rev(void) { int reg; - u8 *fpga; + u8 __iomem *fpga; struct device_node *np = NULL; if (of_machine_is_compatible("ibm,currituck")) { @@ -233,7 +233,7 @@ static int __init ppc47x_get_board_rev(void) if (!np) goto fail; - fpga = (u8 *) of_iomap(np, 0); + fpga = of_iomap(np, 0); of_node_put(np); if (!fpga) goto fail; -- cgit v1.2.3 From 93fa8e9d88118bd2bdf2c61f9b63e7d4d9b648fe Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 3 Sep 2021 16:32:46 +1000 Subject: powerpc: Remove unused prototype for of_show_percpuinfo commit 6d7f58b04d82 ("[PATCH] powerpc: Some minor cleanups to setup_32.c") removed of_show_percpuinfo but didn't remove the prototype. Remove it. Fixes: 6d7f58b04d82 ("[PATCH] powerpc: Some minor cleanups to setup_32.c") Signed-off-by: Daniel Axtens Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210903063246.70691-1-dja@axtens.net --- arch/powerpc/platforms/powermac/pmac.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 0d715db434dc..29d2036dcc9d 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -27,7 +27,6 @@ extern void pmac_nvram_update(void); extern unsigned char pmac_nvram_read_byte(int addr); extern void pmac_nvram_write_byte(int addr, unsigned char val); extern void pmac_pcibios_after_init(void); -extern int of_show_percpuinfo(struct seq_file *m, int i); extern void pmac_setup_pci_dma(void); extern void pmac_check_ht_link(void); -- cgit v1.2.3 From 9d7fb0643a156a5dddd887814e1263b648501cb0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 3 Sep 2021 08:23:52 +0000 Subject: powerpc/powermac: Remove stale declaration of pmac_md pmac_md doesn't exist anymore, remove stall declaration. Fixes: e8222502ee61 ("[PATCH] powerpc: Kill _machine and hard-coded platform numbers") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b2e52934e5a500f149e6d94db3cfa0569bc35081.1630657402.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/powermac/setup.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 86aee3f2483f..13e8a8a9841c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -79,8 +79,6 @@ int pmac_newworld; static int current_root_goodness = -1; -extern struct machdep_calls pmac_md; - #define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ #ifdef CONFIG_PPC64 -- cgit v1.2.3 From 452f145eca73945222923525a7eba59cf37909cc Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 10 Sep 2021 16:19:40 +0200 Subject: powerpc: Drop superfluous pci_dev_is_added() calls On powerpc, pci_dev_is_added() is called as part of SR-IOV fixups that are done under pcibios_add_device() which in turn is only called in pci_device_add() whih is called when a PCI device is scanned. pci_dev_assign_added() is called in pci_bus_add_device() which is only called after scanning the device. Thus pci_dev_is_added() is always false and can be dropped. Signed-off-by: Niklas Schnelle Reviewed-by: Bjorn Helgaas Reviewed-by: Oliver O'Halloran [mpe: Tweak change log slightly to reflect Oliver's comments] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210910141940.2598035-2-schnelle@linux.ibm.com --- arch/powerpc/platforms/powernv/pci-sriov.c | 6 ------ arch/powerpc/platforms/pseries/setup.c | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index 28aac933a439..deddbb233fde 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -9,9 +9,6 @@ #include "pci.h" -/* for pci_dev_is_added() */ -#include "../../../../drivers/pci/pci.h" - /* * The majority of the complexity in supporting SR-IOV on PowerNV comes from * the need to put the MMIO space for each VF into a separate PE. Internally @@ -228,9 +225,6 @@ disable_iov: void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) { - if (WARN_ON(pci_dev_is_added(pdev))) - return; - if (pdev->is_virtfn) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f79126f16258..2188054470c1 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -74,7 +74,6 @@ #include #include "pseries.h" -#include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); EXPORT_SYMBOL(shared_processor); @@ -750,7 +749,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) const int *indexes; struct device_node *dn = pci_device_to_OF_node(pdev); - if (!pdev->is_physfn || pci_dev_is_added(pdev)) + if (!pdev->is_physfn) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); -- cgit v1.2.3 From 7eff9bc00ddf1e2281dff575884b7f676c85b006 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 13 Sep 2021 17:17:26 +0200 Subject: powerpc/mem: Fix arch/powerpc/mm/mem.c:53:12: error: no previous prototype for 'create_section_mapping' Commit 8e11d62e2e87 ("powerpc/mem: Add back missing header to fix 'no previous prototype' error") was supposed to fix the problem, but in the meantime commit a927bd6ba952 ("mm: fix phys_to_target_node() and* memory_add_physaddr_to_nid() exports") moved create_section_mapping() prototype from asm/sparsemem.h to asm/mmzone.h Fixes: 8e11d62e2e87 ("powerpc/mem: Add back missing header to fix 'no previous prototype' error") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/025754fde3d027904ae9d0191f395890bec93369.1631541649.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c3c4e31462ec..05b9c3f31456 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #include -- cgit v1.2.3 From 2a24d80fc86bcd70c8e780078254e873ea217379 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 14 Sep 2021 09:17:04 -0700 Subject: powerpc/asm: Remove UPD_CONSTR after GCC 4.9 removal UPD_CONSTR was previously a preprocessor define for an old GCC 4.9 inline asm bug with m<> constraints. Fixes: 6563139d90ad ("powerpc: remove GCC version check for UPD_CONSTR") Suggested-by: Nathan Chancellor Suggested-by: Christophe Leroy Suggested-by: Michael Ellerman Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210914161712.2463458-1-ndesaulniers@google.com --- arch/powerpc/include/asm/asm-const.h | 2 -- arch/powerpc/include/asm/atomic.h | 8 ++++---- arch/powerpc/include/asm/io.h | 4 ++-- arch/powerpc/include/asm/uaccess.h | 6 +++--- arch/powerpc/kvm/powerpc.c | 4 ++-- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h index dbfa5e1e3198..bfb3c3534877 100644 --- a/arch/powerpc/include/asm/asm-const.h +++ b/arch/powerpc/include/asm/asm-const.h @@ -12,6 +12,4 @@ # define ASM_CONST(x) __ASM_CONST(x) #endif -#define UPD_CONSTR "<>" - #endif /* _ASM_POWERPC_ASM_CONST_H */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 6a53ef178bfd..fd594fdbd84d 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -27,14 +27,14 @@ static __inline__ int arch_atomic_read(const atomic_t *v) { int t; - __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); + __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter)); return t; } static __inline__ void arch_atomic_set(atomic_t *v, int i) { - __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); + __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); } #define ATOMIC_OP(op, asm_op) \ @@ -320,14 +320,14 @@ static __inline__ s64 arch_atomic64_read(const atomic64_t *v) { s64 t; - __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"UPD_CONSTR(v->counter)); + __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter)); return t; } static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i) { - __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i)); + __asm__ __volatile__("std%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index f130783c8301..beba4979bff9 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -122,7 +122,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ - : "=r" (ret) : "m"UPD_CONSTR (*addr) : "memory"); \ + : "=r" (ret) : "m<>" (*addr) : "memory"); \ return ret; \ } @@ -130,7 +130,7 @@ static inline u##size name(const volatile u##size __iomem *addr) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ - : "=m"UPD_CONSTR (*addr) : "r" (val) : "memory"); \ + : "=m<>" (*addr) : "r" (val) : "memory"); \ mmiowb_set_pending(); \ } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 22c79ab40006..63316100080c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -86,7 +86,7 @@ __pu_failed: \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ - : "r" (x), "m"UPD_CONSTR (*addr) \ + : "r" (x), "m<>" (*addr) \ : \ : label) @@ -143,7 +143,7 @@ do { \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ - : "m"UPD_CONSTR (*addr) \ + : "m<>" (*addr) \ : \ : label) @@ -200,7 +200,7 @@ __gus_failed: \ ".previous\n" \ EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ - : "m"UPD_CONSTR (*addr), "i" (-EFAULT), "0" (err)) + : "m<>" (*addr), "i" (-EFAULT), "0" (err)) #ifdef __powerpc64__ #define __get_user_asm2(x, addr, err) \ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b4e6f70b97b9..3fd037d36afb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1094,7 +1094,7 @@ static inline u64 sp_to_dp(u32 fprs) preempt_disable(); enable_kernel_fp(); - asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m"UPD_CONSTR (fprd) : "m"UPD_CONSTR (fprs) + asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m<>" (fprd) : "m<>" (fprs) : "fr0"); preempt_enable(); return fprd; @@ -1106,7 +1106,7 @@ static inline u32 dp_to_sp(u64 fprd) preempt_disable(); enable_kernel_fp(); - asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m"UPD_CONSTR (fprs) : "m"UPD_CONSTR (fprd) + asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m<>" (fprs) : "m<>" (fprd) : "fr0"); preempt_enable(); return fprs; -- cgit v1.2.3 From ee87843795ec5dc2f3bb315fade3ec098c88f639 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 14 Sep 2021 20:08:02 +0530 Subject: powerpc/powernv/dump: Fix typo in comment Signed-off-by: Vasant Hegde Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210914143802.54325-1-hegdevasant@linux.vnet.ibm.com --- arch/powerpc/platforms/powernv/opal-dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 00c5a59d82d9..717d1d30ade5 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -419,7 +419,7 @@ void __init opal_platform_dump_init(void) int rc; int dump_irq; - /* ELOG not supported by firmware */ + /* Dump not supported by firmware */ if (!opal_check_token(OPAL_DUMP_READ)) return; -- cgit v1.2.3 From f2719b26ae27282c145202ffd656d5ff1fe737cc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 15 Sep 2021 15:34:35 +0200 Subject: video: fbdev: chipsfb: use memset_io() instead of memset() While investigating a lockup at startup on Powerbook 3400C, it was identified that the fbdev driver generates alignment exception at startup: --- interrupt: 600 at memset+0x60/0xc0 NIP: c0021414 LR: c03fc49c CTR: 00007fff REGS: ca021c10 TRAP: 0600 Tainted: G W (5.14.2-pmac-00727-g12a41fa69492) MSR: 00009032 CR: 44008442 XER: 20000100 DAR: cab80020 DSISR: 00017c07 GPR00: 00000007 ca021cd0 c14412e0 cab80000 00000000 00100000 cab8001c 00000004 GPR08: 00100000 00007fff 00000000 00000000 84008442 00000000 c0006fb4 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00100000 GPR24: 00000000 81800000 00000320 c15fa400 c14d1878 00000000 c14d1800 c094e19c NIP [c0021414] memset+0x60/0xc0 LR [c03fc49c] chipsfb_pci_init+0x160/0x580 --- interrupt: 600 [ca021cd0] [c03fc46c] chipsfb_pci_init+0x130/0x580 (unreliable) [ca021d20] [c03a3a70] pci_device_probe+0xf8/0x1b8 [ca021d50] [c043d584] really_probe.part.0+0xac/0x388 [ca021d70] [c043d914] __driver_probe_device+0xb4/0x170 [ca021d90] [c043da18] driver_probe_device+0x48/0x144 [ca021dc0] [c043e318] __driver_attach+0x11c/0x1c4 [ca021de0] [c043ad30] bus_for_each_dev+0x88/0xf0 [ca021e10] [c043c724] bus_add_driver+0x190/0x22c [ca021e40] [c043ee94] driver_register+0x9c/0x170 [ca021e60] [c0006c28] do_one_initcall+0x54/0x1ec [ca021ed0] [c08246e4] kernel_init_freeable+0x1c0/0x270 [ca021f10] [c0006fdc] kernel_init+0x28/0x11c [ca021f30] [c0017148] ret_from_kernel_thread+0x14/0x1c Instruction dump: 7d4601a4 39490777 7d4701a4 39490888 7d4801a4 39490999 7d4901a4 39290aaa 7d2a01a4 4c00012c 4bfffe88 0fe00000 <4bfffe80> 9421fff0 38210010 48001970 This is due to 'dcbz' instruction being used on non-cached memory. 'dcbz' instruction is used by memset() to zeroize a complete cacheline at once, and memset() is not expected to be used on non cached memory. When performing a 'sparse' check on fbdev driver, it also appears that the use of memset() is unexpected: drivers/video/fbdev/chipsfb.c:334:17: warning: incorrect type in argument 1 (different address spaces) drivers/video/fbdev/chipsfb.c:334:17: expected void * drivers/video/fbdev/chipsfb.c:334:17: got char [noderef] __iomem *screen_base drivers/video/fbdev/chipsfb.c:334:15: warning: memset with byte count of 1048576 Use fb_memset() instead of memset(). fb_memset() is defined as memset_io() for powerpc. Fixes: 8c8709334cec ("[PATCH] ppc32: Remove CONFIG_PMAC_PBOOK") Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/884a54f1e5cb774c1d9b4db780209bee5d4f6718.1631712563.git.christophe.leroy@csgroup.eu --- drivers/video/fbdev/chipsfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index 998067b701fa..393894af26f8 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -331,7 +331,7 @@ static const struct fb_var_screeninfo chipsfb_var = { static void init_chips(struct fb_info *p, unsigned long addr) { - memset(p->screen_base, 0, 0x100000); + fb_memset(p->screen_base, 0, 0x100000); p->fix = chipsfb_fix; p->fix.smem_start = addr; -- cgit v1.2.3 From 56537faf8821e361d739fc5ff58c9c40f54a1d4c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 28 Sep 2021 07:45:50 -0500 Subject: powerpc: fix unbalanced node refcount in check_kvm_guest() When check_kvm_guest() succeeds in looking up a /hypervisor OF node, it returns without performing a matching put for the lookup, leaving the node's reference count elevated. Add the necessary call to of_node_put(), rearranging the code slightly to avoid repetition or goto. Fixes: 107c55005fbd ("powerpc/pseries: Add KVM guest doorbell restrictions") Signed-off-by: Nathan Lynch Reviewed-by: Srikar Dronamraju Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210928124550.132020-1-nathanl@linux.ibm.com --- arch/powerpc/kernel/firmware.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index c7022c41cc31..20328f72f9f2 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -31,11 +31,10 @@ int __init check_kvm_guest(void) if (!hyper_node) return 0; - if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; - - static_branch_enable(&kvm_guest); + if (of_device_is_compatible(hyper_node, "linux,kvm")) + static_branch_enable(&kvm_guest); + of_node_put(hyper_node); return 0; } core_initcall(check_kvm_guest); // before kvm_guest_init() -- cgit v1.2.3 From 799f9b51db688608b50e630a57bee5f699b268ca Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 28 Sep 2021 16:41:46 -0500 Subject: powerpc/paravirt: vcpu_is_preempted() commentary Add comments more clearly documenting that this function determines whether hypervisor-level preemption of the VM has occurred. Signed-off-by: Nathan Lynch Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210928214147.312412-2-nathanl@linux.ibm.com --- arch/powerpc/include/asm/paravirt.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index bcb7b5f917be..39f173961f6a 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -21,7 +21,7 @@ static inline bool is_shared_processor(void) return static_branch_unlikely(&shared_processor); } -/* If bit 0 is set, the cpu has been preempted */ +/* If bit 0 is set, the cpu has been ceded, conferred, or preempted */ static inline u32 yield_count_of(int cpu) { __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count); @@ -92,6 +92,19 @@ static inline void prod_cpu(int cpu) #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { + /* + * The dispatch/yield bit alone is an imperfect indicator of + * whether the hypervisor has dispatched @cpu to run on a physical + * processor. When it is clear, @cpu is definitely not preempted. + * But when it is set, it means only that it *might* be, subject to + * other conditions. So we check other properties of the VM and + * @cpu first, resorting to the yield count last. + */ + + /* + * Hypervisor preemption isn't possible in dedicated processor + * mode by definition. + */ if (!is_shared_processor()) return false; @@ -100,9 +113,10 @@ static inline bool vcpu_is_preempted(int cpu) int first_cpu = cpu_first_thread_sibling(smp_processor_id()); /* - * Preemption can only happen at core granularity. This CPU - * is not preempted if one of the CPU of this core is not - * preempted. + * The PowerVM hypervisor dispatches VMs on a whole core + * basis. So we know that a thread sibling of the local CPU + * cannot have been preempted by the hypervisor, even if it + * has called H_CONFER, which will set the yield bit. */ if (cpu_first_thread_sibling(cpu) == first_cpu) return false; -- cgit v1.2.3 From fda0eb220021a97c1d656434b9340ebf3fc4704a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 28 Sep 2021 16:41:47 -0500 Subject: powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted() vcpu_is_preempted() can be used outside of preempt-disabled critical sections, yielding warnings such as: BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/185 caller is rwsem_spin_on_owner+0x1cc/0x2d0 CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33 Call Trace: [c000000012907ac0] [c000000000aa30a8] dump_stack_lvl+0xac/0x108 (unreliable) [c000000012907b00] [c000000001371f70] check_preemption_disabled+0x150/0x160 [c000000012907b90] [c0000000001e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0 [c000000012907be0] [c0000000001e1408] rwsem_down_write_slowpath+0x478/0x9a0 [c000000012907ca0] [c000000000576cf4] filename_create+0x94/0x1e0 [c000000012907d10] [c00000000057ac08] do_symlinkat+0x68/0x1a0 [c000000012907d70] [c00000000057ae18] sys_symlink+0x58/0x70 [c000000012907da0] [c00000000002e448] system_call_exception+0x198/0x3c0 [c000000012907e10] [c00000000000c54c] system_call_common+0xec/0x250 The result of vcpu_is_preempted() is always used speculatively, and the function does not access per-cpu resources in a (Linux) preempt-unsafe way. Use raw_smp_processor_id() to avoid such warnings, adding explanatory comments. Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()") Signed-off-by: Nathan Lynch Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210928214147.312412-3-nathanl@linux.ibm.com --- arch/powerpc/include/asm/paravirt.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 39f173961f6a..eb7df559ae74 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -110,7 +110,23 @@ static inline bool vcpu_is_preempted(int cpu) #ifdef CONFIG_PPC_SPLPAR if (!is_kvm_guest()) { - int first_cpu = cpu_first_thread_sibling(smp_processor_id()); + int first_cpu; + + /* + * The result of vcpu_is_preempted() is used in a + * speculative way, and is always subject to invalidation + * by events internal and external to Linux. While we can + * be called in preemptable context (in the Linux sense), + * we're not accessing per-cpu resources in a way that can + * race destructively with Linux scheduler preemption and + * migration, and callers can tolerate the potential for + * error introduced by sampling the CPU index without + * pinning the task to it. So it is permissible to use + * raw_smp_processor_id() here to defeat the preempt debug + * warnings that can arise from using smp_processor_id() + * in arbitrary contexts. + */ + first_cpu = cpu_first_thread_sibling(raw_smp_processor_id()); /* * The PowerVM hypervisor dispatches VMs on a whole core -- cgit v1.2.3 From 7edd5c9a8820bedb22870b34a809d45f2a86a35a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 27 Sep 2021 15:19:30 -0500 Subject: powerpc/pseries/cpuhp: cache node corrections On pseries, cache nodes in the device tree can be added and removed by the CPU DLPAR code as well as the partition migration (mobility) code. PowerVM partitions in dedicated processor mode typically have L2 and L3 cache nodes. The CPU DLPAR code has the following shortcomings: * Cache nodes returned as siblings of a new CPU node by ibm,configure-connector are silently discarded; only the CPU node is added to the device tree. * Cache nodes which become unreferenced in the processor removal path are not removed from the device tree. This can lead to duplicate nodes when the post-migration device tree update code replaces cache nodes. This is long-standing behavior. Presumably it has gone mostly unnoticed because the two bugs have the property of obscuring each other in common simple scenarios (e.g. remove a CPU and add it back). Likely you'd notice only if you cared to inspect the device tree or the sysfs cacheinfo information. Booted with two processors: $ pwd /sys/firmware/devicetree/base/cpus $ ls -1d */ l2-cache@2010/ l2-cache@2011/ l3-cache@3110/ l3-cache@3111/ PowerPC,POWER9@0/ PowerPC,POWER9@8/ $ lsprop */l2-cache l2-cache@2010/l2-cache 00003110 (12560) l2-cache@2011/l2-cache 00003111 (12561) PowerPC,POWER9@0/l2-cache 00002010 (8208) PowerPC,POWER9@8/l2-cache 00002011 (8209) $ ls /sys/devices/system/cpu/cpu0/cache/ index0 index1 index2 index3 After DLPAR-adding PowerPC,POWER9@10, we see that its associated cache nodes are absent, its threads' L2+L3 cacheinfo is unpopulated, and it is missing a cache level in its sched domain hierarchy: $ ls -1d */ l2-cache@2010/ l2-cache@2011/ l3-cache@3110/ l3-cache@3111/ PowerPC,POWER9@0/ PowerPC,POWER9@10/ PowerPC,POWER9@8/ $ lsprop PowerPC\,POWER9@10/l2-cache PowerPC,POWER9@10/l2-cache 00002012 (8210) $ ls /sys/devices/system/cpu/cpu16/cache/ index0 index1 $ grep . /sys/kernel/debug/sched/domains/cpu{0,8,16}/domain*/name /sys/kernel/debug/sched/domains/cpu0/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu0/domain1/name:CACHE /sys/kernel/debug/sched/domains/cpu0/domain2/name:DIE /sys/kernel/debug/sched/domains/cpu8/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu8/domain1/name:CACHE /sys/kernel/debug/sched/domains/cpu8/domain2/name:DIE /sys/kernel/debug/sched/domains/cpu16/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu16/domain1/name:DIE When removing PowerPC,POWER9@8, we see that its cache nodes are left behind: $ ls -1d */ l2-cache@2010/ l2-cache@2011/ l3-cache@3110/ l3-cache@3111/ PowerPC,POWER9@0/ When DLPAR is combined with VM migration, we can get duplicate nodes. E.g. removing one processor, then migrating, adding a processor, and then migrating again can result in warnings from the OF core during post-migration device tree updates: Duplicate name in cpus, renamed to "l2-cache@2011#1" Duplicate name in cpus, renamed to "l3-cache@3111#1" and nodes with duplicated phandles in the tree, making lookup behavior unpredictable: $ lsprop l[23]-cache@*/ibm,phandle l2-cache@2010/ibm,phandle 00002010 (8208) l2-cache@2011#1/ibm,phandle 00002011 (8209) l2-cache@2011/ibm,phandle 00002011 (8209) l3-cache@3110/ibm,phandle 00003110 (12560) l3-cache@3111#1/ibm,phandle 00003111 (12561) l3-cache@3111/ibm,phandle 00003111 (12561) Address these issues by: * Correctly processing siblings of the node returned from dlpar_configure_connector(). * Removing cache nodes in the CPU remove path when it can be determined that they are not associated with other CPUs or caches. Use the of_changeset API in both cases, which allows us to keep the error handling in this code from becoming more complex while ensuring that the device tree cannot become inconsistent. Fixes: ac71380071d1 ("powerpc/pseries: Add CPU dlpar remove functionality") Fixes: 90edf184b9b7 ("powerpc/pseries: Add CPU dlpar add functionality") Signed-off-by: Nathan Lynch Tested-by: Daniel Henrique Barboza Reviewed-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210927201933.76786-2-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 75 ++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index d646c22e94ab..00ac7d7e63e5 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -521,6 +521,27 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index) return found; } +static int pseries_cpuhp_attach_nodes(struct device_node *dn) +{ + struct of_changeset cs; + int ret; + + /* + * This device node is unattached but may have siblings; open-code the + * traversal. + */ + for (of_changeset_init(&cs); dn != NULL; dn = dn->sibling) { + ret = of_changeset_attach_node(&cs, dn); + if (ret) + goto out; + } + + ret = of_changeset_apply(&cs); +out: + of_changeset_destroy(&cs); + return ret; +} + static ssize_t dlpar_cpu_add(u32 drc_index) { struct device_node *dn, *parent; @@ -563,7 +584,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return -EINVAL; } - rc = dlpar_attach_node(dn, parent); + rc = pseries_cpuhp_attach_nodes(dn); /* Regardless we are done with parent now */ of_node_put(parent); @@ -600,6 +621,53 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return rc; } +static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn) +{ + unsigned int use_count = 0; + struct device_node *dn; + + WARN_ON(!of_node_is_type(cachedn, "cache")); + + for_each_of_cpu_node(dn) { + if (of_find_next_cache_node(dn) == cachedn) + use_count++; + } + + for_each_node_by_type(dn, "cache") { + if (of_find_next_cache_node(dn) == cachedn) + use_count++; + } + + return use_count; +} + +static int pseries_cpuhp_detach_nodes(struct device_node *cpudn) +{ + struct device_node *dn; + struct of_changeset cs; + int ret = 0; + + of_changeset_init(&cs); + ret = of_changeset_detach_node(&cs, cpudn); + if (ret) + goto out; + + dn = cpudn; + while ((dn = of_find_next_cache_node(dn))) { + if (pseries_cpuhp_cache_use_count(dn) > 1) + break; + + ret = of_changeset_detach_node(&cs, dn); + if (ret) + goto out; + } + + ret = of_changeset_apply(&cs); +out: + of_changeset_destroy(&cs); + return ret; +} + static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) { int rc; @@ -621,7 +689,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) return rc; } - rc = dlpar_detach_node(dn); + rc = pseries_cpuhp_detach_nodes(dn); if (rc) { int saved_rc = rc; @@ -885,10 +953,9 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { - u32 count, drc_index; + u32 drc_index; int rc; - count = hp_elog->_drc_u.drc_count; drc_index = hp_elog->_drc_u.drc_index; lock_device_hotplug(); -- cgit v1.2.3 From 983f9101740641434cea4f2e172175ff4b0276ad Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 27 Sep 2021 15:19:31 -0500 Subject: powerpc/cpuhp: BUG -> WARN conversion in offline path If, due to bugs elsewhere, we get into unregister_cpu_online() with a CPU that isn't marked hotpluggable, we can emit a warning and return an appropriate error instead of crashing. Signed-off-by: Nathan Lynch Reviewed-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210927201933.76786-3-nathanl@linux.ibm.com --- arch/powerpc/kernel/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index defecb3b1b15..08d8072d6e7a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -928,7 +928,8 @@ static int unregister_cpu_online(unsigned int cpu) struct device_attribute *attrs, *pmc_attrs; int i, nattrs; - BUG_ON(!c->hotpluggable); + if (WARN_RATELIMIT(!c->hotpluggable, "cpu %d can't be offlined\n", cpu)) + return -EBUSY; #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SMT)) -- cgit v1.2.3 From fa2a5dfe2ddd0e7c77e5f608e1fa374192e5be97 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 27 Sep 2021 15:19:32 -0500 Subject: powerpc/pseries/cpuhp: delete add/remove_by_count code The core DLPAR code supports two actions (add and remove) and three subtypes of action: * By DRC index: the action is attempted on a single specified resource. This is the usual case for processors. * By indexed count: the action is attempted on a range of resources beginning at the specified index. This is implemented only by the memory DLPAR code. * By count: the lower layer (CPU or memory) is responsible for locating the specified number of resources to which the action can be applied. I cannot find any evidence of the "by count" subtype being used by drmgr or qemu for processors. And when I try to exercise this code, the add case does not work: $ ppc64_cpu --smt ; nproc SMT=8 24 $ printf "cpu remove count 2" > /sys/kernel/dlpar $ nproc 8 $ printf "cpu add count 2" > /sys/kernel/dlpar -bash: printf: write error: Invalid argument $ dmesg | tail -2 pseries-hotplug-cpu: Failed to find enough CPUs (1 of 2) to add dlpar: Could not handle DLPAR request "cpu add count 2" $ nproc 8 $ drmgr -c cpu -a -q 2 # this uses the by-index method Validating CPU DLPAR capability...yes. CPU 1 CPU 17 $ nproc 24 This is because find_drc_info_cpus_to_add() does not increment drc_index appropriately during its search. This is not hard to fix. But the _by_count() functions also have the property that they attempt to roll back all prior operations if the entire request cannot be satisfied, even though the rollback itself can encounter errors. It's not possible to provide transaction-like behavior at this level, and it's undesirable to have code that can only pretend to do that. Any users of these functions cannot know what the state of the system is in the error case. And the error paths are, to my knowledge, impossible to test without adding custom error injection code. Summary: * This code has not worked reliably since its introduction. * There is no evidence that it is used. * It contains questionable rollback behaviors in error paths which are difficult to test. So let's remove it. Fixes: ac71380071d1 ("powerpc/pseries: Add CPU dlpar remove functionality") Fixes: 90edf184b9b7 ("powerpc/pseries: Add CPU dlpar add functionality") Fixes: b015f6bc9547 ("powerpc/pseries: Add cpu DLPAR support for drc-info property") Signed-off-by: Nathan Lynch Tested-by: Daniel Henrique Barboza Reviewed-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210927201933.76786-4-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 218 +-------------------------- 1 file changed, 2 insertions(+), 216 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 00ac7d7e63e5..b50f3e9aa259 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -741,216 +741,6 @@ static int dlpar_cpu_remove_by_index(u32 drc_index) return rc; } -static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove) -{ - struct device_node *dn; - int cpus_found = 0; - int rc; - - /* We want to find cpus_to_remove + 1 CPUs to ensure we do not - * remove the last CPU. - */ - for_each_node_by_type(dn, "cpu") { - cpus_found++; - - if (cpus_found > cpus_to_remove) { - of_node_put(dn); - break; - } - - /* Note that cpus_found is always 1 ahead of the index - * into the cpu_drcs array, so we use cpus_found - 1 - */ - rc = of_property_read_u32(dn, "ibm,my-drc-index", - &cpu_drcs[cpus_found - 1]); - if (rc) { - pr_warn("Error occurred getting drc-index for %pOFn\n", - dn); - of_node_put(dn); - return -1; - } - } - - if (cpus_found < cpus_to_remove) { - pr_warn("Failed to find enough CPUs (%d of %d) to remove\n", - cpus_found, cpus_to_remove); - } else if (cpus_found == cpus_to_remove) { - pr_warn("Cannot remove all CPUs\n"); - } - - return cpus_found; -} - -static int dlpar_cpu_remove_by_count(u32 cpus_to_remove) -{ - u32 *cpu_drcs; - int cpus_found; - int cpus_removed = 0; - int i, rc; - - pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove); - - cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL); - if (!cpu_drcs) - return -EINVAL; - - cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove); - if (cpus_found <= cpus_to_remove) { - kfree(cpu_drcs); - return -EINVAL; - } - - for (i = 0; i < cpus_to_remove; i++) { - rc = dlpar_cpu_remove_by_index(cpu_drcs[i]); - if (rc) - break; - - cpus_removed++; - } - - if (cpus_removed != cpus_to_remove) { - pr_warn("CPU hot-remove failed, adding back removed CPUs\n"); - - for (i = 0; i < cpus_removed; i++) - dlpar_cpu_add(cpu_drcs[i]); - - rc = -EINVAL; - } else { - rc = 0; - } - - kfree(cpu_drcs); - return rc; -} - -static int find_drc_info_cpus_to_add(struct device_node *cpus, - struct property *info, - u32 *cpu_drcs, u32 cpus_to_add) -{ - struct of_drc_info drc; - const __be32 *value; - u32 count, drc_index; - int cpus_found = 0; - int i, j; - - if (!info) - return -1; - - value = of_prop_next_u32(info, NULL, &count); - if (value) - value++; - - for (i = 0; i < count; i++) { - of_read_drc_info_cell(&info, &value, &drc); - if (strncmp(drc.drc_type, "CPU", 3)) - break; - - drc_index = drc.drc_index_start; - for (j = 0; j < drc.num_sequential_elems; j++) { - if (dlpar_cpu_exists(cpus, drc_index)) - continue; - - cpu_drcs[cpus_found++] = drc_index; - - if (cpus_found == cpus_to_add) - return cpus_found; - - drc_index += drc.sequential_inc; - } - } - - return cpus_found; -} - -static int find_drc_index_cpus_to_add(struct device_node *cpus, - u32 *cpu_drcs, u32 cpus_to_add) -{ - int cpus_found = 0; - int index, rc; - u32 drc_index; - - /* Search the ibm,drc-indexes array for possible CPU drcs to - * add. Note that the format of the ibm,drc-indexes array is - * the number of entries in the array followed by the array - * of drc values so we start looking at index = 1. - */ - index = 1; - while (cpus_found < cpus_to_add) { - rc = of_property_read_u32_index(cpus, "ibm,drc-indexes", - index++, &drc_index); - - if (rc) - break; - - if (dlpar_cpu_exists(cpus, drc_index)) - continue; - - cpu_drcs[cpus_found++] = drc_index; - } - - return cpus_found; -} - -static int dlpar_cpu_add_by_count(u32 cpus_to_add) -{ - struct device_node *parent; - struct property *info; - u32 *cpu_drcs; - int cpus_added = 0; - int cpus_found; - int i, rc; - - pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add); - - cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL); - if (!cpu_drcs) - return -EINVAL; - - parent = of_find_node_by_path("/cpus"); - if (!parent) { - pr_warn("Could not find CPU root node in device tree\n"); - kfree(cpu_drcs); - return -1; - } - - info = of_find_property(parent, "ibm,drc-info", NULL); - if (info) - cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add); - else - cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add); - - of_node_put(parent); - - if (cpus_found < cpus_to_add) { - pr_warn("Failed to find enough CPUs (%d of %d) to add\n", - cpus_found, cpus_to_add); - kfree(cpu_drcs); - return -EINVAL; - } - - for (i = 0; i < cpus_to_add; i++) { - rc = dlpar_cpu_add(cpu_drcs[i]); - if (rc) - break; - - cpus_added++; - } - - if (cpus_added < cpus_to_add) { - pr_warn("CPU hot-add failed, removing any added CPUs\n"); - - for (i = 0; i < cpus_added; i++) - dlpar_cpu_remove_by_index(cpu_drcs[i]); - - rc = -EINVAL; - } else { - rc = 0; - } - - kfree(cpu_drcs); - return rc; -} - int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { u32 drc_index; @@ -962,9 +752,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) switch (hp_elog->action) { case PSERIES_HP_ELOG_ACTION_REMOVE: - if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) - rc = dlpar_cpu_remove_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { rc = dlpar_cpu_remove_by_index(drc_index); /* * Setting the isolation state of an UNISOLATED/CONFIGURED @@ -978,9 +766,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) rc = -EINVAL; break; case PSERIES_HP_ELOG_ACTION_ADD: - if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) - rc = dlpar_cpu_add_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) rc = dlpar_cpu_add(drc_index); else rc = -EINVAL; -- cgit v1.2.3 From f9473a65719e59c45f1638cc04db7c80de8fcc1a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 27 Sep 2021 15:19:33 -0500 Subject: powerpc/pseries/cpuhp: remove obsolete comment from pseries_cpu_die This comment likely refers to the obsolete DLPAR workflow where some resource state transitions were driven more directly from user space utilities, but it also seems to contradict itself: "Change isolate state to Isolate [...]" is at odds with the preceding sentences, and it does not relate at all to the code that follows. Remove it to prevent confusion. Signed-off-by: Nathan Lynch Reviewed-by: Daniel Henrique Barboza Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210927201933.76786-5-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index b50f3e9aa259..5ab44600c8d3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -137,11 +137,6 @@ static void pseries_cpu_die(unsigned int cpu) cpu, pcpu); } - /* Isolation and deallocation are definitely done by - * drslot_chrp_cpu. If they were not they would be - * done here. Change isolate state to Isolate and - * change allocation-state to Unusable. - */ paca_ptrs[cpu]->cpu_start = 0; } -- cgit v1.2.3 From 02b182e67482d9167a13a0ff19b55037b70b21ad Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 7 Oct 2021 12:25:02 +0530 Subject: powerpc/perf: Refactor the code definition of perf reg extended mask PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 defines the mask value for extended registers. Current definition of these mask values uses hex constant and does not use registers by name, making it less readable. Patch refactor the macro values by or'ing together the actual register value constants. Also include PERF_REG_EXTENDED_MAX as part of enum definition. Suggested-by: Michael Ellerman Signed-off-by: Athira Rajeev Reviewed-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211007065505.27809-2-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/uapi/asm/perf_regs.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index 578b3ee86105..085094553f3b 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,27 +61,32 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, - /* Max regs without the extended regs */ + /* Max mask value for interrupt regs w/o extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, + /* Max mask value for interrupt regs including extended regs */ + PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_PMC6 + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ -#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) - /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 * includes 9 SPRS from MMCR0 to PMC6 excluding the - * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + * unsupported SPRS MMCR3, SIER2 and SIER3. */ -#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) +#define PERF_REG_PMU_MASK_300 \ + ((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \ + (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ + (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ + (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ + (1ULL << PERF_REG_POWERPC_PMC6)) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 * includes 12 SPRs from MMCR0 to PMC6. */ -#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) +#define PERF_REG_PMU_MASK_31 \ + (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ + (1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3)) -#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ -- cgit v1.2.3 From 29908bbf7b8960d261dfdd428bbaa656275e80f3 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 7 Oct 2021 12:25:04 +0530 Subject: powerpc/perf: Expose instruction and data address registers as part of extended regs Patch adds support to include Sampled Instruction Address Register (SIAR) and Sampled Data Address Register (SDAR) SPRs as part of extended registers. Update the definition of PERF_REG_PMU_MASK_300/31 and PERF_REG_EXTENDED_MAX to include these SPR's. Signed-off-by: Athira Rajeev Reviewed-by: Daniel Axtens Reviewed-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211007065505.27809-4-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/uapi/asm/perf_regs.h | 11 +++++++---- arch/powerpc/perf/perf_regs.c | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index 085094553f3b..749a2e3af89e 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,17 +61,19 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, + PERF_REG_POWERPC_SDAR, + PERF_REG_POWERPC_SIAR, /* Max mask value for interrupt regs w/o extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, /* Max mask value for interrupt regs including extended regs */ - PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_PMC6 + 1, + PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 - * includes 9 SPRS from MMCR0 to PMC6 excluding the + * includes 11 SPRS from MMCR0 to SIAR excluding the * unsupported SPRS MMCR3, SIER2 and SIER3. */ #define PERF_REG_PMU_MASK_300 \ @@ -79,11 +81,12 @@ enum perf_event_powerpc_regs { (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ - (1ULL << PERF_REG_POWERPC_PMC6)) + (1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \ + (1ULL << PERF_REG_POWERPC_SIAR)) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 - * includes 12 SPRs from MMCR0 to PMC6. + * includes 14 SPRs from MMCR0 to SIAR. */ #define PERF_REG_PMU_MASK_31 \ (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index b931eed482c9..51d31b65e423 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -90,7 +90,11 @@ static u64 get_ext_regs_value(int idx) return mfspr(SPRN_SIER2); case PERF_REG_POWERPC_SIER3: return mfspr(SPRN_SIER3); + case PERF_REG_POWERPC_SDAR: + return mfspr(SPRN_SDAR); #endif + case PERF_REG_POWERPC_SIAR: + return mfspr(SPRN_SIAR); default: return 0; } } -- cgit v1.2.3 From 322fda0405fecaaa540b0fa90393830aaadaf420 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:57:49 +1000 Subject: KVM: PPC: Book3S HV: H_ENTER filter out reserved HPTE[B] value The HPTE B field is a 2-bit field with values 0b10 and 0b11 reserved. This field is also taken from the HPTE and used when KVM executes TLBIEs to set the B field of those instructions. Disallow the guest setting B to a reserved value with H_ENTER by rejecting it. This is the same approach already taken for rejecting reserved (unsupported) LLP values. This prevents the guest from being able to induce the host to execute TLBIE with reserved values, which is not known to be a problem with current processors but in theory it could prevent the TLBIE from working correctly in a future processor. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145749.1331331-1-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 4 ++++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 19b6942c6969..fff391b9b97b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -378,6 +378,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, rb |= 1; /* L field */ rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */ } + /* + * This sets both bits of the B field in the PTE. 0b1x values are + * reserved, but those will have been filtered by kvmppc_do_h_enter. + */ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ return rb; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 632b2545072b..2c1f3c6e72d1 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -207,6 +207,15 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (kvm_is_radix(kvm)) return H_FUNCTION; + /* + * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so + * these functions should work together -- must ensure a guest can not + * cause problems with the TLBIE that KVM executes. + */ + if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) { + /* B=0b1x is a reserved value, disallow it. */ + return H_PARAMETER; + } psize = kvmppc_actual_pgsz(pteh, ptel); if (!psize) return H_PARAMETER; -- cgit v1.2.3 From 602946ec2f90d5bd965857753880db29d2d9a1e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 12 Oct 2021 12:40:37 +0200 Subject: powerpc: Set max_mapnr correctly max_mapnr is used by virt_addr_valid() to check if a linear address is valid. It must only include lowmem PFNs, like other architectures. Problem detected on a system with 1G mem (Only 768M are mapped), with CONFIG_DEBUG_VIRTUAL and CONFIG_TEST_DEBUG_VIRTUAL, it didn't report virt_to_phys(VMALLOC_START), VMALLOC_START being 0xf1000000. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/77d99037782ac4b3c3b0124fc4ae80ce7b760b05.1634035228.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 05b9c3f31456..bd5d91a31183 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -256,7 +256,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_pfn); + set_max_mapnr(max_low_pfn); kasan_late_init(); -- cgit v1.2.3 From 6ffeb56ee2109b30a9e393f7e0c22c9b5030ac38 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 11 Oct 2021 09:03:56 +0200 Subject: powerpc/boot: Use CONFIG_PPC_POWERNV to compile OPAL support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_PPC64_BOOT_WRAPPER is selected by CPU_LITTLE_ENDIAN which is used to compile support for other platforms such as Microwatt. There is no need for OPAL calls on these. Signed-off-by: Cédric Le Goater Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211011070356.99952-1-clg@kaod.org --- arch/powerpc/boot/Makefile | 2 +- arch/powerpc/boot/serial.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 089ee3ea55c8..9993c6256ad2 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -123,7 +123,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ oflib.c ofconsole.c cuboot.c src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c -src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c +src-wlib-$(CONFIG_PPC_POWERNV) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S endif diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 9a19e5905485..54d2522be485 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -132,7 +132,7 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC64_BOOT_WRAPPER +#ifdef CONFIG_PPC_POWERNV else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); #endif -- cgit v1.2.3 From b616230e2325e5560330c40172a4d4e4c5da2c59 Mon Sep 17 00:00:00 2001 From: Kai Song Date: Sat, 9 Oct 2021 12:16:30 +0800 Subject: powerpc/eeh: Fix docstrings in eeh.c We fix the following warnings when building kernel with W=1: arch/powerpc/kernel/eeh.c:598: warning: Function parameter or member 'function' not described in 'eeh_pci_enable' arch/powerpc/kernel/eeh.c:774: warning: Function parameter or member 'edev' not described in 'eeh_set_dev_freset' arch/powerpc/kernel/eeh.c:774: warning: expecting prototype for eeh_set_pe_freset(). Prototype was for eeh_set_dev_freset() instead arch/powerpc/kernel/eeh.c:814: warning: Function parameter or member 'include_passed' not described in 'eeh_pe_reset_full' arch/powerpc/kernel/eeh.c:944: warning: Function parameter or member 'ops' not described in 'eeh_init' arch/powerpc/kernel/eeh.c:1451: warning: Function parameter or member 'include_passed' not described in 'eeh_pe_reset' arch/powerpc/kernel/eeh.c:1526: warning: Function parameter or member 'func' not described in 'eeh_pe_inject_err' arch/powerpc/kernel/eeh.c:1526: warning: Excess function parameter 'function' described in 'eeh_pe_inject_err' Signed-off-by: Kai Song Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211009041630.4135-1-songkai01@inspur.com --- arch/powerpc/kernel/eeh.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index e9b597ed423c..91e0f4cf1db3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -589,6 +589,7 @@ EXPORT_SYMBOL(eeh_check_failure); /** * eeh_pci_enable - Enable MMIO or DMA transfers for this slot * @pe: EEH PE + * @function: EEH option * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing @@ -761,8 +762,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat } /** - * eeh_set_pe_freset - Check the required reset for the indicated device - * @data: EEH device + * eeh_set_dev_freset - Check the required reset for the indicated device + * @edev: EEH device * @flag: return value * * Each device might have its preferred reset type: fundamental or @@ -801,6 +802,7 @@ static void eeh_pe_refreeze_passed(struct eeh_pe *root) /** * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE + * @include_passed: include passed-through devices? * * This function executes a full reset procedure on a PE, including setting * the appropriate flags, performing a fundamental or hot reset, and then @@ -937,6 +939,7 @@ static struct notifier_block eeh_device_nb = { /** * eeh_init - System wide EEH initialization + * @ops: struct to trace EEH operation callback functions * * It's the platform's job to call this from an arch_initcall(). */ @@ -1442,6 +1445,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE * @option: reset type + * @include_passed: include passed-through devices? * * The routine is called to reset the specified PE with the * indicated type, either fundamental reset or hot reset. @@ -1513,12 +1517,12 @@ EXPORT_SYMBOL_GPL(eeh_pe_configure); * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE * @pe: the indicated PE * @type: error type - * @function: error function + * @func: error function * @addr: address * @mask: address mask * * The routine is called to inject the specified PCI error, which - * is determined by @type and @function, to the indicated PE for + * is determined by @type and @func, to the indicated PE for * testing purpose. */ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, -- cgit v1.2.3 From 8f6aca0e0f26eaaee670cd27896993a45cdc8f9e Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 7 Oct 2021 13:21:21 +0530 Subject: powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10 On power9 and earlier platforms, the default event used for cyles and instructions is PM_CYC (0x0001e) and PM_INST_CMPL (0x00002) respectively. These events use two programmable PMCs and by default will count irrespective of the run latch state (idle state). But since they use programmable PMCs, these events can lead to multiplexing with other events, because there are only 4 programmable PMCs. Hence in power10, performance monitoring unit (PMU) driver uses performance monitor counter 5 (PMC5) and performance monitor counter6 (PMC6) for counting instructions and cycles. Currently on power10, the event used for cycles is PM_RUN_CYC (0x600F4) and instructions uses PM_RUN_INST_CMPL (0x500fa). But counting of these events in idle state is controlled by the CC56RUN bit setting in Monitor Mode Control Register0 (MMCR0). If the CC56RUN bit is zero, PMC5/6 will not count when CTRL[RUN] (run latch) is zero. This could lead to missing some counts if a thread is in idle state during system wide profiling. To fix it, set the CC56RUN bit in MMCR0 for power10, which makes PMC5 and PMC6 count instructions and cycles regardless of the run latch state. Since this change make PMC5/6 count as PM_INST_CMPL/PM_CYC, rename the event code 0x600f4 as PM_CYC instead of PM_RUN_CYC and event code 0x500fa as PM_INST_CMPL instead of PM_RUN_INST_CMPL. The changes are only for PMC5/6 event codes and will not affect the behaviour of PM_RUN_CYC/PM_RUN_INST_CMPL if progammed in other PMC's. Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support") Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan [mpe: Tweak change log wording for style and consistency] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211007075121.28497-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/power10-events-list.h | 8 +++--- arch/powerpc/perf/power10-pmu.c | 44 +++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index 93be7197d250..564f14097f07 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -9,10 +9,10 @@ /* * Power10 event codes. */ -EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_CYC, 0x600f4); EVENT(PM_DISP_STALL_CYC, 0x100f8); EVENT(PM_EXEC_STALL, 0x30008); -EVENT(PM_RUN_INST_CMPL, 0x500fa); +EVENT(PM_INST_CMPL, 0x500fa); EVENT(PM_BR_CMPL, 0x4d05e); EVENT(PM_BR_MPRED_CMPL, 0x400f6); EVENT(PM_BR_FIN, 0x2f04a); @@ -50,8 +50,8 @@ EVENT(PM_DTLB_MISS, 0x300fc); /* ITLB Reloaded */ EVENT(PM_ITLB_MISS, 0x400fc); -EVENT(PM_RUN_CYC_ALT, 0x0001e); -EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); +EVENT(PM_CYC_ALT, 0x0001e); +EVENT(PM_INST_CMPL_ALT, 0x00002); /* * Memory Access Events diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index f9d64c63bb4a..9dd75f385837 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -91,8 +91,8 @@ extern u64 PERF_REG_EXTENDED_MASK; /* Table of alternatives, sorted by column 0 */ static const unsigned int power10_event_alternatives[][MAX_ALT] = { - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, + { PM_CYC_ALT, PM_CYC }, + { PM_INST_CMPL_ALT, PM_INST_CMPL }, }; static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) @@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev) return 0; } -GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); -GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); @@ -148,8 +148,8 @@ CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); static struct attribute *power10_events_attr_dd1[] = { - GENERIC_EVENT_PTR(PM_RUN_CYC), - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_CMPL), GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), GENERIC_EVENT_PTR(PM_LD_REF_L1), @@ -173,8 +173,8 @@ static struct attribute *power10_events_attr_dd1[] = { }; static struct attribute *power10_events_attr[] = { - GENERIC_EVENT_PTR(PM_RUN_CYC), - GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_INST_CMPL), GENERIC_EVENT_PTR(PM_BR_FIN), GENERIC_EVENT_PTR(PM_MPRED_BR_FIN), GENERIC_EVENT_PTR(PM_LD_REF_L1), @@ -271,8 +271,8 @@ static const struct attribute_group *power10_pmu_attr_groups[] = { }; static int power10_generic_events_dd1[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, - [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, @@ -280,8 +280,8 @@ static int power10_generic_events_dd1[] = { }; static int power10_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, - [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN, [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN, [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, @@ -548,6 +548,24 @@ static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C +/* + * Set the MMCR0[CC56RUN] bit to enable counting for + * PMC5 and PMC6 regardless of the state of CTRL[RUN], + * so that we can use counters 5 and 6 as PM_INST_CMPL and + * PM_CYC. + */ +static int power10_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[], u32 flags) +{ + int ret; + + ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags); + if (!ret) + mmcr->mmcr0 |= MMCR0_C56RUN; + return ret; +} + static struct power_pmu power10_pmu = { .name = "POWER10", .n_counter = MAX_PMU_COUNTERS, @@ -555,7 +573,7 @@ static struct power_pmu power10_pmu = { .test_adder = ISA207_TEST_ADDER, .group_constraint_mask = CNST_CACHE_PMC4_MASK, .group_constraint_val = CNST_CACHE_PMC4_VAL, - .compute_mmcr = isa207_compute_mmcr, + .compute_mmcr = power10_compute_mmcr, .config_bhrb = power10_config_bhrb, .bhrb_filter_map = power10_bhrb_filter_map, .get_constraint = isa207_get_constraint, -- cgit v1.2.3 From 3091f5fc5f1df7741ddf326561384e0997eca2a1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 13 Oct 2021 16:43:54 +0200 Subject: powerpc: Mark .opd section read-only .opd section contains function descriptors used to locate functions in the kernel. If someone is able to modify a function descriptor he will be able to run arbitrary kernel function instead of another. To avoid that, move .opd section inside read-only memory. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3cd40b682fb6f75bb40947b55ca0bac20cb3f995.1634136222.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vmlinux.lds.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 40bdefe9caa7..18e42c74abdd 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -143,6 +143,12 @@ SECTIONS SOFT_MASK_TABLE(8) RESTART_TABLE(8) + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { + __start_opd = .; + KEEP(*(.opd)) + __end_opd = .; + } + . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { __start___stf_entry_barrier_fixup = .; @@ -339,12 +345,6 @@ SECTIONS *(.branch_lt) } - .opd : AT(ADDR(.opd) - LOAD_OFFSET) { - __start_opd = .; - KEEP(*(.opd)) - __end_opd = .; - } - . = ALIGN(256); .got : AT(ADDR(.got) - LOAD_OFFSET) { __toc_start = .; -- cgit v1.2.3 From 4f703e7faa67a116016c4678fc88b507c12670c9 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 14 Oct 2021 08:04:38 +1030 Subject: powerpc/s64: Clarify that radix lacks DEBUG_PAGEALLOC The page_alloc.c code will call into __kernel_map_pages() when DEBUG_PAGEALLOC is configured and enabled. As the implementation assumes hash, this should crash spectacularly if not for a bit of luck in __kernel_map_pages(). In this function linear_map_hash_count is always zero, the for loop exits without doing any damage. There are no other platforms that determine if they support debug_pagealloc at runtime. Instead of adding code to mm/page_alloc.c to do that, this change turns the map/unmap into a noop when in radix mode and prints a warning once. Signed-off-by: Joel Stanley Reviewed-by: Christophe Leroy [mpe: Reformat if per Christophe's suggestion] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211013213438.675095-1-joel@jms.id.au --- arch/powerpc/include/asm/book3s/64/hash.h | 2 ++ arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++++++ arch/powerpc/include/asm/book3s/64/radix.h | 3 +++ arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 7 +++++++ 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d959b0195ad9..674fe0e890dc 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -255,6 +255,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); +void hash__kernel_map_pages(struct page *page, int numpages, int enable); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 5d34a8646f08..33e073d6b0c4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1101,6 +1101,16 @@ static inline void vmemmap_remove_mapping(unsigned long start, } #endif +#ifdef CONFIG_DEBUG_PAGEALLOC +static inline void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (radix_enabled()) + radix__kernel_map_pages(page, numpages, enable); + else + hash__kernel_map_pages(page, numpages, enable); +} +#endif + static inline pte_t pmd_pte(pmd_t pmd) { return __pte_raw(pmd_raw(pmd)); diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 59cab558e2f0..d090d9612348 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -316,5 +316,8 @@ int radix__create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ + +void radix__kernel_map_pages(struct page *page, int numpages, int enable); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c145776d3ae5..cfd45245d009 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1988,7 +1988,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } -void __kernel_map_pages(struct page *page, int numpages, int enable) +void hash__kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long flags, vaddr, lmi; int i; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index ae20add7954a..3a600bd7fbc6 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -920,6 +920,13 @@ void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long #endif #endif +#ifdef CONFIG_DEBUG_PAGEALLOC +void radix__kernel_map_pages(struct page *page, int numpages, int enable) +{ + pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n"); +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, -- cgit v1.2.3 From 915b368f6968122fcab31fd6441ed4ba7cc7574c Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 14 Oct 2021 03:56:04 -0400 Subject: powerpc/pseries/iommu: Add of_node_put() before break Fix following coccicheck warning: ./arch/powerpc/platforms/pseries/iommu.c:924:1-28: WARNING: Function for_each_node_with_property should have of_node_put() before break Early exits from for_each_node_with_property should decrement the node reference counter. Signed-off-by: Wan Jiabing Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211014075624.16344-1-wanjiabing@vivo.com --- arch/powerpc/platforms/pseries/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 269f61d519c2..c140aa683f66 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -929,8 +929,10 @@ static void find_existing_ddw_windows_named(const char *name) } window = ddw_list_new_entry(pdn, dma64); - if (!window) + if (!window) { + of_node_put(pdn); break; + } spin_lock(&dma_win_list_lock); list_add(&window->list, &dma_win_list); -- cgit v1.2.3 From 7453f501d443c7febf48809ee1c530b64d625c59 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Sun, 17 Oct 2021 21:54:16 -0400 Subject: powerpc/kexec_file: Add of_node_put() before goto Fix following coccicheck warning: ./arch/powerpc/kexec/file_load_64.c:698:1-22: WARNING: Function for_each_node_by_type should have of_node_put() before goto Early exits from for_each_node_by_type should decrement the node reference counter. Signed-off-by: Wan Jiabing Acked-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211018015418.10182-1-wanjiabing@vivo.com --- arch/powerpc/kexec/file_load_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 5056e175ca2c..b4981b651d9a 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -700,6 +700,7 @@ static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem) if (ret) { pr_err("Failed to set linux,usable-memory property for %s node", dn->full_name); + of_node_put(dn); goto out; } } -- cgit v1.2.3 From 68b44f94d6370e2c6c790fedd28e637fa9964a93 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:42 +0200 Subject: powerpc/booke: Disable STRICT_KERNEL_RWX, DEBUG_PAGEALLOC and KFENCE fsl_booke and 44x are not able to map kernel linear memory with pages, so they can't support DEBUG_PAGEALLOC and KFENCE, and STRICT_KERNEL_RWX is also a problem for now. Enable those only on book3s (both 32 and 64 except KFENCE), 8xx and 40x. Fixes: 88df6e90fa97 ("[POWERPC] DEBUG_PAGEALLOC for 32-bit") Fixes: 95902e6c8864 ("powerpc/mm: Implement STRICT_KERNEL_RWX on PPC32") Fixes: 90cbac0e995d ("powerpc: Enable KFENCE for PPC32") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d1ad9fdd9b27da3fdfa16510bb542ed51fa6e134.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ba5b66189358..6b9f523882c5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,7 +138,7 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_SET_MEMORY - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE @@ -150,7 +150,7 @@ config PPC select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST @@ -190,7 +190,7 @@ config PPC select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 - select HAVE_ARCH_KFENCE if PPC32 + select HAVE_ARCH_KFENCE if PPC_BOOK3S_32 || PPC_8xx || 40x select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT -- cgit v1.2.3 From 3a75fd709c89cb45b8b1044b8ef0d15027a69f9b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:43 +0200 Subject: powerpc/fsl_booke: Rename fsl_booke.c to fsl_book3e.c We have a myriad of CONFIG symbols around different variants of BOOKEs, which would be worth tidying up one day. But at least, make file names and CONFIG option match: We have CONFIG_FSL_BOOKE and CONFIG_PPC_FSL_BOOK3E. fsl_booke.c is selected by and only by CONFIG_PPC_FSL_BOOK3E. So rename it fsl_book3e to reduce confusion. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5dc871db1f67739319bec11f049ca450da1c13a2.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/Makefile | 4 +- arch/powerpc/mm/nohash/fsl_book3e.c | 333 ++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/nohash/fsl_booke.c | 333 ------------------------------------ 3 files changed, 335 insertions(+), 335 deletions(-) create mode 100644 arch/powerpc/mm/nohash/fsl_book3e.c delete mode 100644 arch/powerpc/mm/nohash/fsl_booke.c diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index 0424f6ce5bd8..b1f630d423d8 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o obj-$(CONFIG_PPC_8xx) += 8xx.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_book3e.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o @@ -16,4 +16,4 @@ endif # Disable kcov instrumentation on sensitive code # This is necessary for booting with kcov enabled on book3e machines KCOV_INSTRUMENT_tlb.o := n -KCOV_INSTRUMENT_fsl_booke.o := n +KCOV_INSTRUMENT_fsl_book3e.o := n diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c new file mode 100644 index 000000000000..03dacbe940e5 --- /dev/null +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Modifications by Kumar Gala (galak@kernel.crashing.org) to support + * E500 Book E processors. + * + * Copyright 2004,2010 Freescale Semiconductor, Inc. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +unsigned int tlbcam_index; + +#define NUM_TLBCAMS (64) +struct tlbcam TLBCAM[NUM_TLBCAMS]; + +struct tlbcamrange { + unsigned long start; + unsigned long limit; + phys_addr_t phys; +} tlbcam_addrs[NUM_TLBCAMS]; + +unsigned long tlbcam_sz(int idx) +{ + return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; +} + +#ifdef CONFIG_FSL_BOOKE +/* + * Return PA for this VA if it is mapped by a CAM, or 0 + */ +phys_addr_t v_block_mapped(unsigned long va) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) + return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (pa >= tlbcam_addrs[b].phys + && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) + +tlbcam_addrs[b].phys) + return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); + return 0; +} +#endif + +/* + * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; + * in particular size must be a power of 4 between 4k and the max supported by + * an implementation; max may further be limited by what can be represented in + * an unsigned long (for example, 32-bit implementations cannot support a 4GB + * size). + */ +static void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned long size, unsigned long flags, unsigned int pid) +{ + unsigned int tsize; + + tsize = __ilog2(size) - 10; + +#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) + if ((flags & _PAGE_NO_CACHE) == 0) + flags |= _PAGE_COHERENT; +#endif + + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); + TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); + TLBCAM[index].MAS2 = virt & PAGE_MASK; + + TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; + + TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + if (mmu_has_feature(MMU_FTR_BIG_PHYS)) + TLBCAM[index].MAS7 = (u64)phys >> 32; + + /* Below is unlikely -- only for large user pages or similar */ + if (pte_user(__pte(flags))) { + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + } + + tlbcam_addrs[index].start = virt; + tlbcam_addrs[index].limit = virt + size - 1; + tlbcam_addrs[index].phys = phys; +} + +unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys) +{ + unsigned int camsize = __ilog2(ram); + unsigned int align = __ffs(virt | phys); + unsigned long max_cam; + + if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10; + camsize &= ~1U; + align &= ~1U; + } else { + /* Convert (2^max) kB to (2^max) bytes */ + max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10; + } + + if (camsize > align) + camsize = align; + if (camsize > max_cam) + camsize = max_cam; + + return 1UL << camsize; +} + +static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, + unsigned long ram, int max_cam_idx, + bool dryrun) +{ + int i; + unsigned long amount_mapped = 0; + + /* Calculate CAM values */ + for (i = 0; ram && i < max_cam_idx; i++) { + unsigned long cam_sz; + + cam_sz = calc_cam_sz(ram, virt, phys); + if (!dryrun) + settlbcam(i, virt, phys, cam_sz, + pgprot_val(PAGE_KERNEL_X), 0); + + ram -= cam_sz; + amount_mapped += cam_sz; + virt += cam_sz; + phys += cam_sz; + } + + if (dryrun) + return amount_mapped; + + loadcam_multi(0, i, max_cam_idx); + tlbcam_index = i; + +#ifdef CONFIG_PPC64 + get_paca()->tcd.esel_next = i; + get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + get_paca()->tcd.esel_first = i; +#endif + + return amount_mapped; +} + +unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun) +{ + unsigned long virt = PAGE_OFFSET; + phys_addr_t phys = memstart_addr; + + return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun); +} + +#ifdef CONFIG_PPC32 + +#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) +#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" +#endif + +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; +} + +void flush_instruction_cache(void) +{ + unsigned long tmp; + + tmp = mfspr(SPRN_L1CSR1); + tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; + mtspr(SPRN_L1CSR1, tmp); + isync(); +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +void __init adjust_total_lowmem(void) +{ + unsigned long ram; + int i; + + /* adjust lowmem size to __max_low_memory */ + ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); + + i = switch_to_as1(); + __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false); + restore_to_as0(i, 0, 0, 1); + + pr_info("Memory CAM mapping: "); + for (i = 0; i < tlbcam_index - 1; i++) + pr_cont("%lu/", tlbcam_sz(i) >> 20); + pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, + (unsigned int)((total_lowmem - __max_low_memory) >> 20)); + + memblock_set_current_limit(memstart_addr + __max_low_memory); +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} + +#ifdef CONFIG_RELOCATABLE +int __initdata is_second_reloc; +notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) +{ + unsigned long base = kernstart_virt_addr; + phys_addr_t size; + + kernstart_addr = start; + if (is_second_reloc) { + virt_phys_offset = PAGE_OFFSET - memstart_addr; + kaslr_late_init(); + return; + } + + /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. Before we get the real memstart_addr, + * We will compute the virt_phys_offset like this: + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0x3ffffff) + + * (kernstart_addr & 0x3ffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0x3ffffff) - + * (kernstart_addr & ~0x3ffffff) + * + */ + start &= ~0x3ffffff; + base &= ~0x3ffffff; + virt_phys_offset = base - start; + early_get_first_memblock_info(__va(dt_ptr), &size); + /* + * We now get the memstart_addr, then we should check if this + * address is the same as what the PAGE_OFFSET map to now. If + * not we have to change the map of PAGE_OFFSET to memstart_addr + * and do a second relocation. + */ + if (start != memstart_addr) { + int n; + long offset = start - memstart_addr; + + is_second_reloc = 1; + n = switch_to_as1(); + /* map a 64M area for the second relocation */ + if (memstart_addr > start) + map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM, + false); + else + map_mem_in_cams_addr(start, PAGE_OFFSET + offset, + 0x4000000, CONFIG_LOWMEM_CAM_NUM, + false); + restore_to_as0(n, offset, __va(dt_ptr), 1); + /* We should never reach here */ + panic("Relocation error"); + } + + kaslr_early_init(__va(dt_ptr), size); +} +#endif +#endif diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c deleted file mode 100644 index 03dacbe940e5..000000000000 --- a/arch/powerpc/mm/nohash/fsl_booke.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Modifications by Kumar Gala (galak@kernel.crashing.org) to support - * E500 Book E processors. - * - * Copyright 2004,2010 Freescale Semiconductor, Inc. - * - * This file contains the routines for initializing the MMU - * on the 4xx series of chips. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -unsigned int tlbcam_index; - -#define NUM_TLBCAMS (64) -struct tlbcam TLBCAM[NUM_TLBCAMS]; - -struct tlbcamrange { - unsigned long start; - unsigned long limit; - phys_addr_t phys; -} tlbcam_addrs[NUM_TLBCAMS]; - -unsigned long tlbcam_sz(int idx) -{ - return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; -} - -#ifdef CONFIG_FSL_BOOKE -/* - * Return PA for this VA if it is mapped by a CAM, or 0 - */ -phys_addr_t v_block_mapped(unsigned long va) -{ - int b; - for (b = 0; b < tlbcam_index; ++b) - if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) - return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); - return 0; -} - -/* - * Return VA for a given PA or 0 if not mapped - */ -unsigned long p_block_mapped(phys_addr_t pa) -{ - int b; - for (b = 0; b < tlbcam_index; ++b) - if (pa >= tlbcam_addrs[b].phys - && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) - +tlbcam_addrs[b].phys) - return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); - return 0; -} -#endif - -/* - * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; - * in particular size must be a power of 4 between 4k and the max supported by - * an implementation; max may further be limited by what can be represented in - * an unsigned long (for example, 32-bit implementations cannot support a 4GB - * size). - */ -static void settlbcam(int index, unsigned long virt, phys_addr_t phys, - unsigned long size, unsigned long flags, unsigned int pid) -{ - unsigned int tsize; - - tsize = __ilog2(size) - 10; - -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) - if ((flags & _PAGE_NO_CACHE) == 0) - flags |= _PAGE_COHERENT; -#endif - - TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); - TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); - TLBCAM[index].MAS2 = virt & PAGE_MASK; - - TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; - TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; - - TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - TLBCAM[index].MAS7 = (u64)phys >> 32; - - /* Below is unlikely -- only for large user pages or similar */ - if (pte_user(__pte(flags))) { - TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); - } - - tlbcam_addrs[index].start = virt; - tlbcam_addrs[index].limit = virt + size - 1; - tlbcam_addrs[index].phys = phys; -} - -unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, - phys_addr_t phys) -{ - unsigned int camsize = __ilog2(ram); - unsigned int align = __ffs(virt | phys); - unsigned long max_cam; - - if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { - /* Convert (4^max) kB to (2^max) bytes */ - max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10; - camsize &= ~1U; - align &= ~1U; - } else { - /* Convert (2^max) kB to (2^max) bytes */ - max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10; - } - - if (camsize > align) - camsize = align; - if (camsize > max_cam) - camsize = max_cam; - - return 1UL << camsize; -} - -static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, - unsigned long ram, int max_cam_idx, - bool dryrun) -{ - int i; - unsigned long amount_mapped = 0; - - /* Calculate CAM values */ - for (i = 0; ram && i < max_cam_idx; i++) { - unsigned long cam_sz; - - cam_sz = calc_cam_sz(ram, virt, phys); - if (!dryrun) - settlbcam(i, virt, phys, cam_sz, - pgprot_val(PAGE_KERNEL_X), 0); - - ram -= cam_sz; - amount_mapped += cam_sz; - virt += cam_sz; - phys += cam_sz; - } - - if (dryrun) - return amount_mapped; - - loadcam_multi(0, i, max_cam_idx); - tlbcam_index = i; - -#ifdef CONFIG_PPC64 - get_paca()->tcd.esel_next = i; - get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - get_paca()->tcd.esel_first = i; -#endif - - return amount_mapped; -} - -unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun) -{ - unsigned long virt = PAGE_OFFSET; - phys_addr_t phys = memstart_addr; - - return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun); -} - -#ifdef CONFIG_PPC32 - -#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) -#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" -#endif - -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) -{ - return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; -} - -void flush_instruction_cache(void) -{ - unsigned long tmp; - - tmp = mfspr(SPRN_L1CSR1); - tmp |= L1CSR1_ICFI | L1CSR1_ICLFR; - mtspr(SPRN_L1CSR1, tmp); - isync(); -} - -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ -void __init MMU_init_hw(void) -{ - flush_instruction_cache(); -} - -void __init adjust_total_lowmem(void) -{ - unsigned long ram; - int i; - - /* adjust lowmem size to __max_low_memory */ - ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); - - i = switch_to_as1(); - __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false); - restore_to_as0(i, 0, 0, 1); - - pr_info("Memory CAM mapping: "); - for (i = 0; i < tlbcam_index - 1; i++) - pr_cont("%lu/", tlbcam_sz(i) >> 20); - pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, - (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - - memblock_set_current_limit(memstart_addr + __max_low_memory); -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - phys_addr_t limit = first_memblock_base + first_memblock_size; - - /* 64M mapped initially according to head_fsl_booke.S */ - memblock_set_current_limit(min_t(u64, limit, 0x04000000)); -} - -#ifdef CONFIG_RELOCATABLE -int __initdata is_second_reloc; -notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) -{ - unsigned long base = kernstart_virt_addr; - phys_addr_t size; - - kernstart_addr = start; - if (is_second_reloc) { - virt_phys_offset = PAGE_OFFSET - memstart_addr; - kaslr_late_init(); - return; - } - - /* - * Relocatable kernel support based on processing of dynamic - * relocation entries. Before we get the real memstart_addr, - * We will compute the virt_phys_offset like this: - * virt_phys_offset = stext.run - kernstart_addr - * - * stext.run = (KERNELBASE & ~0x3ffffff) + - * (kernstart_addr & 0x3ffffff) - * When we relocate, we have : - * - * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff) - * - * hence: - * virt_phys_offset = (KERNELBASE & ~0x3ffffff) - - * (kernstart_addr & ~0x3ffffff) - * - */ - start &= ~0x3ffffff; - base &= ~0x3ffffff; - virt_phys_offset = base - start; - early_get_first_memblock_info(__va(dt_ptr), &size); - /* - * We now get the memstart_addr, then we should check if this - * address is the same as what the PAGE_OFFSET map to now. If - * not we have to change the map of PAGE_OFFSET to memstart_addr - * and do a second relocation. - */ - if (start != memstart_addr) { - int n; - long offset = start - memstart_addr; - - is_second_reloc = 1; - n = switch_to_as1(); - /* map a 64M area for the second relocation */ - if (memstart_addr > start) - map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); - else - map_mem_in_cams_addr(start, PAGE_OFFSET + offset, - 0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); - restore_to_as0(n, offset, __va(dt_ptr), 1); - /* We should never reach here */ - panic("Relocation error"); - } - - kaslr_early_init(__va(dt_ptr), size); -} -#endif -#endif -- cgit v1.2.3 From 01116e6e98b08ab0641fa516ddafb1b1b2088e64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:44 +0200 Subject: powerpc/fsl_booke: Take exec flag into account when setting TLBCAMs Don't force MAS3_SX and MAS3_UX at all time. Take into account the exec flag. While at it, fix a couple of closeby style problems (indent with space and unnecessary parenthesis), it keeps more readability. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5467044e59f27f9fcf709b9661779e3ce5f784f6.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/fsl_book3e.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index 03dacbe940e5..fdf1029e62f0 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -122,15 +122,17 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; - TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR; + TLBCAM[index].MAS3 |= (flags & _PAGE_BAP_SX) ? MAS3_SX : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0; if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; /* Below is unlikely -- only for large user pages or similar */ if (pte_user(__pte(flags))) { - TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; - TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + TLBCAM[index].MAS3 |= MAS3_UR; + TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0; } tlbcam_addrs[index].start = virt; -- cgit v1.2.3 From a97dd9e2f760c6996a8f1385ddab0bfef325b364 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:45 +0200 Subject: powerpc/fsl_booke: Enable reloading of TLBCAM without switching to AS1 Avoid switching to AS1 when reloading TLBCAM after init for STRICT_KERNEL_RWX. When we setup AS1 we expect the entire accessible memory to be mapped through one entry, this is not the case anymore at the end of init. We are not changing the size of TLBCAMs, only flags, so no need to switch to AS1. So change loadcam_multi() to not switch to AS1 when the given temporary tlb entry in 0. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a9d517fbfbc940f56103c46b323f6eb8f4485571.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb_low.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 5add4a51e51f..dd39074de9af 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -369,7 +369,7 @@ _GLOBAL(_tlbivax_bcast) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, r10 and r11 + * Must preserve r7, r8, r9, r10, r11, r12 */ _GLOBAL(loadcam_entry) mflr r5 @@ -401,7 +401,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) * * r3 = first entry to write * r4 = number of entries to write - * r5 = temporary tlb entry + * r5 = temporary tlb entry (0 means no switch to AS1) */ _GLOBAL(loadcam_multi) mflr r8 @@ -409,6 +409,8 @@ _GLOBAL(loadcam_multi) mfmsr r11 andi. r11,r11,MSR_IS bne 10f + mr. r12, r5 + beq 10f /* * Set up temporary TLB entry that is the same as what we're @@ -446,6 +448,8 @@ _GLOBAL(loadcam_multi) /* Don't return to AS=0 if we were in AS=1 at function start */ andi. r11,r11,MSR_IS bne 3f + cmpwi r12, 0 + beq 3f /* Return to AS=0 and clear the temporary entry */ mfmsr r6 -- cgit v1.2.3 From 52bda69ae8b5102fe08c9db10f4a1514478e07d3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:46 +0200 Subject: powerpc/fsl_booke: Tell map_mem_in_cams() if init is done In order to be able to call map_mem_in_cams() once more after init for STRICT_KERNEL_RWX, add an argument. For now, map_mem_in_cams() is always called only during init. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3b69a7e0b393b16984ade882a5eae5d727717459.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/nohash/fsl_book3e.c | 12 ++++++------ arch/powerpc/mm/nohash/kaslr_booke.c | 2 +- arch/powerpc/mm/nohash/tlb.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index dd1cabc2ea0f..e13a3c0caa02 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -126,7 +126,7 @@ unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); #ifdef CONFIG_PPC_FSL_BOOK3E extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, - bool dryrun); + bool dryrun, bool init); extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, phys_addr_t phys); #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index fdf1029e62f0..375b2b8238c1 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -167,7 +167,7 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, unsigned long ram, int max_cam_idx, - bool dryrun) + bool dryrun, bool init) { int i; unsigned long amount_mapped = 0; @@ -202,12 +202,12 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, return amount_mapped; } -unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun) +unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init) { unsigned long virt = PAGE_OFFSET; phys_addr_t phys = memstart_addr; - return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun); + return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun, init); } #ifdef CONFIG_PPC32 @@ -248,7 +248,7 @@ void __init adjust_total_lowmem(void) ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); i = switch_to_as1(); - __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false); + __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false, true); restore_to_as0(i, 0, 0, 1); pr_info("Memory CAM mapping: "); @@ -319,11 +319,11 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) /* map a 64M area for the second relocation */ if (memstart_addr > start) map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); + false, true); else map_mem_in_cams_addr(start, PAGE_OFFSET + offset, 0x4000000, CONFIG_LOWMEM_CAM_NUM, - false); + false, true); restore_to_as0(n, offset, __va(dt_ptr), 1); /* We should never reach here */ panic("Relocation error"); diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4c74e8a5482b..8fc49b1b4a91 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -314,7 +314,7 @@ static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size pr_warn("KASLR: No safe seed for randomizing the kernel base.\n"); ram = min_t(phys_addr_t, __max_low_memory, size); - ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true); + ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, false); linear_sz = min_t(unsigned long, ram, SZ_512M); /* If the linear size is smaller than 64M, do not randmize */ diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 5872f69141d5..fc195b9f524b 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -643,7 +643,7 @@ static void early_init_this_mmu(void) if (map) linear_map_top = map_mem_in_cams(linear_map_top, - num_cams, false); + num_cams, true, true); } #endif @@ -764,7 +764,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_sz = map_mem_in_cams(first_memblock_size, num_cams, - true); + false, true); ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); } else -- cgit v1.2.3 From 0b2859a74306b2b89f6e77c216fe0992ff890fa6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:47 +0200 Subject: powerpc/fsl_booke: Allocate separate TLBCAMs for readonly memory Reorganise TLBCAM allocation so that when STRICT_KERNEL_RWX is enabled, TLBCAMs are allocated such that readonly memory uses different TLBCAMs. This results in an allocation looking like: Memory CAM mapping: 4/4/4/1/1/1/1/16/16/16/64/64/64/256/256 Mb, residual: 256Mb Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8ca169bc288261a0e0558712f979023c3a960ebb.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/fsl_book3e.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index 375b2b8238c1..c1bc11f46344 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -171,15 +171,34 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, { int i; unsigned long amount_mapped = 0; + unsigned long boundary; + + if (strict_kernel_rwx_enabled()) + boundary = (unsigned long)(_sinittext - _stext); + else + boundary = ram; /* Calculate CAM values */ - for (i = 0; ram && i < max_cam_idx; i++) { + for (i = 0; boundary && i < max_cam_idx; i++) { + unsigned long cam_sz; + pgprot_t prot = PAGE_KERNEL_X; + + cam_sz = calc_cam_sz(boundary, virt, phys); + if (!dryrun) + settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0); + + boundary -= cam_sz; + amount_mapped += cam_sz; + virt += cam_sz; + phys += cam_sz; + } + for (ram -= amount_mapped; ram && i < max_cam_idx; i++) { unsigned long cam_sz; + pgprot_t prot = PAGE_KERNEL_X; cam_sz = calc_cam_sz(ram, virt, phys); if (!dryrun) - settlbcam(i, virt, phys, cam_sz, - pgprot_val(PAGE_KERNEL_X), 0); + settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0); ram -= cam_sz; amount_mapped += cam_sz; -- cgit v1.2.3 From d5970045cf9e266d9a43941ac0866865fd22a36a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:48 +0200 Subject: powerpc/fsl_booke: Update of TLBCAMs after init After init, set readonly memory as ROX and set readwrite memory as RWX, if STRICT_KERNEL_RWX is enabled. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/66bef0b9c273e1121706883f3cf5ad0a053d863f.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/nohash/fsl_book3e.c | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index e13a3c0caa02..0dd4c18f8363 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -168,7 +168,7 @@ static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; } static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_FSL_BOOK3E) void mmu_mark_initmem_nx(void); void mmu_mark_rodata_ro(void); #else diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index c1bc11f46344..978e0bcdfa2c 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -181,7 +181,7 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, /* Calculate CAM values */ for (i = 0; boundary && i < max_cam_idx; i++) { unsigned long cam_sz; - pgprot_t prot = PAGE_KERNEL_X; + pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL_ROX; cam_sz = calc_cam_sz(boundary, virt, phys); if (!dryrun) @@ -194,7 +194,7 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, } for (ram -= amount_mapped; ram && i < max_cam_idx; i++) { unsigned long cam_sz; - pgprot_t prot = PAGE_KERNEL_X; + pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL; cam_sz = calc_cam_sz(ram, virt, phys); if (!dryrun) @@ -209,8 +209,13 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, if (dryrun) return amount_mapped; - loadcam_multi(0, i, max_cam_idx); - tlbcam_index = i; + if (init) { + loadcam_multi(0, i, max_cam_idx); + tlbcam_index = i; + } else { + loadcam_multi(0, i, 0); + WARN_ON(i > tlbcam_index); + } #ifdef CONFIG_PPC64 get_paca()->tcd.esel_next = i; @@ -279,6 +284,25 @@ void __init adjust_total_lowmem(void) memblock_set_current_limit(memstart_addr + __max_low_memory); } +#ifdef CONFIG_STRICT_KERNEL_RWX +void mmu_mark_rodata_ro(void) +{ + /* Everything is done in mmu_mark_initmem_nx() */ +} +#endif + +void mmu_mark_initmem_nx(void) +{ + unsigned long remapped; + + if (!strict_kernel_rwx_enabled()) + return; + + remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false); + + WARN_ON(__max_low_memory != remapped); +} + void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { -- cgit v1.2.3 From 49e3d8ea62482625c3835f0a33ae9c1dda39ea8f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Oct 2021 12:02:49 +0200 Subject: powerpc/fsl_booke: Enable STRICT_KERNEL_RWX Enable STRICT_KERNEL_RWX on fsl_booke. For that, we need additional TLBCAMs dedicated to linear mapping, based on the alignment of _sinittext. By default, up to 768 Mbytes of memory are mapped. It uses 3 TLBCAMs of size 256 Mbytes. With a data alignment of 16, we need up to 9 TLBCAMs: 16/16/16/16/64/64/64/256/256 With a data alignment of 4, we need up to 12 TLBCAMs: 4/4/4/4/16/16/16/64/64/64/256/256 With a data alignment of 1, we need up to 15 TLBCAMs: 1/1/1/1/4/4/4/16/16/16/64/64/64/256/256 By default, set a 16 Mbytes alignment as a compromise between memory usage and number of TLBCAMs. This can be adjusted manually when needed. For the time being, it doens't work when the base is randomised. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/29f9e5d2bbbc83ae9ca879265426a6278bf4d5bb.1634292136.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6b9f523882c5..939a47642a9c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -139,6 +139,7 @@ config PPC select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION + select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE @@ -778,7 +779,8 @@ config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \ + FSL_BOOKE help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and @@ -791,11 +793,13 @@ config DATA_SHIFT default 24 if STRICT_KERNEL_RWX && PPC64 range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx + range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_FSL_BOOKE default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx + default 24 if STRICT_KERNEL_RWX && FSL_BOOKE default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -1123,7 +1127,10 @@ config LOWMEM_CAM_NUM_BOOL config LOWMEM_CAM_NUM depends on FSL_BOOKE int "Number of CAMs to use to map low memory" if LOWMEM_CAM_NUM_BOOL - default 3 + default 3 if !STRICT_KERNEL_RWX + default 9 if DATA_SHIFT >= 24 + default 12 if DATA_SHIFT >= 22 + default 15 config DYNAMIC_MEMSTART bool "Enable page aligned dynamic load address for kernel" -- cgit v1.2.3 From 61cb9ac66b30374c7fd8a8b2a3c4f8f432c72e36 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Oct 2021 00:03:45 -0500 Subject: powerpc/vas: Fix potential NULL pointer dereference (!ptr && !ptr->foo) strikes again. :) The expression (!ptr && !ptr->foo) is bogus and in case ptr is NULL, it leads to a NULL pointer dereference: ptr->foo. Fix this by converting && to || This issue was detected with the help of Coccinelle, and audited and fixed manually. Fixes: 1a0d0d5ed5e3 ("powerpc/vas: Add platform specific user window operations") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211015050345.GA1161918@embeddedor --- arch/powerpc/platforms/book3s/vas-api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 30172e52e16b..4d82c92ddd52 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -303,7 +303,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return -EINVAL; } - if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) { + if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) { pr_err("VAS API is not registered\n"); return -EACCES; } @@ -373,7 +373,7 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } - if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) { + if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) { pr_err("%s(): VAS API is not registered\n", __func__); return -EACCES; } -- cgit v1.2.3 From e9efabc6e4c31517394be13c2f0c5abadd33f328 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 14 Oct 2021 00:05:29 +0200 Subject: powerpc/5200: dts: add missing pci ranges Add ranges property to fix build warnings: Warning (pci_bridge): /pci@f0000d00: missing ranges for PCI bridge (or not a bridge) Signed-off-by: Anatolij Gustschin Reviewed-by: Rob Herring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211013220532.24759-2-agust@denx.de --- arch/powerpc/boot/dts/mpc5200b.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 648fe31795f4..8c645d2bc455 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -276,7 +276,9 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - // ranges = need to add + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; localbus: localbus { -- cgit v1.2.3 From 7855b6c66dc458e2f5abfb2b50f527ea4101df77 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 14 Oct 2021 00:05:30 +0200 Subject: powerpc/5200: dts: fix pci ranges warnings Fix ranges property warnings: pci@f0000d00:ranges: 'oneOf' conditional failed, one must be fixed: Signed-off-by: Anatolij Gustschin Reviewed-by: Rob Herring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211013220532.24759-3-agust@denx.de --- arch/powerpc/boot/dts/a4m072.dts | 6 +++--- arch/powerpc/boot/dts/charon.dts | 6 +++--- arch/powerpc/boot/dts/digsy_mtc.dts | 6 +++--- arch/powerpc/boot/dts/lite5200.dts | 6 +++--- arch/powerpc/boot/dts/lite5200b.dts | 6 +++--- arch/powerpc/boot/dts/media5200.dts | 6 +++--- arch/powerpc/boot/dts/mucmc52.dts | 6 +++--- arch/powerpc/boot/dts/pcm030.dts | 6 +++--- arch/powerpc/boot/dts/pcm032.dts | 6 +++--- arch/powerpc/boot/dts/tqm5200.dts | 6 +++--- 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/boot/dts/a4m072.dts b/arch/powerpc/boot/dts/a4m072.dts index a9cef5726422..d4270a2ec6c7 100644 --- a/arch/powerpc/boot/dts/a4m072.dts +++ b/arch/powerpc/boot/dts/a4m072.dts @@ -140,8 +140,8 @@ clock-frequency = <0>; /* From boot loader */ interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000 - 0x02000000 0 0x90000000 0x90000000 0 0x10000000 - 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; }; diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 408b486b13df..6f9fe88a5f36 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -225,8 +225,8 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000 - 0x02000000 0 0x90000000 0x90000000 0 0x10000000 - 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 0e5e9d3acf79..010156649bfe 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -98,9 +98,9 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000 - 0x02000000 0 0x90000000 0x90000000 0 0x10000000 - 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132..84a12e7915e9 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -283,9 +283,9 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000 - 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 - 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>, + <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index 2b86c81f9048..c361c59a9681 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -116,9 +116,9 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000 - 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 - 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>, + <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index 61cae9dcddef..f7d7538eb69d 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -96,9 +96,9 @@ 0xe000 0 0 1 &media5200_fpga 0 5 // CoralIP >; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000 - 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 - 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>, + <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; interrupt-parent = <&mpc5200_pic>; }; diff --git a/arch/powerpc/boot/dts/mucmc52.dts b/arch/powerpc/boot/dts/mucmc52.dts index c6c66306308d..e88a7bd4034d 100644 --- a/arch/powerpc/boot/dts/mucmc52.dts +++ b/arch/powerpc/boot/dts/mucmc52.dts @@ -106,9 +106,9 @@ 0x8000 0 0 3 &mpc5200_pic 0 2 3 0x8000 0 0 4 &mpc5200_pic 0 1 3 >; - ranges = <0x42000000 0 0x60000000 0x60000000 0 0x10000000 - 0x02000000 0 0x90000000 0x90000000 0 0x10000000 - 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; + ranges = <0x42000000 0 0x60000000 0x60000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/pcm030.dts b/arch/powerpc/boot/dts/pcm030.dts index b1bc731f7afd..5cee474dcc4c 100644 --- a/arch/powerpc/boot/dts/pcm030.dts +++ b/arch/powerpc/boot/dts/pcm030.dts @@ -90,9 +90,9 @@ 0xc800 0 0 2 &mpc5200_pic 1 2 3 0xc800 0 0 3 &mpc5200_pic 1 3 3 0xc800 0 0 4 &mpc5200_pic 0 0 3>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000 - 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 - 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>, + <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index 780e13d99e7b..b6fdf861c7d6 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -87,9 +87,9 @@ 0xc800 0 0 2 &mpc5200_pic 1 2 3 0xc800 0 0 3 &mpc5200_pic 1 3 3 0xc800 0 0 4 &mpc5200_pic 0 0 3>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000 - 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 - 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>, + <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>; }; localbus { diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 9ed0bc78967e..40b139d92a19 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -200,8 +200,8 @@ clock-frequency = <0>; // From boot loader interrupts = <2 8 0 2 9 0 2 10 0>; bus-range = <0 0>; - ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000 - 0x02000000 0 0x90000000 0x90000000 0 0x10000000 - 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; + ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>, + <0x02000000 0 0x90000000 0x90000000 0 0x10000000>, + <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>; }; }; -- cgit v1.2.3 From aed2886a5e9ffc8269a4220bff1e9e030d3d2eb1 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 14 Oct 2021 00:05:31 +0200 Subject: powerpc/5200: dts: fix memory node unit name Fixes build warnings: Warning (unit_address_vs_reg): /memory: node has a reg or ranges property, but no unit name Signed-off-by: Anatolij Gustschin Reviewed-by: Rob Herring Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211013220532.24759-4-agust@denx.de --- arch/powerpc/boot/dts/charon.dts | 2 +- arch/powerpc/boot/dts/digsy_mtc.dts | 2 +- arch/powerpc/boot/dts/lite5200.dts | 2 +- arch/powerpc/boot/dts/lite5200b.dts | 2 +- arch/powerpc/boot/dts/media5200.dts | 2 +- arch/powerpc/boot/dts/mpc5200b.dtsi | 2 +- arch/powerpc/boot/dts/o2d.dts | 2 +- arch/powerpc/boot/dts/o2d.dtsi | 2 +- arch/powerpc/boot/dts/o2dnt2.dts | 2 +- arch/powerpc/boot/dts/o3dnt.dts | 2 +- arch/powerpc/boot/dts/pcm032.dts | 2 +- arch/powerpc/boot/dts/tqm5200.dts | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 6f9fe88a5f36..ea6e76ae2545 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -35,7 +35,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 010156649bfe..57024a4c1e7d 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -16,7 +16,7 @@ model = "intercontrol,digsy-mtc"; compatible = "intercontrol,digsy-mtc"; - memory { + memory@0 { reg = <0x00000000 0x02000000>; // 32MB }; diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index 84a12e7915e9..b9d8487813b4 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index c361c59a9681..7e2d91c7cb66 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -31,7 +31,7 @@ led4 { gpios = <&gpio_simple 2 1>; }; }; - memory { + memory@0 { reg = <0x00000000 0x10000000>; // 256MB }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index f7d7538eb69d..96524ede16cd 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB RAM }; diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 8c645d2bc455..ffa82c7e1055 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -33,7 +33,7 @@ }; }; - memory: memory { + memory: memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts index 24a46f65e529..e0a8d3034417 100644 --- a/arch/powerpc/boot/dts/o2d.dts +++ b/arch/powerpc/boot/dts/o2d.dts @@ -12,7 +12,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index 6661955a2be4..b55a9e5bd828 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -19,7 +19,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts index eeba7f5507d5..c2eedbd1f5fc 100644 --- a/arch/powerpc/boot/dts/o2dnt2.dts +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -12,7 +12,7 @@ model = "ifm,o2dnt2"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts index fd00396b0593..e4c1bdd41271 100644 --- a/arch/powerpc/boot/dts/o3dnt.dts +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -12,7 +12,7 @@ model = "ifm,o3dnt"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index b6fdf861c7d6..d00f13b62510 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -20,7 +20,7 @@ model = "phytec,pcm032"; compatible = "phytec,pcm032"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 40b139d92a19..372177b19e60 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; -- cgit v1.2.3 From a85c728cb5e12216c19ae5878980c2cbbbf8616d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 15:29:12 +0000 Subject: powerpc/32: Don't use lmw/stmw for saving/restoring non volatile regs Instructions lmw/stmw are interesting for functions that are rarely used and not in the cache, because only one instruction is to be copied into the instruction cache instead of 19. However those instruction are less performant than 19x raw lwz/stw as they require synchronisation plus one additional cycle. SAVE_NVGPRS / REST_NVGPRS are used in only a few places which are mostly in interrupts entries/exits and in task switch so they are likely already in the cache. Using standard lwz improves null_syscall selftest by: - 10 cycles on mpc832x. - 2 cycles on mpc8xx. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/316c543b8906712c108985c8463eec09c8db577b.1629732542.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1c538a9a11e0..7be24048b8d1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -28,8 +28,8 @@ #else #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base) -#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base) +#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) +#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) #endif #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -- cgit v1.2.3 From 566af8cda399c088763d07464463dc871c943b54 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Aug 2021 13:36:13 +0000 Subject: powerpc/audit: Convert powerpc to AUDIT_ARCH_COMPAT_GENERIC Commit e65e1fc2d24b ("[PATCH] syscall class hookup for all normal targets") added generic support for AUDIT but that didn't include support for bi-arch like powerpc. Commit 4b58841149dc ("audit: Add generic compat syscall support") added generic support for bi-arch. Convert powerpc to that bi-arch generic audit support. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a4b3951d1191d4183d92a07a6097566bde60d00a.1629812058.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 5 +-- arch/powerpc/include/asm/unistd32.h | 7 ++++ arch/powerpc/kernel/Makefile | 3 -- arch/powerpc/kernel/audit.c | 84 ------------------------------------- arch/powerpc/kernel/compat_audit.c | 44 ------------------- 5 files changed, 8 insertions(+), 135 deletions(-) create mode 100644 arch/powerpc/include/asm/unistd32.h delete mode 100644 arch/powerpc/kernel/audit.c delete mode 100644 arch/powerpc/kernel/compat_audit.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 939a47642a9c..aac3437c8668 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -161,6 +161,7 @@ config PPC select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE + select AUDIT_ARCH_COMPAT_GENERIC select BINFMT_ELF select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS @@ -314,10 +315,6 @@ config GENERIC_TBSYNC bool default y if PPC32 && SMP -config AUDIT_ARCH - bool - default y - config GENERIC_BUG bool default y diff --git a/arch/powerpc/include/asm/unistd32.h b/arch/powerpc/include/asm/unistd32.h new file mode 100644 index 000000000000..07689897d206 --- /dev/null +++ b/arch/powerpc/include/asm/unistd32.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_UNISTD32_H_ +#define _ASM_POWERPC_UNISTD32_H_ + +#include + +#endif /* _ASM_POWERPC_UNISTD32_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7be36c1e1db6..825121eba3c2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -125,9 +125,6 @@ obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_AUDIT) += audit.o -obj64-$(CONFIG_AUDIT) += compat_audit.o - obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-y += trace/ diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c deleted file mode 100644 index a2dddd7f3d09..000000000000 --- a/arch/powerpc/kernel/audit.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -static unsigned dir_class[] = { -#include -~0U -}; - -static unsigned read_class[] = { -#include -~0U -}; - -static unsigned write_class[] = { -#include -~0U -}; - -static unsigned chattr_class[] = { -#include -~0U -}; - -static unsigned signal_class[] = { -#include -~0U -}; - -int audit_classify_arch(int arch) -{ -#ifdef CONFIG_PPC64 - if (arch == AUDIT_ARCH_PPC) - return 1; -#endif - return 0; -} - -int audit_classify_syscall(int abi, unsigned syscall) -{ -#ifdef CONFIG_PPC64 - extern int ppc32_classify_syscall(unsigned); - if (abi == AUDIT_ARCH_PPC) - return ppc32_classify_syscall(syscall); -#endif - switch(syscall) { - case __NR_open: - return 2; - case __NR_openat: - return 3; - case __NR_socketcall: - return 4; - case __NR_execve: - return 5; - default: - return 0; - } -} - -static int __init audit_classes_init(void) -{ -#ifdef CONFIG_PPC64 - extern __u32 ppc32_dir_class[]; - extern __u32 ppc32_write_class[]; - extern __u32 ppc32_read_class[]; - extern __u32 ppc32_chattr_class[]; - extern __u32 ppc32_signal_class[]; - audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class); - audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class); - audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class); -#endif - audit_register_class(AUDIT_CLASS_WRITE, write_class); - audit_register_class(AUDIT_CLASS_READ, read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); - audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); - return 0; -} - -__initcall(audit_classes_init); diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c deleted file mode 100644 index 55c6ccda0a85..000000000000 --- a/arch/powerpc/kernel/compat_audit.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#undef __powerpc64__ -#include - -unsigned ppc32_dir_class[] = { -#include -~0U -}; - -unsigned ppc32_chattr_class[] = { -#include -~0U -}; - -unsigned ppc32_write_class[] = { -#include -~0U -}; - -unsigned ppc32_read_class[] = { -#include -~0U -}; - -unsigned ppc32_signal_class[] = { -#include -~0U -}; - -int ppc32_classify_syscall(unsigned syscall) -{ - switch(syscall) { - case __NR_open: - return 2; - case __NR_openat: - return 3; - case __NR_socketcall: - return 4; - case __NR_execve: - return 5; - default: - return 1; - } -} -- cgit v1.2.3 From e606a2f46c72ec399bb9be194f6df95ea8ec3b1b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 31 Aug 2021 08:29:35 +0000 Subject: powerpc/time: Remove generic_suspend_{dis/en}able_irqs() Commit d75d68cfef49 ("powerpc: Clean up obsolete code relating to decrementer and timebase") made generic_suspend_enable_irqs() and generic_suspend_disable_irqs() static. Fold them into their only caller. Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c3f9ec9950394ef939014f7934268e6ee30ca04f.1630398566.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/time.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 934d8ae66cc6..cae8f03a44fe 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -631,8 +631,12 @@ void timer_broadcast_interrupt(void) #endif #ifdef CONFIG_SUSPEND -static void generic_suspend_disable_irqs(void) +/* Overrides the weak version in kernel/power/main.c */ +void arch_suspend_disable_irqs(void) { + if (ppc_md.suspend_disable_irqs) + ppc_md.suspend_disable_irqs(); + /* Disable the decrementer, so that it doesn't interfere * with suspending. */ @@ -642,23 +646,11 @@ static void generic_suspend_disable_irqs(void) set_dec(decrementer_max); } -static void generic_suspend_enable_irqs(void) -{ - local_irq_enable(); -} - -/* Overrides the weak version in kernel/power/main.c */ -void arch_suspend_disable_irqs(void) -{ - if (ppc_md.suspend_disable_irqs) - ppc_md.suspend_disable_irqs(); - generic_suspend_disable_irqs(); -} - /* Overrides the weak version in kernel/power/main.c */ void arch_suspend_enable_irqs(void) { - generic_suspend_enable_irqs(); + local_irq_enable(); + if (ppc_md.suspend_enable_irqs) ppc_md.suspend_enable_irqs(); } -- cgit v1.2.3 From 8f7fadb4ba87f6639d817a9b2d99112e9507dc63 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 31 Aug 2021 08:30:24 +0000 Subject: powerpc/machdep: Remove stale functions from ppc_md structure ppc_md.iommu_save() is not set anymore by any platform after commit c40785ad305b ("powerpc/dart: Use a cachable DART"). So iommu_save() has become a nop and can be removed. ppc_md.show_percpuinfo() is not set anymore by any platform after commit 4350147a816b ("[PATCH] ppc64: SMU based macs cpufreq support"). Last users of ppc_md.rtc_read_val() and ppc_md.rtc_write_val() were removed by commit 0f03a43b8f0f ("[POWERPC] Remove todc code from ARCH=powerpc") Last user of kgdb_map_scc() was removed by commit 17ce452f7ea3 ("kgdb, powerpc: arch specific powerpc kgdb support"). ppc.machine_kexec_prepare() has not been used since commit 8ee3e0d69623 ("powerpc: Remove the main legacy iSerie platform code"). This allows the removal of machine_kexec_prepare() and the rename of default_machine_kexec_prepare() into machine_kexec_prepare() Signed-off-by: Christophe Leroy Reviewed-by: Daniel Axtens [mpe: Drop prototype for default_machine_kexec_prepare() as noted by dja] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/24d4ca0ada683c9436a5f812a7aeb0a1362afa2b.1630398606.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/iommu.h | 6 ------ arch/powerpc/include/asm/kexec.h | 1 - arch/powerpc/include/asm/machdep.h | 13 ------------- arch/powerpc/kernel/setup-common.c | 3 --- arch/powerpc/kernel/swsusp_64.c | 5 ----- arch/powerpc/kernel/swsusp_asm64.S | 1 - arch/powerpc/kexec/core.c | 13 ------------- arch/powerpc/kexec/core_32.c | 2 +- arch/powerpc/kexec/core_64.c | 2 +- 9 files changed, 2 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index bf3b84128525..c361212ac160 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -280,12 +280,6 @@ extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); #if defined(CONFIG_PPC64) && defined(CONFIG_PM) -static inline void iommu_save(void) -{ - if (ppc_md.iommu_save) - ppc_md.iommu_save(); -} - static inline void iommu_restore(void) { if (ppc_md.iommu_restore) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 88d0d7cf3a79..c6f250eca3fb 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -79,7 +79,6 @@ extern int crash_wake_offline; struct kimage; struct pt_regs; extern void default_machine_kexec(struct kimage *image); -extern int default_machine_kexec_prepare(struct kimage *image); extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 764f2732a821..a68311077d32 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -29,7 +29,6 @@ struct machdep_calls { char *name; #ifdef CONFIG_PPC64 #ifdef CONFIG_PM - void (*iommu_save)(void); void (*iommu_restore)(void); #endif #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE @@ -43,7 +42,6 @@ struct machdep_calls { void (*setup_arch)(void); /* Optional, may be NULL */ /* Optional, may be NULL. */ void (*show_cpuinfo)(struct seq_file *m); - void (*show_percpuinfo)(struct seq_file *m, int i); /* Returns the current operating frequency of "cpu" in Hz */ unsigned long (*get_proc_freq)(unsigned int cpu); @@ -74,8 +72,6 @@ struct machdep_calls { int (*set_rtc_time)(struct rtc_time *); void (*get_rtc_time)(struct rtc_time *); time64_t (*get_boot_time)(void); - unsigned char (*rtc_read_val)(int addr); - void (*rtc_write_val)(int addr, unsigned char val); void (*calibrate_decr)(void); @@ -141,8 +137,6 @@ struct machdep_calls { May be NULL. */ void (*init)(void); - void (*kgdb_map_scc)(void); - /* * optional PCI "hooks" */ @@ -187,13 +181,6 @@ struct machdep_calls { #ifdef CONFIG_KEXEC_CORE void (*kexec_cpu_down)(int crash_shutdown, int secondary); - /* Called to do what every setup is needed on image and the - * reboot code buffer. Returns 0 on success. - * Provide your own (maybe dummy) implementation if your platform - * claims to support kexec. - */ - int (*machine_kexec_prepare)(struct kimage *image); - /* Called to perform the _real_ kexec. * Do NOT allocate memory or fail here. We are past the point of * no return. diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b1e43b69a559..0b7894eed58d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -278,9 +278,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "clock\t\t: %lu.%06luMHz\n", proc_freq / 1000000, proc_freq % 1000000); - if (ppc_md.show_percpuinfo != NULL) - ppc_md.show_percpuinfo(m, cpu_id); - /* If we are a Freescale core do a simple check so * we dont have to keep adding cases in the future */ if (PVR_VER(pvr) & 0x8000) { diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index aeea97ad85cf..16ee3baaf09a 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -17,8 +17,3 @@ void do_after_copyback(void) touch_softlockup_watchdog(); mb(); } - -void _iommu_save(void) -{ - iommu_save(); -} diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 6d3189830dd3..96bb20715aa9 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -128,7 +128,6 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) * stack pointer on the stack like a real stackframe */ addi r1,r1,-128 - bl _iommu_save bl swsusp_save /* restore LR */ diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 48525e8b5730..a2242017e55f 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -48,19 +48,6 @@ void machine_crash_shutdown(struct pt_regs *regs) default_machine_crash_shutdown(regs); } -/* - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - */ -int machine_kexec_prepare(struct kimage *image) -{ - if (ppc_md.machine_kexec_prepare) - return ppc_md.machine_kexec_prepare(image); - else - return default_machine_kexec_prepare(image); -} - void machine_kexec_cleanup(struct kimage *image) { } diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c index bf9f1f906d64..b50aed48d09d 100644 --- a/arch/powerpc/kexec/core_32.c +++ b/arch/powerpc/kexec/core_32.c @@ -63,7 +63,7 @@ void default_machine_kexec(struct kimage *image) (*rnk)(page_list, reboot_code_buffer_phys, image->start); } -int default_machine_kexec_prepare(struct kimage *image) +int machine_kexec_prepare(struct kimage *image) { return 0; } diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 89c069d664a5..66678518b938 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -32,7 +32,7 @@ #include #include -int default_machine_kexec_prepare(struct kimage *image) +int machine_kexec_prepare(struct kimage *image) { int i; unsigned long begin, end; /* limits of segment */ -- cgit v1.2.3 From 5c810ced36ae1a8f1a7403c1635b0dc07c115086 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Sep 2021 08:30:21 +0000 Subject: powerpc/32: Add support for out-of-line static calls Add support for out-of-line static calls on PPC32. This change improve performance of calls to global function pointers by using direct calls instead of indirect calls. The trampoline is initialy populated with a 'blr' or branch to target, followed by an unreachable long jump sequence. In order to cater with parallele execution, the trampoline needs to be updated in a way that ensures it remains consistent at all time. This means we can't use the traditional lis/addi to load r12 with the target address, otherwise there would be a window during which the first instruction contains the upper part of the new target address while the second instruction still contains the lower part of the old target address. To avoid that the target address is stored just after the 'bctr' and loaded from there with a single instruction. Then, depending on the target distance, arch_static_call_transform() will either replace the first instruction by a direct 'bl ' or 'nop' in order to have the trampoline fall through the long jump sequence. For the special case of __static_call_return0(), to avoid the risk of a far branch, a version of it is inlined at the end of the trampoline. Performancewise the long jump sequence is probably not better than the indirect calls set by GCC when we don't use static calls, but such calls are unlikely to be required on powerpc32: With most configurations the kernel size is far below 32 Mbytes so only modules may happen to be too far. And even modules are likely to be close enough as they are allocated below the kernel core and as close as possible of the kernel text. static_call selftest is running successfully with this change. With this patch, __do_irq() has the following sequence to trace irq entries: c0004a00 <__SCT__tp_func_irq_entry>: c0004a00: 48 00 00 e0 b c0004ae0 <__traceiter_irq_entry> c0004a04: 3d 80 c0 00 lis r12,-16384 c0004a08: 81 8c 4a 1c lwz r12,18972(r12) c0004a0c: 7d 89 03 a6 mtctr r12 c0004a10: 4e 80 04 20 bctr c0004a14: 38 60 00 00 li r3,0 c0004a18: 4e 80 00 20 blr c0004a1c: 00 00 00 00 .long 0x0 ... c0005654 <__do_irq>: ... c0005664: 7c 7f 1b 78 mr r31,r3 ... c00056a0: 81 22 00 00 lwz r9,0(r2) c00056a4: 39 29 00 01 addi r9,r9,1 c00056a8: 91 22 00 00 stw r9,0(r2) c00056ac: 3d 20 c0 af lis r9,-16209 c00056b0: 81 29 74 cc lwz r9,29900(r9) c00056b4: 2c 09 00 00 cmpwi r9,0 c00056b8: 41 82 00 10 beq c00056c8 <__do_irq+0x74> c00056bc: 80 69 00 04 lwz r3,4(r9) c00056c0: 7f e4 fb 78 mr r4,r31 c00056c4: 4b ff f3 3d bl c0004a00 <__SCT__tp_func_irq_entry> Before this patch, __do_irq() was doing the following to trace irq entries: c0005700 <__do_irq>: ... c0005710: 7c 7e 1b 78 mr r30,r3 ... c000574c: 93 e1 00 0c stw r31,12(r1) c0005750: 81 22 00 00 lwz r9,0(r2) c0005754: 39 29 00 01 addi r9,r9,1 c0005758: 91 22 00 00 stw r9,0(r2) c000575c: 3d 20 c0 af lis r9,-16209 c0005760: 83 e9 f4 cc lwz r31,-2868(r9) c0005764: 2c 1f 00 00 cmpwi r31,0 c0005768: 41 82 00 24 beq c000578c <__do_irq+0x8c> c000576c: 81 3f 00 00 lwz r9,0(r31) c0005770: 80 7f 00 04 lwz r3,4(r31) c0005774: 7d 29 03 a6 mtctr r9 c0005778: 7f c4 f3 78 mr r4,r30 c000577c: 4e 80 04 21 bctrl c0005780: 85 3f 00 0c lwzu r9,12(r31) c0005784: 2c 09 00 00 cmpwi r9,0 c0005788: 40 82 ff e4 bne c000576c <__do_irq+0x6c> Behind the fact of now using a direct 'bl' instead of a 'load/mtctr/bctr' sequence, we can also see that we get one less register on the stack. Signed-off-by: Christophe Leroy Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6ec2a7865ed6a5ec54ab46d026785bafe1d837ea.1630484892.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/static_call.h | 28 +++++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/static_call.c | 37 ++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/static_call.h create mode 100644 arch/powerpc/kernel/static_call.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aac3437c8668..5d0586214662 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -243,6 +243,7 @@ config PPC select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) + select HAVE_STATIC_CALL if PPC32 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h new file mode 100644 index 000000000000..0a0bc79bd1fa --- /dev/null +++ b/arch/powerpc/include/asm/static_call.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_STATIC_CALL_H +#define _ASM_POWERPC_STATIC_CALL_H + +#define __PPC_SCT(name, inst) \ + asm(".pushsection .text, \"ax\" \n" \ + ".align 5 \n" \ + ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + inst " \n" \ + " lis 12,2f@ha \n" \ + " lwz 12,2f@l(12) \n" \ + " mtctr 12 \n" \ + " bctr \n" \ + "1: li 3, 0 \n" \ + " blr \n" \ + "2: .long 0 \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") + +#define PPC_SCT_RET0 20 /* Offset of label 1 */ +#define PPC_SCT_DATA 28 /* Offset of label 2 */ + +#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") + +#endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 825121eba3c2..d079292525b6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -106,7 +106,7 @@ extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o -obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o static_call.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c new file mode 100644 index 000000000000..863a7aa24650 --- /dev/null +++ b/arch/powerpc/kernel/static_call.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include + +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) +{ + int err; + bool is_ret0 = (func == __static_call_return0); + unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func); + bool is_short = is_offset_in_branch_range((long)target - (long)tramp); + + if (!tramp) + return; + + mutex_lock(&text_mutex); + + if (func && !is_short) { + err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target)); + if (err) + goto out; + } + + if (!func) + err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); + else if (is_short) + err = patch_branch(tramp, target, 0); + else + err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); +out: + mutex_unlock(&text_mutex); + + if (err) + panic("%s: patching failed %pS at %pS\n", __func__, func, tramp); +} +EXPORT_SYMBOL_GPL(arch_static_call_transform); -- cgit v1.2.3 From cbe654c779616807e1e6823c3bdbfe07a10562b8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 16 Sep 2021 16:52:09 +0200 Subject: powerpc: warn on emulation of dcbz instruction in kernel mode dcbz instruction shouldn't be used on non-cached memory. Using it on non-cached memory can result in alignment exception and implies a heavy handling. Instead of silentely emulating the instruction and resulting in high performance degradation, warn whenever an alignment exception is taken in kernel mode due to dcbz, so that the user is made aware that dcbz instruction has been used unexpectedly by the kernel. Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2e3acfe63d289c6fba366e16973c9ab8369e8b75.1631803922.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/align.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index bbb4181621dd..bf96b954a4eb 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -349,6 +349,7 @@ int fix_alignment(struct pt_regs *regs) if (op.type != CACHEOP + DCBZ) return -EINVAL; PPC_WARN_ALIGNMENT(dcbz, regs); + WARN_ON_ONCE(!user_mode(regs)); r = emulate_dcbz(op.ea, regs); } else { if (type == LARX || type == STCX) -- cgit v1.2.3 From e28d0b675056d072f1f11fa644d0efbb016bb7ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 16 Sep 2021 20:43:36 +0200 Subject: powerpc/lib/sstep: Don't use __{get/put}_user() on kernel addresses In the old days, when we didn't have kernel userspace access protection and had set_fs(), it was wise to use __get_user() and friends to read kernel memory. Nowadays, get_user() and put_user() are granting userspace access and are exclusively for userspace access. Convert single step emulation functions to user_access_begin() and friends and use unsafe_get_user() and unsafe_put_user(). When addressing kernel addresses, there is no need to open userspace access. And for book3s/32 it is particularly important to no try and open userspace access on kernel address, because that would break the content of kernel space segment registers. No guard has been put against that risk in order to avoid degrading performance. copy_from_kernel_nofault() and copy_to_kernel_nofault() should be used but they are out-of-line functions which would degrade performance. Those two functions are making use of __get_kernel_nofault() and __put_kernel_nofault() macros. Those two macros are just wrappers behind __get_user_size_goto() and __put_user_size_goto(). unsafe_get_user() and unsafe_put_user() are also wrappers of __get_user_size_goto() and __put_user_size_goto(). Use them to access kernel space. That allows refactoring userspace and kernelspace access. Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Depends-on: 4fe5cda9f89d ("powerpc/uaccess: Implement user_read_access_begin and user_write_access_begin") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/22831c9d17f948680a12c5292e7627288b15f713.1631817805.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 197 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 140 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index d8d5f901cee1..86f49e3e7cf5 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -302,33 +302,51 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb) } } -static nokprobe_inline int read_mem_aligned(unsigned long *dest, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; unsigned long x = 0; switch (nb) { case 1: - err = __get_user(x, (unsigned char __user *) ea); + unsafe_get_user(x, (unsigned char __user *)ea, Efault); break; case 2: - err = __get_user(x, (unsigned short __user *) ea); + unsafe_get_user(x, (unsigned short __user *)ea, Efault); break; case 4: - err = __get_user(x, (unsigned int __user *) ea); + unsafe_get_user(x, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(x, (unsigned long __user *) ea); + unsafe_get_user(x, (unsigned long __user *)ea, Efault); break; #endif } - if (!err) - *dest = x; - else + *dest = x; + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __read_mem_aligned(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __read_mem_aligned(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -336,10 +354,8 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest, * Copy from userspace to a buffer, using the largest possible * aligned accesses, up to sizeof(long). */ -static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -348,31 +364,46 @@ static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __get_user(*dest, (unsigned char __user *) ea); + unsafe_get_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __get_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __get_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __get_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_in(dest, ea, nb, regs); + + if (user_read_access_begin((void __user *)ea, nb)) { + err = __copy_mem_in(dest, ea, nb, regs); + user_read_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int read_mem_unaligned(unsigned long *dest, @@ -410,30 +441,48 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb, } NOKPROBE_SYMBOL(read_mem); -static nokprobe_inline int write_mem_aligned(unsigned long val, - unsigned long ea, int nb, - struct pt_regs *regs) +static __always_inline int +__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; - switch (nb) { case 1: - err = __put_user(val, (unsigned char __user *) ea); + unsafe_put_user(val, (unsigned char __user *)ea, Efault); break; case 2: - err = __put_user(val, (unsigned short __user *) ea); + unsafe_put_user(val, (unsigned short __user *)ea, Efault); break; case 4: - err = __put_user(val, (unsigned int __user *) ea); + unsafe_put_user(val, (unsigned int __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(val, (unsigned long __user *) ea); + unsafe_put_user(val, (unsigned long __user *)ea, Efault); break; #endif } - if (err) + return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int +write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __write_mem_aligned(val, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __write_mem_aligned(val, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; regs->dar = ea; + } + return err; } @@ -441,10 +490,8 @@ static nokprobe_inline int write_mem_aligned(unsigned long val, * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). */ -static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, - struct pt_regs *regs) +static nokprobe_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { - int err = 0; int c; for (; nb > 0; nb -= c) { @@ -453,31 +500,46 @@ static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, c = max_align(nb); switch (c) { case 1: - err = __put_user(*dest, (unsigned char __user *) ea); + unsafe_put_user(*dest, (u8 __user *)ea, Efault); break; case 2: - err = __put_user(*(u16 *)dest, - (unsigned short __user *) ea); + unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault); break; case 4: - err = __put_user(*(u32 *)dest, - (unsigned int __user *) ea); + unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault); break; #ifdef __powerpc64__ case 8: - err = __put_user(*(unsigned long *)dest, - (unsigned long __user *) ea); + unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault); break; #endif } - if (err) { - regs->dar = ea; - return err; - } dest += c; ea += c; } return 0; + +Efault: + regs->dar = ea; + return -EFAULT; +} + +static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +{ + int err; + + if (is_kernel_addr(ea)) + return __copy_mem_out(dest, ea, nb, regs); + + if (user_write_access_begin((void __user *)ea, nb)) { + err = __copy_mem_out(dest, ea, nb, regs); + user_write_access_end(); + } else { + err = -EFAULT; + regs->dar = ea; + } + + return err; } static nokprobe_inline int write_mem_unaligned(unsigned long val, @@ -986,10 +1048,24 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, } #endif /* CONFIG_VSX */ +static int __emulate_dcbz(unsigned long ea) +{ + unsigned long i; + unsigned long size = l1_dcache_bytes(); + + for (i = 0; i < size; i += sizeof(long)) + unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault); + + return 0; + +Efault: + return -EFAULT; +} + int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long i, size; + unsigned long size; #ifdef __powerpc64__ size = ppc64_caches.l1d.block_size; @@ -1001,14 +1077,21 @@ int emulate_dcbz(unsigned long ea, struct pt_regs *regs) ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; - for (i = 0; i < size; i += sizeof(long)) { - err = __put_user(0, (unsigned long __user *) (ea + i)); - if (err) { - regs->dar = ea; - return err; - } + + if (is_kernel_addr(ea)) { + err = __emulate_dcbz(ea); + } else if (user_write_access_begin((void __user *)ea, size)) { + err = __emulate_dcbz(ea); + user_write_access_end(); + } else { + err = -EFAULT; } - return 0; + + if (err) + regs->dar = ea; + + + return err; } NOKPROBE_SYMBOL(emulate_dcbz); -- cgit v1.2.3 From 63f501e07a8557bc8ab79d17ef76ae21df1e395d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 17 Sep 2021 15:57:12 +0200 Subject: powerpc/8xx: Simplify TLB handling In the old days, TLB handling for 8xx was using tlbie and tlbia instructions directly as much as possible. But commit f048aace29e0 ("powerpc/mm: Add SMP support to no-hash TLB handling") broke that by introducing out-of-line unnecessary complex functions for booke/smp which don't have tlbie/tlbia instructions and require more complex handling. Restore direct use of tlbie and tlbia for 8xx which is never SMP. With this patch we now get c00ecc68 : c00ecc68: 39 00 00 00 li r8,0 c00ecc6c: 81 46 00 00 lwz r10,0(r6) c00ecc70: 91 06 00 00 stw r8,0(r6) c00ecc74: 7c 00 2a 64 tlbie r5,r0 c00ecc78: 7c 00 04 ac hwsync c00ecc7c: 91 43 00 00 stw r10,0(r3) c00ecc80: 4e 80 00 20 blr Before it was c0012880 : c0012880: 2c 03 00 00 cmpwi r3,0 c0012884: 41 82 00 54 beq c00128d8 c0012888: 81 22 00 00 lwz r9,0(r2) c001288c: 81 43 00 20 lwz r10,32(r3) c0012890: 39 29 00 01 addi r9,r9,1 c0012894: 91 22 00 00 stw r9,0(r2) c0012898: 2c 0a 00 00 cmpwi r10,0 c001289c: 41 82 00 10 beq c00128ac c00128a0: 81 2a 01 dc lwz r9,476(r10) c00128a4: 2c 09 ff ff cmpwi r9,-1 c00128a8: 41 82 00 0c beq c00128b4 c00128ac: 7c 00 22 64 tlbie r4,r0 c00128b0: 7c 00 04 ac hwsync c00128b4: 81 22 00 00 lwz r9,0(r2) c00128b8: 39 29 ff ff addi r9,r9,-1 c00128bc: 2c 09 00 00 cmpwi r9,0 c00128c0: 91 22 00 00 stw r9,0(r2) c00128c4: 4c a2 00 20 bclr+ 4,eq c00128c8: 81 22 00 70 lwz r9,112(r2) c00128cc: 71 29 00 04 andi. r9,r9,4 c00128d0: 4d 82 00 20 beqlr c00128d4: 48 65 76 74 b c0669f48 c00128d8: 81 22 00 00 lwz r9,0(r2) c00128dc: 39 29 00 01 addi r9,r9,1 c00128e0: 91 22 00 00 stw r9,0(r2) c00128e4: 4b ff ff c8 b c00128ac ... c00ecdc8 : c00ecdc8: 94 21 ff f0 stwu r1,-16(r1) c00ecdcc: 39 20 00 00 li r9,0 c00ecdd0: 93 c1 00 08 stw r30,8(r1) c00ecdd4: 83 c6 00 00 lwz r30,0(r6) c00ecdd8: 91 26 00 00 stw r9,0(r6) c00ecddc: 93 e1 00 0c stw r31,12(r1) c00ecde0: 7c 08 02 a6 mflr r0 c00ecde4: 7c 7f 1b 78 mr r31,r3 c00ecde8: 7c 83 23 78 mr r3,r4 c00ecdec: 7c a4 2b 78 mr r4,r5 c00ecdf0: 90 01 00 14 stw r0,20(r1) c00ecdf4: 4b f2 5a 8d bl c0012880 c00ecdf8: 93 df 00 00 stw r30,0(r31) c00ecdfc: 7f e3 fb 78 mr r3,r31 c00ece00: 80 01 00 14 lwz r0,20(r1) c00ece04: 83 c1 00 08 lwz r30,8(r1) c00ece08: 83 e1 00 0c lwz r31,12(r1) c00ece0c: 7c 08 03 a6 mtlr r0 c00ece10: 38 21 00 10 addi r1,r1,16 c00ece14: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fb324f1c8f2ddb57cf6aad1cea26329558f1c1c0.1631887021.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/tlbflush.h | 15 +++++++++++++++ arch/powerpc/mm/nohash/tlb.c | 2 ++ 2 files changed, 17 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index 1edb7243e515..c08d25e3e626 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -32,11 +32,26 @@ extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +#ifdef CONFIG_PPC_8xx +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + unsigned int pid = READ_ONCE(mm->context.id); + + if (pid != MMU_NO_CONTEXT) + asm volatile ("sync; tlbia; isync" : : : "memory"); +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); +} +#else extern void local_flush_tlb_mm(struct mm_struct *mm); extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); +#endif #ifdef CONFIG_SMP extern void flush_tlb_mm(struct mm_struct *mm); diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index fc195b9f524b..89353d4f5604 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -185,6 +185,7 @@ EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); * processor */ +#ifndef CONFIG_PPC_8xx /* * These are the base non-SMP variants of page and mm flushing */ @@ -218,6 +219,7 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) mmu_get_tsize(mmu_virtual_psize), 0); } EXPORT_SYMBOL(local_flush_tlb_page); +#endif /* * And here are the SMP non-local implementations -- cgit v1.2.3 From fdacae8a84024474afff234bdd1dbe19ad597a10 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Sep 2021 19:13:53 +0200 Subject: powerpc: Activate CONFIG_STRICT_KERNEL_RWX by default CONFIG_STRICT_KERNEL_RWX should be set by default on every architectures (See https://github.com/KSPP/linux/issues/4) On PPC32 we have to find a compromise between performance and/or memory wasting and selection of strict_kernel_rwx, because it implies either smaller memory chunks or larger alignment between RO memory and RW memory. For instance the 8xx maps memory with 8M pages. So either the limit between RO and RW must be 8M aligned or it falls back or 512k pages which implies more pressure on the TLB. book3s/32 maps memory with BATs as much as possible. BATS can have any power-of-two size between 128k and 256M but we have only 4 to 8 BATs so the alignment must be good enough to allow efficient use of the BATs and avoid falling back on standard page mapping which would kill performance. So let's go one step forward and make it the default but still allow users to unset it when wanted. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/057c40164084bfc7d77c0b2ff78d95dbf6a2a21b.1632503622.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5d0586214662..e43e17987b92 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x -- cgit v1.2.3 From a61ec782a754229b24aae2d6c2109510d6420ae6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 22 Sep 2021 15:37:18 +0200 Subject: powerpc/breakpoint: Cleanup cache_op_size() does exactly the same as l1_dcache_bytes(). Remove it. MSR_64BIT already exists, no need to enclode the check around #ifdef __powerpc64__ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6184b08088312a7d787d450eb902584e4ae77f7a.1632317816.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/hw_breakpoint_constraints.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 675d1f66ab72..42b967e3d85c 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -127,15 +127,6 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } -static int cache_op_size(void) -{ -#ifdef __powerpc64__ - return ppc64_caches.l1d.block_size; -#else - return L1_CACHE_BYTES; -#endif -} - void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, int *type, int *size, unsigned long *ea) { @@ -147,14 +138,14 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, analyse_instr(&op, regs, *instr); *type = GETTYPE(op.type); *ea = op.ea; -#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) *ea &= 0xffffffffUL; -#endif + *size = GETSIZE(op.type); if (*type == CACHEOP) { - *size = cache_op_size(); + *size = l1_dcache_bytes(); *ea &= ~(*size - 1); } else if (*type == LOAD_VMX || *type == STORE_VMX) { *ea &= ~(*size - 1); -- cgit v1.2.3 From c7d19189d7241e135cd2e450a7fbc77cb7bd40ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 17 Sep 2021 15:57:31 +0200 Subject: powerpc/32: Don't use a struct based type for pte_t Long time ago we had a config item called STRICT_MM_TYPECHECKS to build the kernel with pte_t defined as a structure in order to perform additional build checks or build it with pte_t defined as a simple type in order to get simpler generated code. Commit 670eea924198 ("powerpc/mm: Always use STRICT_MM_TYPECHECKS") made the struct based definition the only one, considering that the generated code was similar in both cases. That's right on ppc64 because the ABI is such that the content of a struct having a single simple type element is passed as register, but on ppc32 such a structure is passed via the stack like any structure. Simple test function: pte_t test(pte_t pte) { return pte; } Before this patch we get c00108ec : c00108ec: 81 24 00 00 lwz r9,0(r4) c00108f0: 91 23 00 00 stw r9,0(r3) c00108f4: 4e 80 00 20 blr So, for PPC32, restore the simple type behaviour we got before commit 670eea924198, but instead of adding a config option to activate type check, do it when __CHECKER__ is set so that type checking is performed by 'sparse' and provides feedback like: arch/powerpc/mm/pgtable.c:466:16: warning: incorrect type in return expression (different base types) arch/powerpc/mm/pgtable.c:466:16: expected unsigned long arch/powerpc/mm/pgtable.c:466:16: got struct pte_t [usertype] x With this patch we now get c0010890 : c0010890: 4e 80 00 20 blr Signed-off-by: Christophe Leroy [mpe: Define STRICT_MM_TYPECHECKS rather than repeating the condition] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c904599f33aaf6bb7ee2836a9ff8368509e0d78d.1631887042.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/pgtable-types.h | 18 +++++++++++++++++- arch/powerpc/mm/pgtable.c | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index f06ae00f2a65..34ce50da1850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -245,7 +245,7 @@ static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { - pte_basic_t *entry = &p->pte; + pte_basic_t *entry = (pte_basic_t *)p; pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index d11b4c61d686..efed0db7b1db 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -2,17 +2,33 @@ #ifndef _ASM_POWERPC_PGTABLE_TYPES_H #define _ASM_POWERPC_PGTABLE_TYPES_H +#if defined(__CHECKER__) || !defined(CONFIG_PPC32) +#define STRICT_MM_TYPECHECKS +#endif + /* PTE level */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t; -#else +#elif defined(STRICT_MM_TYPECHECKS) typedef struct { pte_basic_t pte; } pte_t; +#else +typedef pte_basic_t pte_t; #endif + +#if defined(STRICT_MM_TYPECHECKS) || \ + (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define __pte(x) ((pte_t) { (x) }) static inline pte_basic_t pte_val(pte_t x) { return x.pte; } +#else +#define __pte(x) ((pte_t)(x)) +static inline pte_basic_t pte_val(pte_t x) +{ + return x; +} +#endif /* PMD level */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index cd16b407f47e..ce9482383144 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -271,7 +271,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ { pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; - pte_basic_t *entry = &ptep->pte; + pte_basic_t *entry = (pte_basic_t *)ptep; int num, i; /* -- cgit v1.2.3 From 319fa1a52e438a6e028329187783a25ad498c4e6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 20 Oct 2021 14:47:03 -0500 Subject: powerpc/pseries/mobility: ignore ibm, platform-facilities updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VMs with NX encryption, compression, and/or RNG offload, these capabilities are described by nodes in the ibm,platform-facilities device tree hierarchy: $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/ /sys/firmware/devicetree/base/ibm,platform-facilities/ ├── ibm,compression-v1 ├── ibm,random-v1 └── ibm,sym-encryption-v1 3 directories The acceleration functions that these nodes describe are not disrupted by live migration, not even temporarily. But the post-migration ibm,update-nodes sequence firmware always sends "delete" messages for this hierarchy, followed by an "add" directive to reconstruct it via ibm,configure-connector (log with debugging statements enabled in mobility.c): mobility: removing node /ibm,platform-facilities/ibm,random-v1:4294967285 mobility: removing node /ibm,platform-facilities/ibm,compression-v1:4294967284 mobility: removing node /ibm,platform-facilities/ibm,sym-encryption-v1:4294967283 mobility: removing node /ibm,platform-facilities:4294967286 ... mobility: added node /ibm,platform-facilities:4294967286 Note we receive a single "add" message for the entire hierarchy, and what we receive from the ibm,configure-connector sequence is the top-level platform-facilities node along with its three children. The debug message simply reports the parent node and not the whole subtree. Also, significantly, the nodes added are almost completely equivalent to the ones removed; even phandles are unchanged. ibm,shared-interrupt-pool in the leaf nodes is the only property I've observed to differ, and Linux does not use that. So in practice, the sum of update messages Linux receives for this hierarchy is equivalent to minor property updates. We succeed in removing the original hierarchy from the device tree. But the vio bus code is ignorant of this, and does not unbind or relinquish its references. The leaf nodes, still reachable through sysfs, of course still refer to the now-freed ibm,platform-facilities parent node, which makes use-after-free possible: refcount_t: addition on 0; use-after-free. WARNING: CPU: 3 PID: 1706 at lib/refcount.c:25 refcount_warn_saturate+0x164/0x1f0 refcount_warn_saturate+0x160/0x1f0 (unreliable) kobject_get+0xf0/0x100 of_node_get+0x30/0x50 of_get_parent+0x50/0xb0 of_fwnode_get_parent+0x54/0x90 fwnode_count_parents+0x50/0x150 fwnode_full_name_string+0x30/0x110 device_node_string+0x49c/0x790 vsnprintf+0x1c0/0x4c0 sprintf+0x44/0x60 devspec_show+0x34/0x50 dev_attr_show+0x40/0xa0 sysfs_kf_seq_show+0xbc/0x200 kernfs_seq_show+0x44/0x60 seq_read_iter+0x2a4/0x740 kernfs_fop_read_iter+0x254/0x2e0 new_sync_read+0x120/0x190 vfs_read+0x1d0/0x240 Moreover, the "new" replacement subtree is not correctly added to the device tree, resulting in ibm,platform-facilities parent node without the appropriate leaf nodes, and broken symlinks in the sysfs device hierarchy: $ tree -d /sys/firmware/devicetree/base/ibm,platform-facilities/ /sys/firmware/devicetree/base/ibm,platform-facilities/ 0 directories $ cd /sys/devices/vio ; find . -xtype l -exec file {} + ./ibm,sym-encryption-v1/of_node: broken symbolic link to ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,sym-encryption-v1 ./ibm,random-v1/of_node: broken symbolic link to ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,random-v1 ./ibm,compression-v1/of_node: broken symbolic link to ../../../firmware/devicetree/base/ibm,platform-facilities/ibm,compression-v1 This is because add_dt_node() -> dlpar_attach_node() attaches only the parent node returned from configure-connector, ignoring any children. This should be corrected for the general case, but fixing that won't help with the stale OF node references, which is the more urgent problem. One way to address that would be to make the drivers respond to node removal notifications, so that node references can be dropped appropriately. But this would likely force the drivers to disrupt active clients for no useful purpose: equivalent nodes are immediately re-added. And recall that the acceleration capabilities described by the nodes remain available throughout the whole process. The solution I believe to be robust for this situation is to convert remove+add of a node with an unchanged phandle to an update of the node's properties in the Linux device tree structure. That would involve changing and adding a fair amount of code, and may take several iterations to land. Until that can be realized we have a confirmed use-after-free and the possibility of memory corruption. So add a limited workaround that discriminates on the node type, ignoring adds and removes. This should be amenable to backporting in the meantime. Fixes: 410bccf97881 ("powerpc/pseries: Partition migration in the kernel") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211020194703.2613093-1-nathanl@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e83e0891272d..210a37a065fb 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -63,6 +63,27 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) static int delete_dt_node(struct device_node *dn) { + struct device_node *pdn; + bool is_platfac; + + pdn = of_get_parent(dn); + is_platfac = of_node_is_type(dn, "ibm,platform-facilities") || + of_node_is_type(pdn, "ibm,platform-facilities"); + of_node_put(pdn); + + /* + * The drivers that bind to nodes in the platform-facilities + * hierarchy don't support node removal, and the removal directive + * from firmware is always followed by an add of an equivalent + * node. The capability (e.g. RNG, encryption, compression) + * represented by the node is never interrupted by the migration. + * So ignore changes to this part of the tree. + */ + if (is_platfac) { + pr_notice("ignoring remove operation for %pOFfp\n", dn); + return 0; + } + pr_debug("removing node %pOFfp\n", dn); dlpar_detach_node(dn); return 0; @@ -222,6 +243,19 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) if (!dn) return -ENOENT; + /* + * Since delete_dt_node() ignores this node type, this is the + * necessary counterpart. We also know that a platform-facilities + * node returned from dlpar_configure_connector() has children + * attached, and dlpar_attach_node() only adds the parent, leaking + * the children. So ignore these on the add side for now. + */ + if (of_node_is_type(dn, "ibm,platform-facilities")) { + pr_notice("ignoring add operation for %pOF\n", dn); + dlpar_free_cc_nodes(dn); + return 0; + } + rc = dlpar_attach_node(dn, parent_dn); if (rc) dlpar_free_cc_nodes(dn); -- cgit v1.2.3 From b7472e1764bfc0fe3d6578cb281e81c812ca5886 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 27 Oct 2021 22:29:31 +1100 Subject: Revert "powerpc/audit: Convert powerpc to AUDIT_ARCH_COMPAT_GENERIC" This reverts commit 566af8cda399c088763d07464463dc871c943b54. This caused some conflicts vs the audit tree, and the audit maintainers would prefer we postpone this to the next merge window so we have more time for testing. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 5 ++- arch/powerpc/include/asm/unistd32.h | 7 ---- arch/powerpc/kernel/Makefile | 3 ++ arch/powerpc/kernel/audit.c | 84 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/compat_audit.c | 44 +++++++++++++++++++ 5 files changed, 135 insertions(+), 8 deletions(-) delete mode 100644 arch/powerpc/include/asm/unistd32.h create mode 100644 arch/powerpc/kernel/audit.c create mode 100644 arch/powerpc/kernel/compat_audit.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e43e17987b92..4267d27bef52 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -162,7 +162,6 @@ config PPC select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE - select AUDIT_ARCH_COMPAT_GENERIC select BINFMT_ELF select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS @@ -317,6 +316,10 @@ config GENERIC_TBSYNC bool default y if PPC32 && SMP +config AUDIT_ARCH + bool + default y + config GENERIC_BUG bool default y diff --git a/arch/powerpc/include/asm/unistd32.h b/arch/powerpc/include/asm/unistd32.h deleted file mode 100644 index 07689897d206..000000000000 --- a/arch/powerpc/include/asm/unistd32.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_UNISTD32_H_ -#define _ASM_POWERPC_UNISTD32_H_ - -#include - -#endif /* _ASM_POWERPC_UNISTD32_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index d079292525b6..0e3640e14eb1 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -125,6 +125,9 @@ obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_AUDIT) += audit.o +obj64-$(CONFIG_AUDIT) += compat_audit.o + obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-y += trace/ diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c new file mode 100644 index 000000000000..a2dddd7f3d09 --- /dev/null +++ b/arch/powerpc/kernel/audit.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +static unsigned dir_class[] = { +#include +~0U +}; + +static unsigned read_class[] = { +#include +~0U +}; + +static unsigned write_class[] = { +#include +~0U +}; + +static unsigned chattr_class[] = { +#include +~0U +}; + +static unsigned signal_class[] = { +#include +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_PPC64 + if (arch == AUDIT_ARCH_PPC) + return 1; +#endif + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ +#ifdef CONFIG_PPC64 + extern int ppc32_classify_syscall(unsigned); + if (abi == AUDIT_ARCH_PPC) + return ppc32_classify_syscall(syscall); +#endif + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ +#ifdef CONFIG_PPC64 + extern __u32 ppc32_dir_class[]; + extern __u32 ppc32_write_class[]; + extern __u32 ppc32_read_class[]; + extern __u32 ppc32_chattr_class[]; + extern __u32 ppc32_signal_class[]; + audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class); + audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class); +#endif + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c new file mode 100644 index 000000000000..55c6ccda0a85 --- /dev/null +++ b/arch/powerpc/kernel/compat_audit.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#undef __powerpc64__ +#include + +unsigned ppc32_dir_class[] = { +#include +~0U +}; + +unsigned ppc32_chattr_class[] = { +#include +~0U +}; + +unsigned ppc32_write_class[] = { +#include +~0U +}; + +unsigned ppc32_read_class[] = { +#include +~0U +}; + +unsigned ppc32_signal_class[] = { +#include +~0U +}; + +int ppc32_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} -- cgit v1.2.3 From f22969a6604165df658b99221b0aeb6918a9026d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 15 Oct 2021 10:46:49 +1030 Subject: powerpc/64s: Default to 64K pages for 64 bit book3s For 64-bit book3s the default should be 64K as that's what modern CPUs are designed for. The following defconfigs already set CONFIG_PPC_64K_PAGES: cell_defconfig pasemi_defconfig powernv_defconfig ppc64_defconfig pseries_defconfig skiroot_defconfig The have the option removed from the defconfig, as it is now the default. The defconfigs that now need to set CONFIG_PPC_4K_PAGES to maintain their existing behaviour are: g5_defconfig maple_defconfig microwatt_defconfig ps3_defconfig Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman BugLink: https://github.com/linuxppc/issues/issues/109 Link: https://lore.kernel.org/r/20211015001649.45591-1-joel@jms.id.au --- arch/powerpc/Kconfig | 1 + arch/powerpc/configs/cell_defconfig | 1 - arch/powerpc/configs/g5_defconfig | 1 + arch/powerpc/configs/maple_defconfig | 1 + arch/powerpc/configs/microwatt_defconfig | 1 + arch/powerpc/configs/pasemi_defconfig | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 1 - 11 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4267d27bef52..dea74d7717c0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -710,6 +710,7 @@ config ARCH_MEMORY_PROBE choice prompt "Page size" + default PPC_64K_PAGES if PPC_BOOK3S_64 default PPC_4K_PAGES help Select the kernel logical page size. Increasing the page size diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index cc2c0d51f493..7fd9e596ea33 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -36,7 +36,6 @@ CONFIG_GEN_RTC=y CONFIG_BINFMT_MISC=m CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y -CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_PCIEPORTBUS=y CONFIG_NET=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 63d611cc160f..9d6212a8b195 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -26,6 +26,7 @@ CONFIG_CPU_FREQ_PMAC64=y CONFIG_GEN_RTC=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y +CONFIG_PPC_4K_PAGES=y CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 9424c1e67e1c..c821a97f4a89 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -25,6 +25,7 @@ CONFIG_UDBG_RTAS_CONSOLE=y CONFIG_GEN_RTC=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y +CONFIG_PPC_4K_PAGES=y CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index 9465209b8c5b..07d87a4044b2 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -26,6 +26,7 @@ CONFIG_PPC_MICROWATT=y # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_CPU_FREQ=y CONFIG_HZ_100=y +CONFIG_PPC_4K_PAGES=y # CONFIG_PPC_MEM_KEYS is not set # CONFIG_SECCOMP is not set # CONFIG_MQ_IOSCHED_KYBER is not set diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 78606b7e42df..e00a703581c3 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -22,7 +22,6 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_HZ_1000=y -CONFIG_PPC_64K_PAGES=y # CONFIG_SECCOMP is not set CONFIG_PCI_MSI=y CONFIG_PCCARD=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 8bfeea6c7de7..49f49c263935 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -62,7 +62,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_HWPOISON_INJECT=m CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y -CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_HOTPLUG_PCI=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 0ad2291337a7..203d0b7f0bb8 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -52,7 +52,6 @@ CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index f300dcb937cc..7c95fab4b920 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -30,6 +30,7 @@ CONFIG_PS3_LPM=m # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=y CONFIG_KEXEC=y +CONFIG_PPC_4K_PAGES=y # CONFIG_SPARSEMEM_VMEMMAP is not set # CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index b183629f1bcf..de7641adb899 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -56,7 +56,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index b806a5d3a695..396d508730a1 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -43,7 +43,6 @@ CONFIG_KEXEC_FILE=y CONFIG_PRESERVE_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y -CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set -- cgit v1.2.3 From b949d009dd52ecdced248889cf11297677f9e8a6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 26 Oct 2021 15:48:29 +0200 Subject: powerpc/boot: Set LC_ALL=C in wrapper script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While trying to build a simple Image for ACADIA platform, I got the following error: WRAP arch/powerpc/boot/simpleImage.acadia INFO: Uncompressed kernel (size 0x6ae7d0) overlaps the address of the wrapper(0x400000) INFO: Fixing the link_address of wrapper to (0x700000) powerpc64-linux-gnu-ld : mode d'émulation non reconnu : -T Émulations prises en charge : elf64ppc elf32ppc elf32ppclinux elf32ppcsim elf64lppc elf32lppc elf32lppclinux elf32lppcsim make[1]: *** [arch/powerpc/boot/Makefile:424 : arch/powerpc/boot/simpleImage.acadia] Erreur 1 make: *** [arch/powerpc/Makefile:285 : simpleImage.acadia] Erreur 2 Trying again with V=1 shows the following command powerpc64-linux-gnu-ld -m -T arch/powerpc/boot/zImage.lds -Ttext 0x700000 --no-dynamic-linker -o arch/powerpc/boot/simpleImage.acadia -Map wrapper.map arch/powerpc/boot/fixed-head.o arch/powerpc/boot/simpleboot.o ./zImage.3278022.o arch/powerpc/boot/wrapper.a The argument of '-m' is missing. This is due to the wrapper script calling 'objdump -p vmlinux' and looking for 'file format', whereas the output of objdump is: vmlinux: format de fichier elf32-powerpc En-tête de programme: LOAD off 0x00010000 vaddr 0xc0000000 paddr 0x00000000 align 2**16 filesz 0x0069e1d4 memsz 0x006c128c flags rwx NOTE off 0x0064591c vaddr 0xc063591c paddr 0x0063591c align 2**2 filesz 0x00000054 memsz 0x00000054 flags --- Add LC_ALL=C at the beginning of the wrapper script in order to get the output expected by the script: vmlinux: file format elf32-powerpc Program Header: LOAD off 0x00010000 vaddr 0xc0000000 paddr 0x00000000 align 2**16 filesz 0x0069e1d4 memsz 0x006c128c flags rwx NOTE off 0x0064591c vaddr 0xc063591c paddr 0x0063591c align 2**2 filesz 0x00000054 memsz 0x00000054 flags --- Signed-off-by: Christophe Leroy Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a9ff3bc98035f63b122c051f02dc47c7aed10430.1635256089.git.christophe.leroy@csgroup.eu --- arch/powerpc/boot/wrapper | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1cd82564c996..9184eda780fd 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -26,6 +26,8 @@ # Stop execution if any command fails set -e +export LC_ALL=C + # Allow for verbose output if [ "$V" = 1 ]; then set -x -- cgit v1.2.3 From 4a5cb51f3db4be547225a4bce7a43d41b231382b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 26 Oct 2021 22:25:31 +1000 Subject: powerpc/64s/interrupt: Fix check_return_regs_valid() false positive The check_return_regs_valid() can cause a false positive if the return regs are marked as norestart and they are an HSRR type interrupt, because the low bit in the bottom of regs->trap causes interrupt type matching to fail. This can occcur for example on bare metal with a HV privileged doorbell interrupt that causes a signal, but do_signal returns early because get_signal() fails, and takes the "No signal to deliver" path. In this case no signal was delivered so the return location is not changed so return SRRs are not invalidated, yet set_trap_norestart is called, which messes up the match. Building go-1.16.6 is known to reproduce this. Fix it by using the TRAP() accessor which masks out the low bit. Fixes: 6eaaf9de3599 ("powerpc/64s/interrupt: Check and fix srr_valid without crashing") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211026122531.3599918-1-npiggin@gmail.com --- arch/powerpc/kernel/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index de10a2697258..835b626cd476 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -266,7 +266,7 @@ static void check_return_regs_valid(struct pt_regs *regs) if (trap_is_scv(regs)) return; - trap = regs->trap; + trap = TRAP(regs); // EE in HV mode sets HSRRs like 0xea0 if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) trap = 0xea0; -- cgit v1.2.3 From cb662608e546d755e3e1b51b30a269459323bf24 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Mon, 25 Oct 2021 20:24:36 +1000 Subject: selftests/powerpc: Use date instead of EPOCHSECONDS in mitigation-patching.sh The EPOCHSECONDS environment variable was added in bash 5.0 (released 2019). Some distributions of the "stable" and "long-term" variety ship older versions of bash than this, so swap to using the date command instead. "%s" was added to coreutils `date` in 1993 so we should be good, but who knows, it is a GNU extension and not part of the POSIX spec for `date`. Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211025102436.19177-1-ruscur@russell.cc --- tools/testing/selftests/powerpc/security/mitigation-patching.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index 00197acb7ff1..b0b20e0b4e30 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -13,7 +13,7 @@ function do_one orig=$(cat "$mitigation") - start=$EPOCHSECONDS + start=$(date +%s) now=$start while [[ $((now-start)) -lt "$TIMEOUT" ]] @@ -21,7 +21,7 @@ function do_one echo 0 > "$mitigation" echo 1 > "$mitigation" - now=$EPOCHSECONDS + now=$(date +%s) done echo "$orig" > "$mitigation" -- cgit v1.2.3 From 44a8214de96bafb5210e43bfa2c97c19bf75af3d Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Mon, 25 Oct 2021 11:26:49 +0530 Subject: powerpc/bpf: Fix write protecting JIT code Running program with bpf-to-bpf function calls results in data access exception (0x300) with the below call trace: bpf_int_jit_compile+0x238/0x750 (unreliable) bpf_check+0x2008/0x2710 bpf_prog_load+0xb00/0x13a0 __sys_bpf+0x6f4/0x27c0 sys_bpf+0x2c/0x40 system_call_exception+0x164/0x330 system_call_vectored_common+0xe8/0x278 as bpf_int_jit_compile() tries writing to write protected JIT code location during the extra pass. Fix it by holding off write protection of JIT code until the extra pass, where branch target addresses fixup happens. Fixes: 62e3d4210ac9 ("powerpc/bpf: Write protect JIT code") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Hari Bathini Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211025055649.114728-1-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 53aefee3fe70..0df97af4dcaf 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -237,8 +237,8 @@ skip_codegen_passes: fp->jited_len = alloclen; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - bpf_jit_binary_lock_ro(bpf_hdr); if (!fp->is_func || extra_pass) { + bpf_jit_binary_lock_ro(bpf_hdr); bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: kfree(addrs); -- cgit v1.2.3 From b1b93cb7e794e914787bf7d9936b57a149cdee4f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 26 Oct 2021 07:39:24 +0200 Subject: powerpc/nohash: Fix __ptep_set_access_flags() and ptep_set_wrprotect() Commit 26973fa5ac0e ("powerpc/mm: use pte helpers in generic code") changed those two functions to use pte helpers to determine which bits to clear and which bits to set. This change was based on the assumption that bits to be set/cleared are always the same and can be determined by applying the pte manipulation helpers on __pte(0). But on platforms like book3e, the bits depend on whether the page is a user page or not. For the time being it more or less works because of _PAGE_EXEC being used for user pages only and exec right being set at all time on kernel page. But following patch will clean that and output of pte_mkexec() will depend on the page being a user or kernel page. Instead of trying to make an even more complicated helper where bits would become dependent on the final pte value, come back to a more static situation like before commit 26973fa5ac0e ("powerpc/mm: use pte helpers in generic code"), by introducing an 8xx specific version of __ptep_set_access_flags() and ptep_set_wrprotect(). Fixes: 26973fa5ac0e ("powerpc/mm: use pte helpers in generic code") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/922bdab3a220781bae2360ff3dd5adb7fe4d34f1.1635226743.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 17 ++++++++--------- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 34ce50da1850..11c6849f7864 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -306,30 +306,29 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, } #define __HAVE_ARCH_PTEP_SET_WRPROTECT +#ifndef ptep_set_wrprotect static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); - unsigned long set = pte_val(pte_wrprotect(__pte(0))); - - pte_update(mm, addr, ptep, clr, set, 0); + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); } +#endif +#ifndef __ptep_set_access_flags static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t entry, unsigned long address, int psize) { - pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0))))); - pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0))))); - unsigned long set = pte_val(entry) & pte_val(pte_set); - unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); + unsigned long set = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); int huge = psize > mmu_virtual_psize ? 1 : 0; - pte_update(vma->vm_mm, address, ptep, clr, set, huge); + pte_update(vma->vm_mm, address, ptep, 0, set, huge); flush_tlb_page(vma, address); } +#endif static inline int pte_young(pte_t pte) { diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index fcc48d590d88..1a89ebdc3acc 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -136,6 +136,28 @@ static inline pte_t pte_mkhuge(pte_t pte) #define pte_mkhuge pte_mkhuge +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge); + +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_update(mm, addr, ptep, 0, _PAGE_RO, 0); +} +#define ptep_set_wrprotect ptep_set_wrprotect + +static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, + pte_t entry, unsigned long address, int psize) +{ + unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_EXEC); + unsigned long clr = ~pte_val(entry) & _PAGE_RO; + int huge = psize > mmu_virtual_psize ? 1 : 0; + + pte_update(vma->vm_mm, address, ptep, clr, set, huge); + + flush_tlb_page(vma, address); +} +#define __ptep_set_access_flags __ptep_set_access_flags + static inline unsigned long pgd_leaf_size(pgd_t pgd) { if (pgd_val(pgd) & _PMD_PAGE_8M) -- cgit v1.2.3 From b6cb20fdc2735f8b2e082937066c33fe376c2ee2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 26 Oct 2021 07:39:25 +0200 Subject: powerpc/book3e: Fix set_memory_x() and set_memory_nx() set_memory_x() calls pte_mkexec() which sets _PAGE_EXEC. set_memory_nx() calls pte_exprotec() which clears _PAGE_EXEC. Book3e has 2 bits, UX and SX, which defines the exec rights resp. for user (PR=1) and for kernel (PR=0). _PAGE_EXEC is defined as UX only. An executable kernel page is set with either _PAGE_KERNEL_RWX or _PAGE_KERNEL_ROX, which both have SX set and UX cleared. So set_memory_nx() call for an executable kernel page does nothing because UX is already cleared. And set_memory_x() on a non-executable kernel page makes it executable for the user and keeps it non-executable for kernel. Also, pte_exec() always returns 'false' on kernel pages, because it checks _PAGE_EXEC which doesn't include SX, so for instance the W+X check doesn't work. To fix this: - change tlb_low_64e.S to use _PAGE_BAP_UX instead of _PAGE_USER - sets both UX and SX in _PAGE_EXEC so that pte_exec() returns true whenever one of the two bits is set and pte_exprotect() clears both bits. - Define a book3e specific version of pte_mkexec() which sets either SX or UX based on UR. Fixes: 1f9ad21c3b38 ("powerpc/mm: Implement set_memory() routines") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c41100f9c144dc5b62e5a751b810190c6b5d42fd.1635226743.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 ++ arch/powerpc/include/asm/nohash/64/pgtable.h | 5 ----- arch/powerpc/include/asm/nohash/pte-book3e.h | 18 ++++++++++++++---- arch/powerpc/mm/nohash/tlb_low_64e.S | 8 ++++---- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 11c6849f7864..b67742e2a9b2 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -193,10 +193,12 @@ static inline pte_t pte_wrprotect(pte_t pte) } #endif +#ifndef pte_mkexec static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) | _PAGE_EXEC); } +#endif #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index d081704b13fb..9d2905a47410 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -118,11 +118,6 @@ static inline pte_t pte_wrprotect(pte_t pte) return __pte(pte_val(pte) & ~_PAGE_RW); } -static inline pte_t pte_mkexec(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_EXEC); -} - #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 813918f40765..f798640422c2 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -48,7 +48,7 @@ #define _PAGE_WRITETHRU 0x800000 /* W: cache write-through */ /* "Higher level" linux bit combinations */ -#define _PAGE_EXEC _PAGE_BAP_UX /* .. and was cache cleaned */ +#define _PAGE_EXEC (_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */ #define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */ #define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_BAP_SR) @@ -93,11 +93,11 @@ /* Permission masks used to generate the __P and __S table */ #define PAGE_NONE __pgprot(_PAGE_BASE) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_BAP_UX) #define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) #ifndef __ASSEMBLY__ static inline pte_t pte_mkprivileged(pte_t pte) @@ -113,6 +113,16 @@ static inline pte_t pte_mkuser(pte_t pte) } #define pte_mkuser pte_mkuser + +static inline pte_t pte_mkexec(pte_t pte) +{ + if (pte_val(pte) & _PAGE_BAP_UR) + return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); + else + return __pte((pte_val(pte) & ~_PAGE_BAP_UX) | _PAGE_BAP_SX); +} +#define pte_mkexec pte_mkexec + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index bf24451f3e71..9235e720e357 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -222,7 +222,7 @@ tlb_miss_kernel_bolted: tlb_miss_fault_bolted: /* We need to check if it was an instruction miss */ - andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX + andi. r10,r11,_PAGE_BAP_UX|_PAGE_BAP_SX bne itlb_miss_fault_bolted dtlb_miss_fault_bolted: tlb_epilog_bolted @@ -239,7 +239,7 @@ itlb_miss_fault_bolted: srdi r15,r16,60 /* get region */ bne- itlb_miss_fault_bolted - li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ + li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */ /* We do the user/kernel test for the PID here along with the RW test */ @@ -614,7 +614,7 @@ itlb_miss_fault_e6500: /* We do the user/kernel test for the PID here along with the RW test */ - li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ + li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h cmpldi cr0,r15,0 /* Check for user region */ @@ -734,7 +734,7 @@ normal_tlb_miss_done: normal_tlb_miss_access_fault: /* We need to check if it was an instruction miss */ - andi. r10,r11,_PAGE_EXEC + andi. r10,r11,_PAGE_BAP_UX bne 1f ld r14,EX_TLB_DEAR(r12) ld r15,EX_TLB_ESR(r12) -- cgit v1.2.3 From 44c14509b0dabb909ad1ec28800893ea71762732 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 26 Oct 2021 07:39:26 +0200 Subject: powerpc/fsl_booke: Fix setting of exec flag when setting TLBCAMs Building tqm8541_defconfig results in: arch/powerpc/mm/nohash/fsl_book3e.c: In function 'settlbcam': arch/powerpc/mm/nohash/fsl_book3e.c:126:40: error: '_PAGE_BAP_SX' undeclared (first use in this function) 126 | TLBCAM[index].MAS3 |= (flags & _PAGE_BAP_SX) ? MAS3_SX : 0; | ^~~~~~~~~~~~ arch/powerpc/mm/nohash/fsl_book3e.c:126:40: note: each undeclared identifier is reported only once for each function it appears in make[3]: *** [scripts/Makefile.build:277: arch/powerpc/mm/nohash/fsl_book3e.o] Error 1 make[2]: *** [scripts/Makefile.build:540: arch/powerpc/mm/nohash] Error 2 make[1]: *** [scripts/Makefile.build:540: arch/powerpc/mm] Error 2 make: *** [Makefile:1868: arch/powerpc] Error 2 This is because _PAGE_BAP_SX is not defined when using 32 bits PTE. Now that _PAGE_EXEC contains both _PAGE_BAP_SX and _PAGE_BAP_UX, it can be used instead. Fixes: 01116e6e98b0 ("powerpc/fsl_booke: Take exec flag into account when setting TLBCAMs") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/91a0235e7f2a85308b84aa5b9efd8d022e2b899a.1635226743.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/fsl_book3e.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index 978e0bcdfa2c..b231a54f540c 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -123,7 +123,6 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR; - TLBCAM[index].MAS3 |= (flags & _PAGE_BAP_SX) ? MAS3_SX : 0; TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0; if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; @@ -133,6 +132,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS3 |= MAS3_UR; TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0; TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0; + } else { + TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0; } tlbcam_addrs[index].start = virt; -- cgit v1.2.3 From 5d354dc35ebb0b224d627264c60f60dbda3a1bc3 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 21 Oct 2021 12:56:57 +0200 Subject: powerpc/83xx/mpc8349emitx: Make mcu_gpiochip_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now mcu_gpiochip_remove() returns zero unconditionally. Make it return void instead which makes it easier to see in the callers that there is no error to handle. Also the return value of i2c remove callbacks is ignored anyway. Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211021105657.72572-1-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 409481016928..bb789f33c70e 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -135,11 +135,10 @@ static int mcu_gpiochip_add(struct mcu *mcu) return gpiochip_add_data(gc, mcu); } -static int mcu_gpiochip_remove(struct mcu *mcu) +static void mcu_gpiochip_remove(struct mcu *mcu) { kfree(mcu->gc.label); gpiochip_remove(&mcu->gc); - return 0; } static int mcu_probe(struct i2c_client *client) @@ -198,9 +197,7 @@ static int mcu_remove(struct i2c_client *client) glob_mcu = NULL; } - ret = mcu_gpiochip_remove(mcu); - if (ret) - return ret; + mcu_gpiochip_remove(mcu); kfree(mcu); return 0; } -- cgit v1.2.3 From 3c12b4df8d5e026345a19886ae375b3ebc33c0b6 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 27 Oct 2021 17:24:10 +1000 Subject: powerpc/security: Use a mutex for interrupt exit code patching The mitigation-patching.sh script in the powerpc selftests toggles all mitigations on and off simultaneously, revealing that rfi_flush and stf_barrier cannot safely operate at the same time due to races in updating the static key. On some systems, the static key code throws a warning and the kernel remains functional. On others, the kernel will hang or crash. Fix this by slapping on a mutex. Fixes: 13799748b957 ("powerpc/64: use interrupt restart table to speed up return from interrupt") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Russell Currey Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211027072410.40950-1-ruscur@russell.cc --- arch/powerpc/lib/feature-fixups.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index cda17bee5afe..c3e06922468b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -228,6 +228,7 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) static bool stf_exit_reentrant = false; static bool rfi_exit_reentrant = false; +static DEFINE_MUTEX(exit_flush_lock); static int __do_stf_barrier_fixups(void *data) { @@ -253,6 +254,9 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) * low level interrupt exit code before patching. After the patching, * if allowed, then flip the branch to allow fast exits. */ + + // Prevent static key update races with do_rfi_flush_fixups() + mutex_lock(&exit_flush_lock); static_branch_enable(&interrupt_exit_not_reentrant); stop_machine(__do_stf_barrier_fixups, &types, NULL); @@ -264,6 +268,8 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) if (stf_exit_reentrant && rfi_exit_reentrant) static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -486,6 +492,9 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) * without stop_machine, so this could be achieved with a broadcast * IPI instead, but this matches the stf sequence. */ + + // Prevent static key update races with do_stf_barrier_fixups() + mutex_lock(&exit_flush_lock); static_branch_enable(&interrupt_exit_not_reentrant); stop_machine(__do_rfi_flush_fixups, &types, NULL); @@ -497,6 +506,8 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) if (stf_exit_reentrant && rfi_exit_reentrant) static_branch_disable(&interrupt_exit_not_reentrant); + + mutex_unlock(&exit_flush_lock); } void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) -- cgit v1.2.3 From 235cee162459d96153d63651ce7ff51752528c96 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 00:21:50 +1000 Subject: KVM: PPC: Tick accounting should defer vtime accounting 'til after IRQ handling Commit 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs") moved guest_exit() into the interrupt protected area to avoid wrong context warning (or worse). The problem is that tick-based time accounting has not yet been updated at this point (because it depends on the timer interrupt firing), so the guest time gets incorrectly accounted to system time. To fix the problem, follow the x86 fix in commit 160457140187 ("Defer vtime accounting 'til after IRQ handling"), and allow host IRQs to run before accounting the guest exit time. In the case vtime accounting is enabled, this is not required because TB is used directly for accounting. Before this patch, with CONFIG_TICK_CPU_ACCOUNTING=y in the host and a guest running a kernel compile, the 'guest' fields of /proc/stat are stuck at zero. With the patch they can be observed increasing roughly as expected. Fixes: e233d54d4d97 ("KVM: booke: use __kvm_guest_exit") Fixes: 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs") Cc: stable@vger.kernel.org # 5.12+ Signed-off-by: Laurent Vivier [np: only required for tick accounting, add Book3E fix, tweak changelog] Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211027142150.3711582-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++++++++++++++++++++-- arch/powerpc/kvm/booke.c | 16 +++++++++++++++- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2acb1c96cfaf..7b74fc0a986b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3726,7 +3726,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); - guest_exit_irqoff(); + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable(); @@ -4510,7 +4523,20 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_host_core(pcpu); - guest_exit_irqoff(); + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable(); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 977801c83aff..8c15c90dd3a9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1042,7 +1042,21 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) } trace_kvm_exit(exit_nr, vcpu); - guest_exit_irqoff(); + + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable(); -- cgit v1.2.3 From fef071be57dc43679a32d5b0e6ee176d6f12e9f2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Oct 2021 13:44:24 +1100 Subject: powerpc/dcr: Use cmplwi instead of 3-argument cmpli MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In dcr-low.S we use cmpli with three arguments, instead of four arguments as defined in the ISA: cmpli cr0,r3,1024 This appears to be a PPC440-ism, looking at the "PPC440x5 CPU Core User’s Manual" it shows cmpli having no L field, but implied to be 0 due to the core being 32-bit. It mentions that the ISA defines four arguments and recommends using cmplwi. It also corresponds to the old POWER instruction set, which had no L field there, a reserved bit instead. dcr-low.S is only built 32-bit, because it is only built when DCR_NATIVE=y, which is only selected by 40x and 44x. Looking at the generated code (with gcc/gas) we see cmplwi as expected. Although gas is happy with the 3-argument version when building for 32-bit, the LLVM assembler is not and errors out with: arch/powerpc/sysdev/dcr-low.S:27:10: error: invalid operand for instruction cmpli 0,%r3,1024; ... ^ Switch to the cmplwi extended opcode, which avoids any confusion when reading the ISA, fixes the issue with the LLVM assembler, and also means the code could be built 64-bit in future (though that's very unlikely). Reported-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman BugLink: https://github.com/ClangBuiltLinux/linux/issues/1419 Link: https://lore.kernel.org/r/20211014024424.528848-1-mpe@ellerman.id.au --- arch/powerpc/sysdev/dcr-low.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index efeeb1b885a1..329b9c4ae542 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -11,7 +11,7 @@ #include #define DCR_ACCESS_PROLOG(table) \ - cmpli cr0,r3,1024; \ + cmplwi cr0,r3,1024; \ rlwinm r3,r3,4,18,27; \ lis r5,table@h; \ ori r5,r5,table@l; \ -- cgit v1.2.3 From 290fe8aa69ef5c51c778c0bb33f8ef0181c769f5 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Thu, 28 Oct 2021 15:28:22 +0800 Subject: powerpc/44x/fsp2: add missing of_node_put Early exits from for_each_compatible_node() should decrement the node reference counter. Reported by Coccinelle: ./arch/powerpc/platforms/44x/fsp2.c:206:1-25: WARNING: Function "for_each_compatible_node" should have of_node_put() before return around line 218. Fixes: 7813043e1bbc ("powerpc/44x/fsp2: Add irq error handlers") Signed-off-by: Bixuan Cui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1635406102-88719-1-git-send-email-cuibixuan@linux.alibaba.com --- arch/powerpc/platforms/44x/fsp2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef9..823397c802de 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (irq == NO_IRQ) { pr_err("device tree node %pOFn is missing a interrupt", np); + of_node_put(np); return; } @@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (rc) { pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", np, rc); + of_node_put(np); return; } } -- cgit v1.2.3 From b1f896ce3542eb2eede5949ee2e481526fae1108 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Tue, 26 Oct 2021 16:31:08 +0300 Subject: powerpc/xmon: fix task state output p_state is unsigned since the commit 2f064a59a11f The patch also uses TASK_RUNNING instead of null. Fixes: 2f064a59a11f ("sched: Change task_struct::state") Signed-off-by: Denis Kirjanov Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211026133108.7113-1-kda@linux-powerpc.org --- arch/powerpc/xmon/xmon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dd8241c009e5..8b28ff9d98d1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3264,8 +3264,7 @@ static void show_task(struct task_struct *volatile tsk) * appropriate for calling from xmon. This could be moved * to a common, generic, routine used by both. */ - state = (p_state == 0) ? 'R' : - (p_state < 0) ? 'U' : + state = (p_state == TASK_RUNNING) ? 'R' : (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : (p_state & TASK_STOPPED) ? 'T' : (p_state & TASK_TRACED) ? 'C' : -- cgit v1.2.3 From 19b27f37ca97d1e42453b9e48af1cccb296f6965 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 27 Oct 2021 17:16:46 +1100 Subject: MAINTAINERS: Update powerpc KVM entry Paul is no longer handling patches for kvmppc. Instead we'll treat them as regular powerpc patches, taking them via the powerpc tree, using the topic/ppc-kvm branch when necessary. Also drop the web reference, it doesn't have any information specifically relevant to powerpc KVM. Signed-off-by: Michael Ellerman Acked-by: Paolo Bonzini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211027061646.540708-1-mpe@ellerman.id.au --- MAINTAINERS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ca6d6fde85cf..fbfd3345c40d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,11 +10260,8 @@ F: arch/mips/include/uapi/asm/kvm* F: arch/mips/kvm/ KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc) -M: Paul Mackerras -L: kvm-ppc@vger.kernel.org -S: Supported -W: http://www.linux-kvm.org/ -T: git git://github.com/agraf/linux-2.6.git +L: linuxppc-dev@lists.ozlabs.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git topic/ppc-kvm F: arch/powerpc/include/asm/kvm* F: arch/powerpc/include/uapi/asm/kvm* F: arch/powerpc/kernel/kvm* -- cgit v1.2.3 From f8c0e36b48e32b14bb83332d24e0646acd31d9e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 28 Oct 2021 14:59:15 +0200 Subject: powerpc: Don't provide __kernel_map_pages() without ARCH_SUPPORTS_DEBUG_PAGEALLOC When ARCH_SUPPORTS_DEBUG_PAGEALLOC is not selected, the user can still select CONFIG_DEBUG_PAGEALLOC in which case __kernel_map_pages() is provided by mm/page_poison.c So only define __kernel_map_pages() when both CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC and CONFIG_DEBUG_PAGEALLOC are defined. Fixes: 68b44f94d637 ("powerpc/booke: Disable STRICT_KERNEL_RWX, DEBUG_PAGEALLOC and KFENCE") Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/971b69739ff4746252e711a9845210465c023a9e.1635425947.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/pgtable_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index dcf5ecca19d9..fde1ed445ca4 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -173,7 +173,7 @@ void mark_rodata_ro(void) } #endif -#ifdef CONFIG_DEBUG_PAGEALLOC +#if defined(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && defined(CONFIG_DEBUG_PAGEALLOC) void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long addr = (unsigned long)page_address(page); -- cgit v1.2.3 From 52862ab33c5d97490f3fa345d6529829e6d6637b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 28 Oct 2021 22:27:16 +0530 Subject: powerpc/powernv/prd: Unregister OPAL_MSG_PRD2 notifier during module unload Commit 587164cd, introduced new opal message type (OPAL_MSG_PRD2) and added opal notifier. But I missed to unregister the notifier during module unload path. This results in below call trace if you try to unload and load opal_prd module. Also add new notifier_block for OPAL_MSG_PRD2 message. Sample calltrace (modprobe -r opal_prd; modprobe opal_prd) BUG: Unable to handle kernel data access on read at 0xc0080000192200e0 Faulting instruction address: 0xc00000000018d1cc Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV CPU: 66 PID: 7446 Comm: modprobe Kdump: loaded Tainted: G E 5.14.0prd #759 NIP: c00000000018d1cc LR: c00000000018d2a8 CTR: c0000000000cde10 REGS: c0000003c4c0f0a0 TRAP: 0300 Tainted: G E (5.14.0prd) MSR: 9000000002009033 CR: 24224824 XER: 20040000 CFAR: c00000000018d2a4 DAR: c0080000192200e0 DSISR: 40000000 IRQMASK: 1 ... NIP notifier_chain_register+0x2c/0xc0 LR atomic_notifier_chain_register+0x48/0x80 Call Trace: 0xc000000002090610 (unreliable) atomic_notifier_chain_register+0x58/0x80 opal_message_notifier_register+0x7c/0x1e0 opal_prd_probe+0x84/0x150 [opal_prd] platform_probe+0x78/0x130 really_probe+0x110/0x5d0 __driver_probe_device+0x17c/0x230 driver_probe_device+0x60/0x130 __driver_attach+0xfc/0x220 bus_for_each_dev+0xa8/0x130 driver_attach+0x34/0x50 bus_add_driver+0x1b0/0x300 driver_register+0x98/0x1a0 __platform_driver_register+0x38/0x50 opal_prd_driver_init+0x34/0x50 [opal_prd] do_one_initcall+0x60/0x2d0 do_init_module+0x7c/0x320 load_module+0x3394/0x3650 __do_sys_finit_module+0xd4/0x160 system_call_exception+0x140/0x290 system_call_common+0xf4/0x258 Fixes: 587164cd593c ("powerpc/powernv: Add new opal message type") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Vasant Hegde Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211028165716.41300-1-hegdevasant@linux.vnet.ibm.com --- arch/powerpc/platforms/powernv/opal-prd.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index a191f4c60ce7..113bdb151f68 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -369,6 +369,12 @@ static struct notifier_block opal_prd_event_nb = { .priority = 0, }; +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + static int opal_prd_probe(struct platform_device *pdev) { int rc; @@ -390,9 +396,10 @@ static int opal_prd_probe(struct platform_device *pdev) return rc; } - rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb); + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); if (rc) { pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); return rc; } @@ -401,6 +408,8 @@ static int opal_prd_probe(struct platform_device *pdev) pr_err("failed to register miscdev\n"); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); return rc; } @@ -411,6 +420,7 @@ static int opal_prd_remove(struct platform_device *pdev) { misc_deregister(&opal_prd_dev); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); return 0; } -- cgit v1.2.3 From 81291383ffde08b23bce75e7d6b2575ce9d3475c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 28 Oct 2021 23:30:43 +1000 Subject: powerpc/32e: Ignore ESR in instruction storage interrupt handler A e5500 machine running a 32-bit kernel sometimes hangs at boot, seemingly going into an infinite loop of instruction storage interrupts. The ESR (Exception Syndrome Register) has a value of 0x800000 (store) when this happens, which is likely set by a previous store. An instruction TLB miss interrupt would then leave ESR unchanged, and if no PTE exists it calls directly to the instruction storage interrupt handler without changing ESR. access_error() does not cause a segfault due to a store to a read-only vma because is_exec is true. Most subsequent fault handling does not check for a write fault on a read-only vma, and might do strange things like create a writeable PTE or call page_mkwrite on a read only vma or file. It's not clear what happens here to cause the infinite faulting in this case, a fault handler failure or low level PTE or TLB handling. In any case this can be fixed by having the instruction storage interrupt zero regs->dsisr rather than storing the ESR value to it. Fixes: a01a3f2ddbcd ("powerpc: remove arguments from fault handler functions") Cc: stable@vger.kernel.org # v5.12+ Reported-by: Jacques de Laval Signed-off-by: Nicholas Piggin Tested-by: Jacques de Laval Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211028133043.4159501-1-npiggin@gmail.com --- arch/powerpc/kernel/head_booke.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index e5503420b6c6..ef8d1b1c234e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -465,12 +465,21 @@ label: bl do_page_fault; \ b interrupt_return +/* + * Instruction TLB Error interrupt handlers may call InstructionStorage + * directly without clearing ESR, so the ESR at this point may be left over + * from a prior interrupt. + * + * In any case, do_page_fault for BOOK3E does not use ESR and always expects + * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault + * handling. + */ #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \ - mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ + NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \ + li r5,0; /* Store 0 in regs->esr (dsisr) */ \ stw r5,_ESR(r11); \ - stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ + stw r12, _DEAR(r11); /* Set regs->dear (dar) to SRR0 */ \ prepare_transfer_to_handler; \ bl do_page_fault; \ b interrupt_return -- cgit v1.2.3 From c12ab8dbc492b992e1ea717db933cee568780c47 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 29 Oct 2021 17:10:45 +0200 Subject: powerpc/8xx: Fix Oops with STRICT_KERNEL_RWX without DEBUG_RODATA_TEST Until now, all tests involving CONFIG_STRICT_KERNEL_RWX were done with DEBUG_RODATA_TEST to check the result. But now that CONFIG_STRICT_KERNEL_RWX is selected by default, it came without CONFIG_DEBUG_RODATA_TEST and led to the following Oops [ 6.830908] Freeing unused kernel image (initmem) memory: 352K [ 6.840077] BUG: Unable to handle kernel data access on write at 0xc1285200 [ 6.846836] Faulting instruction address: 0xc0004b6c [ 6.851745] Oops: Kernel access of bad area, sig: 11 [#1] [ 6.857075] BE PAGE_SIZE=16K PREEMPT CMPC885 [ 6.861348] SAF3000 DIE NOTIFICATION [ 6.864830] CPU: 0 PID: 1 Comm: swapper Not tainted 5.15.0-rc5-s3k-dev-02255-g2747d7b7916f #451 [ 6.873429] NIP: c0004b6c LR: c0004b60 CTR: 00000000 [ 6.878419] REGS: c902be60 TRAP: 0300 Not tainted (5.15.0-rc5-s3k-dev-02255-g2747d7b7916f) [ 6.886852] MSR: 00009032 CR: 53000335 XER: 8000ff40 [ 6.893564] DAR: c1285200 DSISR: 82000000 [ 6.893564] GPR00: 0c000000 c902bf20 c20f4000 08000000 00000001 04001f00 c1800000 00000035 [ 6.893564] GPR08: ff0001ff c1280000 00000002 c0004b60 00001000 00000000 c0004b1c 00000000 [ 6.893564] GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 6.893564] GPR24: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 c1060000 [ 6.932034] NIP [c0004b6c] kernel_init+0x50/0x138 [ 6.936682] LR [c0004b60] kernel_init+0x44/0x138 [ 6.941245] Call Trace: [ 6.943653] [c902bf20] [c0004b60] kernel_init+0x44/0x138 (unreliable) [ 6.950022] [c902bf30] [c001122c] ret_from_kernel_thread+0x5c/0x64 [ 6.956135] Instruction dump: [ 6.959060] 48ffc521 48045469 4800d8cd 3d20c086 89295fa0 2c090000 41820058 480796c9 [ 6.966890] 4800e48d 3d20c128 39400002 3fe0c106 <91495200> 3bff8000 4806fa1d 481f7d75 [ 6.974902] ---[ end trace 1e397bacba4aa610 ]--- 0xc1285200 corresponds to 'system_state' global var that the kernel is trying to set to SYSTEM_RUNNING. This var is above the RO/RW limit so it shouldn't Oops. It oopses because the dirty bit is missing. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3d5800b0bbcd7b19761b98f50421358667b45331.1635520232.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9bdb95f5694f..2d596881b70e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -755,7 +755,7 @@ _GLOBAL(mmu_pin_tlb) cmplw r6, r9 bdnzt lt, 2b -4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) 2: ori r0, r6, MD_EVALID mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 -- cgit v1.2.3