From c9f3401313a5089f100d7d1ef4b75cd7b49b2190 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Mon, 18 Jan 2021 22:34:51 +1000
Subject: powerpc: Always enable queued spinlocks for 64s, disable for others

Queued spinlocks have been shown to have good performance and fairness
properties even on smaller (2 socket) POWER systems. This selects them
automatically for 64s. For other platforms they are de-selected: the
standard spinlock is far simpler and smaller code, and a single chip
with a handful of cores is unlikely to show any improvement.
CONFIG_EXPERT still allows this to be changed, e.g., to help debug
performance or correctness issues.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210118123451.1452206-1-npiggin@gmail.com
---
 arch/powerpc/Kconfig | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/Kconfig')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..eebb4a8c156c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -505,18 +505,14 @@ config HOTPLUG_CPU
 	  Say N if you are unsure.
 
 config PPC_QUEUED_SPINLOCKS
-	bool "Queued spinlocks"
+	bool "Queued spinlocks" if EXPERT
 	depends on SMP
+	default PPC_BOOK3S_64
 	help
 	  Say Y here to use queued spinlocks which give better
 	  scalability and fairness on large SMP and NUMA systems without
 	  harming single threaded performance.
 
-	  This option is currently experimental, the code is more complex and
-	  less tested so it defaults to "N" for the moment.
-
-	  If unsure, say "N".
-
 config ARCH_CPU_PROBE_RELEASE
 	def_bool y
 	depends on HOTPLUG_CPU
-- 
cgit v1.2.3


From 910a0cb6d259736a0c86e795d4c2f42af8d0d775 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 20 Jan 2021 07:49:13 +0000
Subject: powerpc/47x: Disable 256k page size

PPC47x_TLBE_SIZE isn't defined for 256k pages, leading to a build
break if 256k pages are selected. So change the Kconfig so that the
256k page size can't be selected for 47x.

Fixes: e7f75ad01d59 ("powerpc/47x: Base ppc476 support")
Reported-by: kernel test robot
Signed-off-by: Christophe Leroy
[mpe: Expand change log to mention build break]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/2fed79b1154c872194f98bac4422c23918325e61.1611128938.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/Kconfig')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index eebb4a8c156c..ff64ac7d227d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -768,7 +768,7 @@ config PPC_64K_PAGES
 
 config PPC_256K_PAGES
 	bool "256k page size"
-	depends on 44x && !STDBINUTILS
+	depends on 44x && !STDBINUTILS && !PPC_47x
 	help
 	  Make the page size 256k.
 
-- 
cgit v1.2.3


From 4eeef098b43242ed145c83fba9989d586d707589 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 20 Jan 2021 07:49:14 +0000
Subject: powerpc/44x: Remove STDBINUTILS kconfig option

STDBINUTILS is just a toggle to allow the 256k page size to appear in
the possible page sizes list for the 44x. Make the 256k page size
option appear all the time, with an explicit warning about binutils,
and remove this unnecessary STDBINUTILS config option.
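
As an illustration (an editor's sketch, not part of this patch): a
256k-page kernel only runs binaries whose ELF segments are aligned to
the larger page size, so userspace must be linked with the ld flag
'-z max-page-size=0x40000'. A minimal, hypothetical sanity check:

  /* pagecheck.c -- build with:
   *   gcc -Wl,-z,max-page-size=0x40000 -o pagecheck pagecheck.c
   */
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
  	long psize = sysconf(_SC_PAGESIZE);	/* 0x40000 (256k) on such a kernel */

  	printf("page size: %ld bytes\n", psize);
  	return psize == 256 * 1024 ? 0 : 1;
  }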
Signed-off-by: Christophe Leroy
[mpe: Incorporate help text changes from David Laight]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/f9981e819009aa121a998dc483052ec76f78f991.1611128938.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/Kconfig | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'arch/powerpc/Kconfig')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ff64ac7d227d..ef64d96bf316 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -716,18 +716,6 @@ config ARCH_MEMORY_PROBE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config STDBINUTILS
-	bool "Using standard binutils settings"
-	depends on 44x
-	default y
-	help
-	  Turning this option off allows you to select 256KB PAGE_SIZE on 44x.
-	  Note, that kernel will be able to run only those applications,
-	  which had been compiled using binutils later than 2.17.50.0.3 with
-	  '-zmax-page-size' set to 256K (the default is 64K). Or, if using
-	  the older binutils, you can patch them with a trivial patch, which
-	  changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000.
-
 choice
 	prompt "Page size"
 	default PPC_4K_PAGES
@@ -767,17 +755,15 @@ config PPC_64K_PAGES
 	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_256K_PAGES
-	bool "256k page size"
-	depends on 44x && !STDBINUTILS && !PPC_47x
+	bool "256k page size (Requires non-standard binutils settings)"
+	depends on 44x && !PPC_47x
 	help
 	  Make the page size 256k.
 
-	  As the ELF standard only requires alignment to support page
-	  sizes up to 64k, you will need to compile all of your user
-	  space applications with a non-standard binutils settings
-	  (see the STDBINUTILS description for details).
-
-	  Say N unless you know what you are doing.
+	  The kernel will only be able to run applications that have been
+	  compiled with '-zmax-page-size' set to 256K (the default is 64K) using
+	  binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
+	  definition from 0x10000 to 0x40000 in older versions.
 
 endchoice
-- 
cgit v1.2.3


From 2a06bf3e95cd93e3640d431960181b8e47415f33 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sat, 30 Jan 2021 23:08:40 +1000
Subject: powerpc/64: context tracking remove _TIF_NOHZ

Add context tracking to the system call handler explicitly, and remove
_TIF_NOHZ. This improves system call performance when nohz_full is
enabled. On a POWER9, the cost of a gettid scv system call improves
from 1129 to 1004 cycles on a nohz_full CPU, and from 550 to 430
cycles on a housekeeping CPU.
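
For context, numbers like these can be gathered with a simple loop
around the syscall. The sketch below is illustrative only, not the
benchmark actually used here; note the patch measured the scv entry
point, while glibc's syscall() wrapper may enter via the older sc
instruction:

  /* gettid-bench.c: rough timebase ticks per syscall on powerpc64 */
  #include <stdio.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  #define ITERS 1000000UL

  int main(void)
  {
  	unsigned long long tb0, tb1;
  	unsigned long i;

  	tb0 = __builtin_ppc_get_timebase();	/* GCC builtin: reads the timebase */
  	for (i = 0; i < ITERS; i++)
  		syscall(SYS_gettid);
  	tb1 = __builtin_ppc_get_timebase();

  	/* Timebase ticks, not core cycles: scale by the clock/timebase ratio. */
  	printf("%llu ticks per syscall\n", (tb1 - tb0) / ITERS);
  	return 0;
  }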
Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210130130852.2952424-31-npiggin@gmail.com
---
 arch/powerpc/Kconfig                   |  1 -
 arch/powerpc/include/asm/thread_info.h |  4 +-
 arch/powerpc/kernel/ptrace/ptrace.c    |  4 --
 arch/powerpc/kernel/signal.c           |  4 --
 arch/powerpc/kernel/syscall_64.c       | 72 ++++++++++++++++++++++------------
 5 files changed, 47 insertions(+), 38 deletions(-)

(limited to 'arch/powerpc/Kconfig')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ef64d96bf316..686829dd1c66 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -196,7 +196,6 @@ config PPC
 	select HAVE_STACKPROTECTOR	if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
 	select HAVE_STACKPROTECTOR	if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_CONTEXT_TRACKING	if PPC64
-	select HAVE_TIF_NOHZ		if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DYNAMIC_FTRACE
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 3d8a47af7a25..386d576673a1 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -94,7 +94,6 @@ void arch_setup_new_exec(void);
 #define TIF_PATCH_PENDING	6	/* pending live patching update */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SINGLESTEP		8	/* singlestepping active */
-#define TIF_NOHZ		9	/* in adaptive nohz mode */
 #define TIF_SECCOMP		10	/* secure computing */
 #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		12	/* Force successful syscall return */
@@ -128,11 +127,10 @@ void arch_setup_new_exec(void);
 #define _TIF_UPROBE		(1<<TIF_UPROBE)
 [...]
-#define _TIF_NOHZ		(1<<TIF_NOHZ)
 #define _TIF_SYSCALL_DOTRACE	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
 				 _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
-				 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
+				 _TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ ... @@ long do_syscall_trace_enter(struct pt_regs *regs)
-	user_exit();
-
 	flags = READ_ONCE(current_thread_info()->flags) &
 		(_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
@@ -340,8 +338,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
-
-	user_enter();
 }
 
 void __init pt_regs_check(void);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 53782aa60ade..9ded046edb0e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -282,8 +282,6 @@ static void do_signal(struct task_struct *tsk)
 
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 {
-	user_exit();
-
 	if (thread_info_flags & _TIF_UPROBE)
 		uprobe_notify_resume(regs);
 
@@ -299,8 +297,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 		tracehook_notify_resume(regs);
 		rseq_handle_notify_resume(NULL, regs);
 	}
-
-	user_enter();
 }
 
 static unsigned long get_tm_stackpointer(struct task_struct *tsk)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 316c3ba16554..45c4420fe339 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <linux/context_tracking.h>
 #include <linux/err.h>
 #include <asm/asm-prototypes.h>
 #include <asm/kup.h>
 #include <asm/cputime.h>
+#include <asm/interrupt.h>
 #include <asm/hw_irq.h>
 #include <asm/kprobes.h>
 #include <asm/paca.h>
@@ -28,6 +30,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
+	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+	user_exit_irqoff();
+
 	trace_hardirqs_off(); /* finish reconciling */
 
 	if (IS_ENABLED(CONFIG_PPC_BOOK3S))
@@ -144,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
  * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
  */
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
 {
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
@@ -161,29 +166,6 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en
 		trace_hardirqs_off();
 		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
-		/*
-		 * Must replay pending soft-masked interrupts now. Don't just
-		 * local_irq_enabe(); local_irq_disable(); because if we are
-		 * returning from an asynchronous interrupt here, another one
-		 * might hit after irqs are enabled, and it would exit via this
-		 * same path allowing another to fire, and so on unbounded.
-		 *
-		 * If interrupts were enabled when this interrupt exited,
-		 * indicating a process context (synchronous) interrupt,
-		 * local_irq_enable/disable can be used, which will enable
-		 * interrupts rather than keeping them masked (unclear how
-		 * much benefit this is over just replaying for all cases,
-		 * because we immediately disable again, so all we're really
-		 * doing is allowing hard interrupts to execute directly for
-		 * a very small time, rather than being masked and replayed).
-		 */
-		if (irqs_enabled) {
-			local_irq_enable();
-			local_irq_disable();
-		} else {
-			replay_soft_interrupts();
-		}
-
 		return false;
 	}
 	local_paca->irq_happened = 0;
@@ -192,6 +174,37 @@ static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_en
 	return true;
 }
 
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+{
+	if (__prep_irq_for_enabled_exit(clear_ri))
+		return true;
+
+	/*
+	 * Must replay pending soft-masked interrupts now. Don't just
+	 * local_irq_enabe(); local_irq_disable(); because if we are
+	 * returning from an asynchronous interrupt here, another one
+	 * might hit after irqs are enabled, and it would exit via this
+	 * same path allowing another to fire, and so on unbounded.
+	 *
+	 * If interrupts were enabled when this interrupt exited,
+	 * indicating a process context (synchronous) interrupt,
+	 * local_irq_enable/disable can be used, which will enable
+	 * interrupts rather than keeping them masked (unclear how
+	 * much benefit this is over just replaying for all cases,
+	 * because we immediately disable again, so all we're really
+	 * doing is allowing hard interrupts to execute directly for
+	 * a very small time, rather than being masked and replayed).
+	 */
+	if (irqs_enabled) {
+		local_irq_enable();
+		local_irq_disable();
+	} else {
+		replay_soft_interrupts();
+	}
+
+	return false;
+}
+
 /*
  * This should be called after a syscall returns, with r3 the return value
  * from the syscall. If this function returns non-zero, the system call
@@ -209,6 +222,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	unsigned long ti_flags;
 	unsigned long ret = 0;
 
+	CT_WARN_ON(ct_state() == CONTEXT_USER);
+
 	kuap_check_amr();
 
 	regs->result = r3;
@@ -240,9 +255,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 		ret |= _TIF_RESTOREALL;
 	}
 
+again:
 	local_irq_disable();
 
-again:
 	ti_flags = READ_ONCE(*ti_flagsp);
 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
 		local_irq_enable();
@@ -286,9 +301,14 @@ again:
 		}
 	}
 
+	user_enter_irqoff();
+
 	/* scv need not set RI=0 because SRRs are not used */
-	if (unlikely(!prep_irq_for_enabled_exit(!scv, true)))
+	if (unlikely(!__prep_irq_for_enabled_exit(!scv))) {
+		user_exit_irqoff();
+		local_irq_enable();
 		goto again;
+	}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	local_paca->tm_scratch = regs->msr;
-- 
cgit v1.2.3
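
A closing illustration (an editor's sketch, not part of the series):
the _TIF_NOHZ removal above matters most on CPUs isolated with
nohz_full, and those CPUs can be listed from sysfs on a running
system:

  /* nohz-cpus.c: report which CPUs run in nohz_full mode */
  #include <stdio.h>

  int main(void)
  {
  	char buf[256];
  	FILE *f = fopen("/sys/devices/system/cpu/nohz_full", "r");

  	if (!f) {
  		perror("nohz_full");
  		return 1;
  	}
  	if (fgets(buf, sizeof(buf), f))
  		printf("nohz_full CPUs: %s", buf);	/* e.g. "2-7" */
  	fclose(f);
  	return 0;
  }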