From fed626db8bc240678fd0fa8078aa767ad21d47fd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Feb 2023 13:57:47 +0100 Subject: s390/bp: add missing BPENTER to program check handler When leaving interpretive execution because of a program check, BPENTER should be called, as is done on interrupt exit. Acked-by: Alexander Gordeev Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 76a06f3d3671..2af5d7602ae8 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -355,6 +355,7 @@ ENTRY(pgm_check_handler) #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in __sie64a OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) SIEEXIT lghi %r10,_PIF_GUEST_FAULT #endif -- cgit v1.2.3 From f33f2d4c7c80c641f6ca3dfe5e7dfe1f91543780 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Feb 2023 13:57:48 +0100 Subject: s390/bp: remove TIF_ISOLATE_BP TIF_ISOLATE_BP is unused since it was introduced with commit 6b73044b2b00 ("s390: run user space and KVM guests with modified branch prediction"). Given that there is no use case, remove it again. Acked-by: Alexander Gordeev Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 1 - arch/s390/include/asm/thread_info.h | 2 -- arch/s390/kernel/entry.S | 36 +++++++++++++----------------------- arch/s390/kernel/processor.c | 9 --------- 4 files changed, 13 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e98d9650764b..ddf3adde161c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -329,7 +329,6 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index b2ffcb4fe000..f19e6f5ec367 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -70,7 +70,6 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ -#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ @@ -94,7 +93,6 @@ void arch_setup_new_exec(void); #define _TIF_UPROBE BIT(TIF_UPROBE) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) -#define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2af5d7602ae8..d8890a6f5dee 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -227,7 +227,7 @@ ENTRY(__sie64a) TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr - BPEXIT 
__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST .Lsie_entry: sie 0(%r14) # Let the next instruction be NOP to avoid triggering a machine check @@ -235,7 +235,7 @@ ENTRY(__sie64a) nopr 7 .Lsie_leave: BPOFF - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST .Lsie_skip: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE @@ -291,11 +291,9 @@ ENTRY(system_call) .Lsysc_per: STBEAR __LC_LAST_BREAK lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r12,__LC_CURRENT lg %r15,__LC_KERNEL_STACK xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # clear user controlled register to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -314,7 +312,7 @@ ENTRY(system_call) brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER @@ -329,7 +327,7 @@ ENTRY(ret_from_fork) brasl %r14,__ret_from_fork lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER @@ -344,7 +342,6 @@ ENTRY(pgm_check_handler) stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r12,__LC_CURRENT lghi %r10,0 lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # coming from user space? @@ -355,7 +352,7 @@ ENTRY(pgm_check_handler) #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in __sie64a OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT lghi %r10,_PIF_GUEST_FAULT #endif @@ -367,8 +364,7 @@ ENTRY(pgm_check_handler) aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -3: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - lg %r15,__LC_KERNEL_STACK +3: lg %r15,__LC_KERNEL_STACK 4: la %r11,STACK_FRAME_OVERHEAD(%r15) stg %r10,__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -390,7 +386,7 @@ ENTRY(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel lctlg %c1,%c1,__LC_USER_ASCE - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + BPON stpt __LC_EXIT_TIMER .Lpgm_exit_kernel: mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) @@ -420,20 +416,18 @@ ENTRY(\name) STBEAR __LC_LAST_BREAK BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r12,__LC_CURRENT lmg %r8,%r9,\lc_old_psw tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f - BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - lctlg %c1,%c1,__LC_KERNEL_ASCE +1: lctlg %c1,%c1,__LC_KERNEL_ASCE lg %r15,__LC_KERNEL_STACK 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -457,7 +451,7 @@ ENTRY(\name) tmhh %r8,0x0001 # returning to user ? 
jno 2f lctlg %c1,%c1,__LC_USER_ASCE - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + BPON stpt __LC_EXIT_TIMER 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) @@ -502,7 +496,6 @@ ENTRY(mcck_int_handler) spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs - lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_MCK_OLD_PSW TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid @@ -531,16 +524,13 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_stack + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_user OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -4: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) +4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT - j .Lmcck_stack #endif .Lmcck_user: - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -.Lmcck_stack: lg %r15,__LC_MCCK_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) @@ -568,7 +558,7 @@ ENTRY(mcck_int_handler) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + BPON stpt __LC_EXIT_TIMER 0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 LBEAR 0(%r12) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index a194611ba88c..7e2878c9a036 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -365,15 +365,6 @@ const struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -int s390_isolate_bp(void) -{ - if (!test_facility(82)) - return -EOPNOTSUPP; - set_thread_flag(TIF_ISOLATE_BP); - return 0; -} -EXPORT_SYMBOL(s390_isolate_bp); - int s390_isolate_bp_guest(void) { if (!test_facility(82)) -- cgit v1.2.3 From 9b63fd2fc8526e51444de35598927e406b1bb26f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Feb 2023 13:57:49 +0100 Subject: s390/bp: remove s390_isolate_bp_guest() s390_isolate_bp_guest() is unused. Remove it. 
Reviewed-by: Alexander Gordeev Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 -- arch/s390/kernel/processor.c | 9 --------- 2 files changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index ddf3adde161c..dd95fddd5dda 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -329,8 +329,6 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int s390_isolate_bp_guest(void); - static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return arch_irqs_disabled_flags(regs->psw.mask); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7e2878c9a036..0a999c8226d7 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -364,12 +364,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - -int s390_isolate_bp_guest(void) -{ - if (!test_facility(82)) - return -EOPNOTSUPP; - set_thread_flag(TIF_ISOLATE_BP_GUEST); - return 0; -} -EXPORT_SYMBOL(s390_isolate_bp_guest); -- cgit v1.2.3 From 69a407bf81a8d70d51e1b7787da577e67447c9ea Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Feb 2023 13:57:50 +0100 Subject: s390/bp: remove __bpon() There is no point in changing branch prediction state of a cpu shortly before it enters stop state. Therefore remove __bpon(). Acked-by: Alexander Gordeev Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 1 - arch/s390/kernel/entry.S | 18 ++++++------------ arch/s390/kernel/ipl.c | 1 - arch/s390/kernel/smp.c | 2 -- 4 files changed, 6 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dd95fddd5dda..80ac0c1034dc 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -99,7 +99,6 @@ void cpu_detect_mhz_feature(void); extern const struct seq_operations cpuinfo_op; extern void execve_tail(void); -extern void __bpon(void); unsigned long vdso_size(void); /* diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d8890a6f5dee..58b85aedca22 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -159,21 +159,15 @@ _LPP_OFFSET = __LC_LPP .section .kprobes.text, "ax" .Ldummy: /* - * This nop exists only in order to avoid that __bpon starts at - * the beginning of the kprobes text section. In that case we would - * have several symbols at the same address. E.g. objdump would take - * an arbitrary symbol name when disassembling this code. - * With the added nop in between the __bpon symbol is unique - * again. + * The following nop exists only in order to avoid that the next + * symbol starts at the beginning of the kprobes text section. + * In that case there would be several symbols at the same address. + * E.g. objdump would take an arbitrary symbol when disassembling + * the code. + * With the added nop in between this cannot happen. 
*/ nop 0 -ENTRY(__bpon) - .globl __bpon - BPON - BR_EX %r14 -ENDPROC(__bpon) - /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 5f0f5c86963a..0f91cd401eef 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -649,7 +649,6 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - __bpon(); diag308(DIAG308_LOAD_CLEAR, NULL); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d4888453bbf8..0126c5f6b904 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -348,7 +348,6 @@ static void pcpu_delegate(struct pcpu *pcpu, abs_lc->restart_source = source_cpu; put_abs_lowcore(abs_lc); } - __bpon(); asm volatile( "0: sigp 0,%0,%2 # sigp restart to target cpu\n" " brc 2,0b # busy, try again\n" @@ -986,7 +985,6 @@ void __cpu_die(unsigned int cpu) void __noreturn cpu_die(void) { idle_task_exit(); - __bpon(); pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); for (;;) ; } -- cgit v1.2.3 From fb77914a692d550a5bb0c7f71eac40e6da9c0e6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Mar 2023 00:55:33 +0000 Subject: s390: trim ancient junk from copy_thread() Setting ->psw.addr in childregs of a kernel thread is a rudiment of the old kernel_thread()/kernel_execve() implementation. Mainline hadn't been using it since 2012. And clarify the assignments to frame->sf.gprs - the array stores gpr6..gpr15 values to be set by __switch_to(), so frame->sf.gprs[5] actually affects gpr11, etc. Better spell that as frame->sf.gprs[11 - 6]... Signed-off-by: Al Viro Link: https://lore.kernel.org/r/ZAU6BYFisE8evmYf@ZenIV Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/process.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 67df64ef4839..87ca3a727604 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -136,12 +136,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.last_break = 1; frame->sf.back_chain = 0; - frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame); - frame->sf.gprs[6] = (unsigned long)p; + frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs; + frame->sf.gprs[12 - 6] = (unsigned long)p; /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long)ret_from_fork; + frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork; /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long)frame; + frame->sf.gprs[15 - 6] = (unsigned long)frame; /* Store access registers to kernel stack of new process. */ if (unlikely(args->fn)) { @@ -149,8 +149,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - frame->childregs.psw.addr = - (unsigned long)__ret_from_fork; frame->childregs.gprs[9] = (unsigned long)args->fn; frame->childregs.gprs[10] = (unsigned long)args->fn_arg; frame->childregs.orig_gpr2 = -1; -- cgit v1.2.3 From 029a4f4b9555ddb47f38f9589fd6d43277efb9c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 9 Mar 2023 20:12:28 +0100 Subject: s390/setup: always inline gen_lpswe() gen_lpswe() contains a BUILD_BUG_ON() statement which depends on a function parameter. 
If the compiler decides not to inline the function, this will lead to a build error even if all call sites pass a valid parameter. To avoid this, always inline gen_lpswe(). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 3a1f8825bc7d..5271bb278cfa 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -164,7 +164,7 @@ struct oldmem_data { }; extern struct oldmem_data oldmem_data; -static inline u32 gen_lpswe(unsigned long addr) +static __always_inline u32 gen_lpswe(unsigned long addr) { BUILD_BUG_ON(addr > 0xfff); return 0xb2b20000 | addr; -- cgit v1.2.3 From ae83707f39e1b8f2a15af3868bba25ce4fa93364 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 6 Mar 2023 16:10:12 +0100 Subject: s390/pci: only add specific device in zpci_bus_scan_device() As the name suggests, zpci_bus_scan_device() is used to scan a specific device, and thus pci_bus_add_device() for that device is sufficient. Furthermore, move this call inside the rescan/remove locking. Suggested-by: Bjorn Helgaas Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20230306151014.60913-3-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_bus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 6a8da1b742ae..465399dc4a53 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -87,9 +87,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev) if (!pdev) return -ENODEV; - pci_bus_add_device(pdev); pci_lock_rescan_remove(); - pci_bus_add_devices(zdev->zbus->bus); + pci_bus_add_device(pdev); pci_unlock_rescan_remove(); return 0; -- cgit v1.2.3 From b881208dcdae311734f172d61cf9ae6fcb108389 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 6 Mar 2023 16:10:13 +0100 Subject: s390/pci: remove redundant pci_bus_add_devices() on new bus The pci_bus_add_devices() call in zpci_bus_create_pci_bus() has no effect, since at this point no device could have been added to the freshly created PCI bus. Suggested-by: Bjorn Helgaas Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20230306151014.60913-4-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_bus.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 465399dc4a53..df2e27fbd376 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -212,7 +212,6 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s } zbus->bus = bus; - pci_bus_add_devices(bus); return 0; } -- cgit v1.2.3 From 45e5f0c017e0d0dabb952db63428e31eca6be87e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 6 Mar 2023 16:10:14 +0100 Subject: s390/pci: clean up left over special treatment for function zero Prior to commit 960ac3626487 ("s390/pci: allow zPCI zbus without a function zero") enabling and scanning a PCI function had to potentially be postponed until the function with devfn zero on that bus was plugged. While that commit removed the waiting itself, it missed the extra code that scans all functions on the PCI bus once function zero appears. Remove that code and the outdated comments about waiting for function zero. 
Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20230306151014.60913-5-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 23 +++-------------------- arch/s390/pci/pci_bus.c | 7 ++----- 2 files changed, 5 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index ef38b1514c77..ed6ff20dd0f6 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -870,32 +870,15 @@ bool zpci_is_device_configured(struct zpci_dev *zdev) * @fh: The general function handle supplied by the platform * * Given a device in the configuration state Configured, enables, scans and - * adds it to the common code PCI subsystem if possible. If the PCI device is - * parked because we can not yet create a PCI bus because we have not seen - * function 0, it is ignored but will be scanned once function 0 appears. - * If any failure occurs, the zpci_dev is left disabled. + * adds it to the common code PCI subsystem if possible. If any failure occurs, + * the zpci_dev is left disabled. * * Return: 0 on success, or an error code otherwise */ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh) { - int rc; - zpci_update_fh(zdev, fh); - /* the PCI function will be scanned once function 0 appears */ - if (!zdev->zbus->bus) - return 0; - - /* For function 0 on a multi-function bus scan whole bus as we might - * have to pick up existing functions waiting for it to allow creating - * the PCI bus - */ - if (zdev->devfn == 0 && zdev->zbus->multifunction) - rc = zpci_bus_scan_bus(zdev->zbus); - else - rc = zpci_bus_scan_device(zdev); - - return rc; + return zpci_bus_scan_device(zdev); } /** diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index df2e27fbd376..a9875feca293 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -131,11 +131,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error) * @zbus: the zbus to be scanned * * Enables and scans all PCI functions on the bus making them available to the - * common PCI code. If there is no function 0 on the zbus nothing is scanned. If - * a function does not have a slot yet because it was added to the zbus before - * function 0 the slot is created. If a PCI function fails to be initialized - * an error will be returned but attempts will still be made for all other - * functions on the bus. + * common PCI code. If a PCI function fails to be initialized an error will be + * returned but attempts will still be made for all other functions on the bus. * * Return: 0 on success, an error value otherwise */ -- cgit v1.2.3 From 0599331c3da6dbbe814262079f6b0c4f3575fd5d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:20 -0800 Subject: s390: simplify one-level sysctl registration for topology_ctl_table There is no need to declare an extra table just to create a directory; this can easily be done with a prefix path passed to register_sysctl(). Simplify this registration. 
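For illustration, the shape shared by all of the one-level conversions in this batch — the knob name and variable below are invented; only the register_sysctl() call and the ctl_table layout follow the patches:

static int example_knob;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_knob",
		.data		= &example_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* terminating empty entry is still required */
};

static int __init example_init(void)
{
	/* creates /proc/sys/s390/example_knob; register_sysctl() creates
	 * the "s390" directory on demand, no separate directory table */
	return register_sysctl("s390", example_table) ? 0 : -ENOMEM;
}
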
Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-2-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/topology.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index c6eecd4a5302..e5d6a1c25d13 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -637,16 +637,6 @@ static struct ctl_table topology_ctl_table[] = { { }, }; -static struct ctl_table topology_dir_table[] = { - { - .procname = "s390", - .maxlen = 0, - .mode = 0555, - .child = topology_ctl_table, - }, - { }, -}; - static int __init topology_init(void) { timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); @@ -654,7 +644,7 @@ static int __init topology_init(void) set_topology_timer(); else topology_update_polarization_simple(); - register_sysctl_table(topology_dir_table); + register_sysctl("s390", topology_ctl_table); return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); -- cgit v1.2.3 From 751e24071c2e408f4b1781327756700ee4c941ab Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:21 -0800 Subject: s390: simplify one-level sysctl registration for s390dbf_table There is no need to declare an extra table just to create a directory; this can easily be done with a prefix path passed to register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-3-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/debug.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b376f0377a2c..221c865785c2 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -981,16 +981,6 @@ static struct ctl_table s390dbf_table[] = { { } }; -static struct ctl_table s390dbf_dir_table[] = { - { - .procname = "s390dbf", - .maxlen = 0, - .mode = S_IRUGO | S_IXUGO, - .child = s390dbf_table, - }, - { } -}; - static struct ctl_table_header *s390dbf_sysctl_header; /** @@ -1574,7 +1564,7 @@ out: */ static int __init debug_init(void) { - s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table); mutex_lock(&debug_mutex); debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); initialized = 1; -- cgit v1.2.3 From 71cb8c00a2eb74b6c61374366fef4fd402faa692 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:22 -0800 Subject: s390: simplify one-level sysctl registration for appldata_table There is no need to declare an extra table just to create a directory; this can easily be done with a prefix path passed to register_sysctl(). Simplify this registration. 
Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-4-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/appldata/appldata_base.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index c0fd29133f27..c593f2228083 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -66,16 +66,6 @@ static struct ctl_table appldata_table[] = { { }, }; -static struct ctl_table appldata_dir_table[] = { - { - .procname = appldata_proc_name, - .maxlen = 0, - .mode = S_IRUGO | S_IXUGO, - .child = appldata_table, - }, - { }, -}; - /* * Timer */ @@ -422,7 +412,7 @@ static int __init appldata_init(void) appldata_wq = alloc_ordered_workqueue("appldata", 0); if (!appldata_wq) return -ENOMEM; - appldata_sysctl_header = register_sysctl_table(appldata_dir_table); + appldata_sysctl_header = register_sysctl(appldata_proc_name, appldata_table); return 0; } -- cgit v1.2.3 From 414b2a960e4373765987bdaeaafbccde6b6eb27b Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:23 -0800 Subject: s390: simplify one-level sysctl registration for cmm_table There is no need to declare an extra table just to create a directory; this can easily be done with a prefix path passed to register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-5-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/cmm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 9141ed4c52e9..5300c6867d5e 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -335,16 +335,6 @@ static struct ctl_table cmm_table[] = { { } }; -static struct ctl_table cmm_dir_table[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = cmm_table, - }, - { } -}; - #ifdef CONFIG_CMM_IUCV #define SMSG_PREFIX "CMM" static void cmm_smsg_target(const char *from, char *msg) @@ -389,7 +379,7 @@ static int __init cmm_init(void) { int rc = -ENOMEM; - cmm_sysctl_header = register_sysctl_table(cmm_dir_table); + cmm_sysctl_header = register_sysctl("vm", cmm_table); if (!cmm_sysctl_header) goto out_sysctl; #ifdef CONFIG_CMM_IUCV -- cgit v1.2.3 From 7ddc873dcb1d05eaafceeb4a2afd53bbb31addb8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:24 -0800 Subject: s390: simplify one-level sysctl registration for page_table_sysctl There is no need to declare an extra table just to create a directory; this can easily be done with a prefix path passed to register_sysctl(). Simplify this registration. 
Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-6-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/mm/pgalloc.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 2de48b2c1b04..0f68b7257e08 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -33,19 +33,9 @@ static struct ctl_table page_table_sysctl[] = { { } }; -static struct ctl_table page_table_sysctl_dir[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = page_table_sysctl, - }, - { } -}; - static int __init page_table_register_sysctl(void) { - return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; + return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM; } __initcall(page_table_register_sysctl); -- cgit v1.2.3 From 7db12246306ea601809f22b26d7c2093dd80e146 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 15:45:25 -0800 Subject: s390: simplify dynamic sysctl registration for appldata_register_ops The routine appldata_register_ops() allocates a sysctl table with 4 entries. The first one, ops->ctl_table[0], is the parent directory, with an empty entry following it, ops->ctl_table[1]. The next entry is for the ops->name and that is ops->ctl_table[2]. It needs an empty entry following that, and that is ops->ctl_table[3]. Hence the kcalloc(4, sizeof(struct ctl_table), GFP_KERNEL). We can simplify this considerably since register_sysctl("foo", table) can create the parent directory for us if it does not exist. So we can just remove the first two entries, move the ops->name entry to the front, and just use kcalloc(2, ...). 
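Condensed from the diff that follows, the simplified registration is just (error handling trimmed):

ops->ctl_table = kcalloc(2, sizeof(struct ctl_table), GFP_KERNEL);
if (!ops->ctl_table)
	return -ENOMEM;
ops->ctl_table[0].procname = ops->name;
ops->ctl_table[0].mode = S_IRUGO | S_IWUSR;
ops->ctl_table[0].proc_handler = appldata_generic_handler;
ops->ctl_table[0].data = ops;
/* ops->ctl_table[1] stays zeroed and serves as the terminating entry */
ops->sysctl_header = register_sysctl(appldata_proc_name, ops->ctl_table);
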
[gor@linux.ibm.com: appldata_generic_handler fixup ctl_table index 2->0] Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230310234525.3986352-7-mcgrof@kernel.org Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/appldata/appldata_base.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index c593f2228083..b07b0610950e 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -281,7 +281,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { tmp_ops = list_entry(lh, struct appldata_ops, list); - if (&tmp_ops->ctl_table[2] == ctl) { + if (&tmp_ops->ctl_table[0] == ctl) { found = 1; } } @@ -351,7 +351,8 @@ int appldata_register_ops(struct appldata_ops *ops) if (ops->size > APPLDATA_MAX_REC_SIZE) return -EINVAL; - ops->ctl_table = kcalloc(4, sizeof(struct ctl_table), GFP_KERNEL); + /* The last entry must be an empty one */ + ops->ctl_table = kcalloc(2, sizeof(struct ctl_table), GFP_KERNEL); if (!ops->ctl_table) return -ENOMEM; @@ -359,17 +360,12 @@ int appldata_register_ops(struct appldata_ops *ops) list_add(&ops->list, &appldata_ops_list); mutex_unlock(&appldata_ops_mutex); - ops->ctl_table[0].procname = appldata_proc_name; - ops->ctl_table[0].maxlen = 0; - ops->ctl_table[0].mode = S_IRUGO | S_IXUGO; - ops->ctl_table[0].child = &ops->ctl_table[2]; + ops->ctl_table[0].procname = ops->name; + ops->ctl_table[0].mode = S_IRUGO | S_IWUSR; + ops->ctl_table[0].proc_handler = appldata_generic_handler; + ops->ctl_table[0].data = ops; - ops->ctl_table[2].procname = ops->name; - ops->ctl_table[2].mode = S_IRUGO | S_IWUSR; - ops->ctl_table[2].proc_handler = appldata_generic_handler; - ops->ctl_table[2].data = ops; - - ops->sysctl_header = register_sysctl_table(ops->ctl_table); + ops->sysctl_header = register_sysctl(appldata_proc_name, ops->ctl_table); if (!ops->sysctl_header) goto out; return 0; -- cgit v1.2.3 From 7229ea86e0a0ed117bbc9d1677003c0bb0a5d40e Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 10 Mar 2023 03:36:48 +0100 Subject: s390/dumpstack: resolve userspace last_break report_user_fault() currently does not show which library last_break points to. 
Call print_vma_addr() to find out; the output now looks like this: Last Breaking-Event-Address: [<000003ffaa2a56e4>] libc.so.6[3ffaa180000+251000] For the kernel it's unchanged: Last Breaking-Event-Address: [<000000000030fd06>] trace_hardirqs_on+0x56/0xc8 Signed-off-by: Ilya Leoshkevich Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/dumpstack.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1e3233eb510a..f257058d0828 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -152,7 +152,13 @@ void show_stack(struct task_struct *task, unsigned long *stack, static void show_last_breaking_event(struct pt_regs *regs) { printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] %pSR\n", regs->last_break, (void *)regs->last_break); + printk(" [<%016lx>] ", regs->last_break); + if (user_mode(regs)) { + print_vma_addr(KERN_CONT, regs->last_break); + pr_cont("\n"); + } else { + pr_cont("%pSR\n", (void *)regs->last_break); + } } void show_registers(struct pt_regs *regs) -- cgit v1.2.3 From 53fcc7dbf17691d8eac382ee315970a75286dd4b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Feb 2023 14:27:45 +0100 Subject: s390/boot: remove non-functioning image bootable check check_image_bootable() has been introduced with commit 627c9b62058e ("s390/boot: block uncompressed vmlinux booting attempts") to make sure that users don't try to boot an uncompressed vmlinux ELF image in qemu. It used to be possible quite some time ago. That commit prevented the confusing situation where an uncompressed vmlinux image starts to boot and even prints kernel messages until it crashes. Users might have tried to report the problem without realizing they were doing something which was not intended. Since commit f1d3c5323772 ("s390/boot: move sclp early buffer from fixed address in asm to C") check_image_bootable() doesn't function properly anymore; booting an uncompressed vmlinux image in qemu no longer produces any output and just crashes. Moving forward it doesn't make sense to fix check_image_bootable() anymore, so simply remove it. 
Acked-by: Alexander Gordeev Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/startup.c | 1 - arch/s390/include/asm/setup.h | 2 -- arch/s390/kernel/early.c | 14 -------------- 3 files changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 11413f0baabc..16ee3469f744 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -29,7 +29,6 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); -int __bootdata(is_full_image) = 1; struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 3a1f8825bc7d..fc887e3e76f8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -150,8 +150,6 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } -extern int is_full_image; - struct initrd_data { unsigned long start; unsigned long size; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d26f02495636..8225a4c1f2e2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -34,8 +34,6 @@ #include #include "entry.h" -int __bootdata(is_full_image); - #define decompressor_handled_param(param) \ static int __init ignore_decompressor_param_##param(char *s) \ { \ @@ -288,17 +286,6 @@ static void __init setup_boot_command_line(void) strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); } -static void __init check_image_bootable(void) -{ - if (is_full_image) - return; - - sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); - sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); - sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); - disabled_wait(); -} - static void __init sort_amode31_extable(void) { sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); @@ -307,7 +294,6 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { reset_tod_clock(); - check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); -- cgit v1.2.3 From 8c37cb7d4ffcc827a9484282691b018715a5ae1a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 8 Feb 2023 18:11:25 +0100 Subject: s390/boot: rename mem_detect to physmem_info In preparation for extending mem_detect with additional information like reserved ranges, rename it to the more generic physmem_info. This new naming also helps to avoid confusion by using more exact terms like "physmem online ranges", etc. 
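For orientation, a minimal sketch of consuming the renamed iterator, modeled on the memblock loop in the setup.c hunk further down (only the loop body varies per call site):

unsigned long start, end;
int i;

/* walk detected online memory below the usable limit */
for_each_physmem_usable_range(i, &start, &end)
	memblock_add(start, end - start);
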
Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/boot.h | 2 +- arch/s390/boot/kaslr.c | 14 +-- arch/s390/boot/mem_detect.c | 191 ----------------------------------- arch/s390/boot/physmem_info.c | 191 +++++++++++++++++++++++++++++++++++ arch/s390/boot/startup.c | 6 +- arch/s390/boot/vmem.c | 6 +- arch/s390/include/asm/mem_detect.h | 117 --------------------- arch/s390/include/asm/physmem_info.h | 118 ++++++++++++++++++++++ arch/s390/kernel/setup.c | 28 ++--- arch/s390/mm/kasan_init.c | 6 +- drivers/s390/char/sclp_early_core.c | 8 +- 12 files changed, 345 insertions(+), 344 deletions(-) delete mode 100644 arch/s390/boot/mem_detect.c create mode 100644 arch/s390/boot/physmem_info.c delete mode 100644 arch/s390/include/asm/mem_detect.h create mode 100644 arch/s390/include/asm/physmem_info.h (limited to 'arch') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index cebd4ca16916..c7c81e5f9218 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -35,7 +35,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o +obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 58ce701d6110..d39895d5796e 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -34,7 +34,7 @@ struct vmlinux_info { void startup_kernel(void); unsigned long detect_memory(unsigned long *safe_addr); -void mem_detect_set_usable_limit(unsigned long limit); +void physmem_set_usable_limit(unsigned long limit); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); unsigned long read_ipl_report(unsigned long safe_addr); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 3e3d846400b4..22b7c5d8e94a 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -3,7 +3,7 @@ * Copyright IBM Corp. 2019 */ #include -#include +#include #include #include #include @@ -93,7 +93,7 @@ static int get_random(unsigned long limit, unsigned long *value) /* * To randomize kernel base address we have to consider several facts: - * 1. physical online memory might not be continuous and have holes. mem_detect + * 1. physical online memory might not be continuous and have holes. physmem * info contains list of online memory ranges we should consider. * 2. we have several memory regions which are occupied and we should not * overlap and destroy them. Currently safe_addr tells us the border below @@ -108,7 +108,7 @@ static int get_random(unsigned long limit, unsigned long *value) * (16 pages when the kernel is built with kasan enabled) * Assumptions: * 1. kernel size (including .bss size) and upper memory limit are page aligned. - * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE + * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE * aligned (in practice memory configurations granularity on z/VM and LPAR * is 1mb). 
* @@ -132,7 +132,7 @@ static unsigned long count_valid_kernel_positions(unsigned long kernel_size, unsigned long start, end, pos = 0; int i; - for_each_mem_detect_usable_block(i, &start, &end) { + for_each_physmem_usable_range(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -153,7 +153,7 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long start, end; int i; - for_each_mem_detect_usable_block(i, &start, &end) { + for_each_physmem_usable_range(i, &start, &end) { if (_min >= end) continue; if (start >= _max) @@ -172,8 +172,8 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel unsigned long get_random_base(unsigned long safe_addr) { - unsigned long usable_total = get_mem_detect_usable_total(); - unsigned long memory_limit = get_mem_detect_end(); + unsigned long usable_total = get_physmem_usable_total(); + unsigned long memory_limit = get_physmem_usable_end(); unsigned long base_pos, max_pos, kernel_size; int i; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c deleted file mode 100644 index 35f4ba11f7fd..000000000000 --- a/arch/s390/boot/mem_detect.c +++ /dev/null @@ -1,191 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include "decompressor.h" -#include "boot.h" - -struct mem_detect_info __bootdata(mem_detect); - -/* up to 256 storage elements, 1020 subincrements each */ -#define ENTRIES_EXTENDED_MAX \ - (256 * (1020 / 2) * sizeof(struct mem_detect_block)) - -static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) -{ - if (n < MEM_INLINED_ENTRIES) - return &mem_detect.entries[n]; - return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES]; -} - -/* - * sequential calls to add_mem_detect_block with adjacent memory areas - * are merged together into single memory block. - */ -void add_mem_detect_block(u64 start, u64 end) -{ - struct mem_detect_block *block; - - if (mem_detect.count) { - block = __get_mem_detect_block_ptr(mem_detect.count - 1); - if (block->end == start) { - block->end = end; - return; - } - } - - block = __get_mem_detect_block_ptr(mem_detect.count); - block->start = start; - block->end = end; - mem_detect.count++; -} - -static int __diag260(unsigned long rx1, unsigned long rx2) -{ - unsigned long reg1, reg2, ry; - union register_pair rx; - psw_t old; - int rc; - - rx.even = rx1; - rx.odd = rx2; - ry = 0x10; /* storage configuration */ - rc = -1; /* fail */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " diag %[rx],%[ry],0x260\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [ry] "+&d" (ry), - "+Q" (S390_lowcore.program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) - : "cc", "memory"); - return rc == 0 ? 
ry : -1; -} - -static int diag260(void) -{ - int rc, i; - - struct { - unsigned long start; - unsigned long end; - } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ - - memset(storage_extents, 0, sizeof(storage_extents)); - rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); - if (rc == -1) - return -1; - - for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) - add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1); - return 0; -} - -static int tprot(unsigned long addr) -{ - unsigned long reg1, reg2; - int rc = -EFAULT; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " tprot 0(%[addr]),0\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - "=Q" (S390_lowcore.program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), - [addr] "a" (addr) - : "cc", "memory"); - return rc; -} - -static unsigned long search_mem_end(void) -{ - unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ - unsigned long offset = 0; - unsigned long pivot; - - while (range > 1) { - range >>= 1; - pivot = offset + range; - if (!tprot(pivot << 20)) - offset = pivot; - } - return (offset + 1) << 20; -} - -unsigned long detect_memory(unsigned long *safe_addr) -{ - unsigned long max_physmem_end = 0; - - sclp_early_get_memsize(&max_physmem_end); - mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64)); - - if (!sclp_early_read_storage_info()) { - mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - } else if (!diag260()) { - mem_detect.info_source = MEM_DETECT_DIAG260; - max_physmem_end = max_physmem_end ?: get_mem_detect_end(); - } else if (max_physmem_end) { - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - } else { - max_physmem_end = search_mem_end(); - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - } - - if (mem_detect.count > MEM_INLINED_ENTRIES) { - *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) * - sizeof(struct mem_detect_block); - } - - return max_physmem_end; -} - -void mem_detect_set_usable_limit(unsigned long limit) -{ - struct mem_detect_block *block; - int i; - - /* make sure mem_detect.usable ends up within online memory block */ - for (i = 0; i < mem_detect.count; i++) { - block = __get_mem_detect_block_ptr(i); - if (block->start >= limit) - break; - if (block->end >= limit) { - mem_detect.usable = limit; - break; - } - mem_detect.usable = block->end; - } -} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c new file mode 100644 index 000000000000..dc2e4d0abfab --- /dev/null +++ b/arch/s390/boot/physmem_info.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include "decompressor.h" +#include "boot.h" + +struct physmem_info __bootdata(physmem_info); + +/* up to 256 storage elements, 1020 subincrements each */ +#define ENTRIES_EXTENDED_MAX \ + (256 * (1020 / 2) * sizeof(struct physmem_range)) + +static struct physmem_range *__get_physmem_range_ptr(u32 n) +{ + if (n < MEM_INLINED_ENTRIES) + return &physmem_info.online[n]; + return 
&physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; +} + +/* + * sequential calls to add_physmem_online_range with adjacent memory ranges + * are merged together into single memory range. + */ +void add_physmem_online_range(u64 start, u64 end) +{ + struct physmem_range *range; + + if (physmem_info.range_count) { + range = __get_physmem_range_ptr(physmem_info.range_count - 1); + if (range->end == start) { + range->end = end; + return; + } + } + + range = __get_physmem_range_ptr(physmem_info.range_count); + range->start = start; + range->end = end; + physmem_info.range_count++; +} + +static int __diag260(unsigned long rx1, unsigned long rx2) +{ + unsigned long reg1, reg2, ry; + union register_pair rx; + psw_t old; + int rc; + + rx.even = rx1; + rx.odd = rx2; + ry = 0x10; /* storage configuration */ + rc = -1; /* fail */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " diag %[rx],%[ry],0x260\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [ry] "+&d" (ry), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [rx] "d" (rx.pair), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) + : "cc", "memory"); + return rc == 0 ? ry : -1; +} + +static int diag260(void) +{ + int rc, i; + + struct { + unsigned long start; + unsigned long end; + } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ + + memset(storage_extents, 0, sizeof(storage_extents)); + rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); + if (rc == -1) + return -1; + + for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) + add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1); + return 0; +} + +static int tprot(unsigned long addr) +{ + unsigned long reg1, reg2; + int rc = -EFAULT; + psw_t old; + + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " tprot 0(%[addr]),0\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [addr] "a" (addr) + : "cc", "memory"); + return rc; +} + +static unsigned long search_mem_end(void) +{ + unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ + unsigned long offset = 0; + unsigned long pivot; + + while (range > 1) { + range >>= 1; + pivot = offset + range; + if (!tprot(pivot << 20)) + offset = pivot; + } + return (offset + 1) << 20; +} + +unsigned long detect_memory(unsigned long *safe_addr) +{ + unsigned long max_physmem_end = 0; + + sclp_early_get_memsize(&max_physmem_end); + physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64)); + + if (!sclp_early_read_storage_info()) { + physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; + } else if (!diag260()) { + physmem_info.info_source = MEM_DETECT_DIAG260; + max_physmem_end = max_physmem_end ?: get_physmem_usable_end(); + } else if (max_physmem_end) { + add_physmem_online_range(0, max_physmem_end); + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + 
max_physmem_end = search_mem_end(); + add_physmem_online_range(0, max_physmem_end); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + + if (physmem_info.range_count > MEM_INLINED_ENTRIES) { + *safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) * + sizeof(struct physmem_range); + } + + return max_physmem_end; +} + +void physmem_set_usable_limit(unsigned long limit) +{ + struct physmem_range *range; + int i; + + /* make sure mem_detect.usable ends up within online memory block */ + for (i = 0; i < physmem_info.range_count; i++) { + range = __get_physmem_range_ptr(i); + if (range->start >= limit) + break; + if (range->end >= limit) { + physmem_info.usable = limit; + break; + } + physmem_info.usable = range->end; + } +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 16ee3469f744..50475bf25ecd 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "decompressor.h" #include "boot.h" #include "uv.h" @@ -139,7 +139,7 @@ static void handle_relocs(unsigned long offset) * * Consider the following factors: * 1. max_physmem_end - end of physical memory online or standby. - * Always <= end of the last online memory block (get_mem_detect_end()). + * Always >= end of the last online memory range (get_physmem_online_end()). * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the * kernel is able to support. * 3. "mem=" kernel command line option which limits physical memory usage. @@ -303,7 +303,7 @@ void startup_kernel(void) setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); asce_limit = setup_kernel_memory_layout(); - mem_detect_set_usable_limit(ident_map_size); + physmem_set_usable_limit(ident_map_size); if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(safe_addr); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 4d1d0d8e99cb..b89a6893f398 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include "decompressor.h" @@ -51,7 +51,7 @@ static void pgtable_populate_init(void) pgalloc_low = max(pgalloc_low, initrd_end); } - pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE); + pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE); pgalloc_pos = pgalloc_end; boot_check_oom(); @@ -252,7 +252,7 @@ void setup_vmem(unsigned long asce_limit) */ pgtable_populate_init(); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) pgtable_populate(start, end, POPULATE_ONE2ONE); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h deleted file mode 100644 index f9e7354036d2..000000000000 --- a/arch/s390/include/asm/mem_detect.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_MEM_DETECT_H -#define _ASM_S390_MEM_DETECT_H - -#include - -enum mem_info_source { - MEM_DETECT_NONE = 0, - MEM_DETECT_SCLP_STOR_INFO, - MEM_DETECT_DIAG260, - MEM_DETECT_SCLP_READ_INFO, - MEM_DETECT_BIN_SEARCH -}; - -struct mem_detect_block { - u64 start; - u64 end; -}; - -/* - * Storage element id is defined as 1 byte (up to 256 storage elements). - * In practise only storage element id 0 and 1 are used). 
- * According to architecture one storage element could have as much as - * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info. - * If more mem_detect_blocks are required, a block of memory from already - * known mem_detect_block is taken (entries_extended points to it). - */ -#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ - -struct mem_detect_info { - u32 count; - u8 info_source; - unsigned long usable; - struct mem_detect_block entries[MEM_INLINED_ENTRIES]; - struct mem_detect_block *entries_extended; -}; -extern struct mem_detect_info mem_detect; - -void add_mem_detect_block(u64 start, u64 end); - -static inline int __get_mem_detect_block(u32 n, unsigned long *start, - unsigned long *end, bool respect_usable_limit) -{ - if (n >= mem_detect.count) { - *start = 0; - *end = 0; - return -1; - } - - if (n < MEM_INLINED_ENTRIES) { - *start = (unsigned long)mem_detect.entries[n].start; - *end = (unsigned long)mem_detect.entries[n].end; - } else { - *start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start; - *end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end; - } - - if (respect_usable_limit && mem_detect.usable) { - if (*start >= mem_detect.usable) - return -1; - if (*end > mem_detect.usable) - *end = mem_detect.usable; - } - return 0; -} - -/** - * for_each_mem_detect_usable_block - early online memory range iterator - * @i: an integer used as loop variable - * @p_start: ptr to unsigned long for start address of the range - * @p_end: ptr to unsigned long for end address of the range - * - * Walks over detected online memory ranges below usable limit. - */ -#define for_each_mem_detect_usable_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, true); i++) - -/* Walks over all detected online memory ranges disregarding usable limit. */ -#define for_each_mem_detect_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, false); i++) - -static inline unsigned long get_mem_detect_usable_total(void) -{ - unsigned long start, end, total = 0; - int i; - - for_each_mem_detect_usable_block(i, &start, &end) - total += end - start; - - return total; -} - -static inline void get_mem_detect_reserved(unsigned long *start, - unsigned long *size) -{ - *start = (unsigned long)mem_detect.entries_extended; - if (mem_detect.count > MEM_INLINED_ENTRIES) - *size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block); - else - *size = 0; -} - -static inline unsigned long get_mem_detect_end(void) -{ - unsigned long start; - unsigned long end; - - if (mem_detect.usable) - return mem_detect.usable; - if (mem_detect.count) { - __get_mem_detect_block(mem_detect.count - 1, &start, &end, false); - return end; - } - return 0; -} - -#endif diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h new file mode 100644 index 000000000000..d5e65a5d06e7 --- /dev/null +++ b/arch/s390/include/asm/physmem_info.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_MEM_DETECT_H +#define _ASM_S390_MEM_DETECT_H + +#include + +enum physmem_info_source { + MEM_DETECT_NONE = 0, + MEM_DETECT_SCLP_STOR_INFO, + MEM_DETECT_DIAG260, + MEM_DETECT_SCLP_READ_INFO, + MEM_DETECT_BIN_SEARCH +}; + +struct physmem_range { + u64 start; + u64 end; +}; + +/* + * Storage element id is defined as 1 byte (up to 256 storage elements). + * In practise only storage element id 0 and 1 are used). 
+ * According to architecture one storage element could have as much as + * 1020 subincrements. 255 physmem_ranges are embedded in physmem_info. + * If more physmem_ranges are required, a block of memory from already + * known physmem_range is taken (online_extended points to it). + */ +#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ + +struct physmem_info { + u32 range_count; + u8 info_source; + unsigned long usable; + struct physmem_range online[MEM_INLINED_ENTRIES]; + struct physmem_range *online_extended; +}; + +extern struct physmem_info physmem_info; + +void add_physmem_online_range(u64 start, u64 end); + +static inline int __get_physmem_range(u32 n, unsigned long *start, + unsigned long *end, bool respect_usable_limit) +{ + if (n >= physmem_info.range_count) { + *start = 0; + *end = 0; + return -1; + } + + if (n < MEM_INLINED_ENTRIES) { + *start = (unsigned long)physmem_info.online[n].start; + *end = (unsigned long)physmem_info.online[n].end; + } else { + *start = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].start; + *end = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].end; + } + + if (respect_usable_limit && physmem_info.usable) { + if (*start >= physmem_info.usable) + return -1; + if (*end > physmem_info.usable) + *end = physmem_info.usable; + } + return 0; +} + +/** + * for_each_physmem_usable_range - early online memory range iterator + * @i: an integer used as loop variable + * @p_start: ptr to unsigned long for start address of the range + * @p_end: ptr to unsigned long for end address of the range + * + * Walks over detected online memory ranges below usable limit. + */ +#define for_each_physmem_usable_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, true); i++) + +/* Walks over all detected online memory ranges disregarding usable limit. 
*/ +#define for_each_physmem_online_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) + +static inline unsigned long get_physmem_usable_total(void) +{ + unsigned long start, end, total = 0; + int i; + + for_each_physmem_usable_range(i, &start, &end) + total += end - start; + + return total; +} + +static inline void get_physmem_reserved(unsigned long *start, unsigned long *size) +{ + *start = (unsigned long)physmem_info.online_extended; + if (physmem_info.range_count > MEM_INLINED_ENTRIES) + *size = (physmem_info.range_count - MEM_INLINED_ENTRIES) * + sizeof(struct physmem_range); + else + *size = 0; +} + +static inline unsigned long get_physmem_usable_end(void) +{ + unsigned long start; + unsigned long end; + + if (physmem_info.usable) + return physmem_info.usable; + if (physmem_info.range_count) { + __get_physmem_range(physmem_info.range_count - 1, &start, &end, false); + return end; + } + return 0; +} + +#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ec5cdf9dadc..f909a2dc8a5a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include @@ -147,7 +147,7 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); -struct mem_detect_info __bootdata(mem_detect); +struct physmem_info __bootdata(physmem_info); struct initrd_data __bootdata(initrd_data); unsigned long __bootdata(pgalloc_pos); unsigned long __bootdata(pgalloc_end); @@ -730,27 +730,27 @@ static void __init reserve_certificate_list(void) memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); } -static void __init reserve_mem_detect_info(void) +static void __init reserve_physmem_info(void) { unsigned long start, size; - get_mem_detect_reserved(&start, &size); + get_physmem_reserved(&start, &size); if (size) memblock_reserve(start, size); } -static void __init free_mem_detect_info(void) +static void __init free_physmem_info(void) { unsigned long start, size; - get_mem_detect_reserved(&start, &size); + get_physmem_reserved(&start, &size); if (size) memblock_phys_free(start, size); } static const char * __init get_mem_info_source(void) { - switch (mem_detect.info_source) { + switch (physmem_info.info_source) { case MEM_DETECT_SCLP_STOR_INFO: return "sclp storage info"; case MEM_DETECT_DIAG260: @@ -763,18 +763,18 @@ static const char * __init get_mem_info_source(void) return "none"; } -static void __init memblock_add_mem_detect_info(void) +static void __init memblock_add_physmem_info(void) { unsigned long start, end; int i; pr_debug("physmem info source: %s (%hhd)\n", - get_mem_info_source(), mem_detect.info_source); + get_mem_info_source(), physmem_info.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) memblock_add(start, end - start); - for_each_mem_detect_block(i, &start, &end) + for_each_physmem_online_range(i, &start, &end) memblock_physmem_add(start, end - start); memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); @@ -997,14 +997,14 @@ void __init setup_arch(char **cmdline_p) reserve_kernel(); reserve_initrd(); reserve_certificate_list(); - reserve_mem_detect_info(); + reserve_physmem_info(); memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory 
*/
-	memblock_add_mem_detect_info();
+	memblock_add_physmem_info();
 
-	free_mem_detect_info();
+	free_physmem_info();
 	setup_memory_end();
 	memblock_dump_all();
 	setup_memory();
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index ef89a5f26853..b0658136264f 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -4,7 +4,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -244,7 +244,7 @@ void __init kasan_early_init(void)
 	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
 
 	if (has_edat) {
-		shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
+		shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
 		segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
 		segment_low = segment_pos - shadow_alloc_size;
 		segment_low = round_down(segment_low, _SEGMENT_SIZE);
@@ -282,7 +282,7 @@ void __init kasan_early_init(void)
 	 * +- shadow end ----+---------+- shadow end ---+
 	 */
 	/* populate kasan shadow (for identity mapping and zero page mapping) */
-	for_each_mem_detect_usable_block(i, &start, &end)
+	for_each_physmem_usable_range(i, &start, &end)
 		kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP);
 	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
 		untracked_end = VMALLOC_START;
diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c
index ac1d00980fa6..dbd5c53d8edf 100644
--- a/drivers/s390/char/sclp_early_core.c
+++ b/drivers/s390/char/sclp_early_core.c
@@ -10,7 +10,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include "sclp.h"
 #include "sclp_rw.h"
@@ -336,7 +336,7 @@ int __init sclp_early_get_hsa_size(unsigned long *hsa_size)
 
 #define SCLP_STORAGE_INFO_FACILITY	0x0000400000000000UL
 
-void __weak __init add_mem_detect_block(u64 start, u64 end) {}
+void __weak __init add_physmem_online_range(u64 start, u64 end) {}
 int __init sclp_early_read_storage_info(void)
 {
 	struct read_storage_sccb *sccb = (struct read_storage_sccb *)sclp_early_sccb;
@@ -369,7 +369,7 @@ int __init sclp_early_read_storage_info(void)
 			if (!sccb->entries[sn])
 				continue;
 			rn = sccb->entries[sn] >> 16;
-			add_mem_detect_block((rn - 1) * rzm, rn * rzm);
+			add_physmem_online_range((rn - 1) * rzm, rn * rzm);
 		}
 		break;
 	case 0x0310:
@@ -382,6 +382,6 @@ int __init sclp_early_read_storage_info(void)
 	return 0;
 fail:
-	mem_detect.count = 0;
+	physmem_info.range_count = 0;
 	return -EIO;
 }
-- cgit v1.2.3

From f913a6600491d3f478ea976a9be0fb1001476c10 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik
Date: Thu, 2 Feb 2023 13:59:36 +0100
Subject: s390/boot: rework decompressor reserved tracking

Currently several approaches for finding unused memory in the
decompressor are utilized. While "safe_addr" grows towards higher
addresses, the vmem code allocates paging structures top down. The
former requires careful ordering. In addition to that, the ipl report
handling code verifies potential intersections with secure boot
certificates on its own. Neither of the two approaches is aware of
memory holes, and they are not consistent with each other in low
memory conditions.

To solve that, the existing approaches are generalized and combined,
and online memory ranges are now taken into consideration.

physmem_info has been extended to contain reserved memory ranges. A
new set of functions allows handling reserves and finding unused
memory. All reserves and memory allocations are "typed".

In case of an out of memory condition the decompressor fails with
detailed info on the current reserved ranges and usable online memory:
Linux version 6.2.0 ...
Kernel command line: ... mem=100M
Out of memory allocating 100000 bytes 100000 aligned in range 0:5800000
Reserved memory ranges:
0000000000000000 0000000003e33000 DECOMPRESSOR
0000000003f00000 00000000057648a3 INITRD
00000000063e0000 00000000063e8000 VMEM
00000000063eb000 00000000063f4000 VMEM
00000000063f7800 0000000006400000 VMEM
0000000005800000 0000000006300000 KASAN
Usable online memory ranges (info source: sclp read info [3]):
0000000000000000 0000000006400000
Usable online memory total: 6400000 Reserved: 61b10a3 Free: 24ef5d
Call Trace:
(sp:000000000002bd58 [<0000000000012a70>] physmem_alloc_top_down+0x60/0x14c)
 sp:000000000002bdc8 [<0000000000013756>] _pa+0x56/0x6a
 sp:000000000002bdf0 [<0000000000013bcc>] pgtable_populate+0x45c/0x65e
 sp:000000000002be90 [<00000000000140aa>] setup_vmem+0x2da/0x424
 sp:000000000002bec8 [<0000000000011c20>] startup_kernel+0x428/0x8b4
 sp:000000000002bf60 [<00000000000100f4>] startup_normal+0xd4/0xd4

physmem_alloc_range allows finding free memory in a specified range. It
should be used for one-time allocations only, such as finding a position
for amode31 and vmlinux.

physmem_alloc_top_down can be used just like physmem_alloc_range, but it
also allows multiple allocations per type and tries to merge sequential
allocations together, which is useful for paging structure allocations.
If sequential allocations cannot be merged, they are "chained", which
allows easy per-type enumeration of reserved ranges and their later
migration to memblock. The extra "struct reserved_range" entries
allocated for chaining are themselves neither tracked nor reserved; they
rely on the fact that both physmem_alloc_range and physmem_alloc_top_down
search for free memory only below the current top-down allocator
position. All reserved ranges should be transferred to memblock before
memblock allocations are enabled.

The startup code has been reordered to delay any memory allocations until
online memory ranges are detected and occupied memory ranges are marked
as reserved, so that they are excluded from follow-up allocations. IPL
report certificates are a special case: the certificate list is checked
together with the other memory reserves until the certificates are saved
elsewhere. The memory KASAN requires for shadow memory allocation and
mapping is reserved as one large chunk, which is later passed on to the
KASAN early initialization code.
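To make the above concrete, here is a minimal usage sketch (illustrative
only: the sizes and variables are example choices, while the functions,
types and signatures are the ones introduced by this patch):

	/* one-off allocation, one per type: anywhere in [min, max), die on OOM */
	addr = physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE,
				   0, SZ_2G, true);

	/* repeated allocations of the same type get merged or chained */
	pte = physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE);
	pte = physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE);

	/* later, all reserves of a type can be walked and handed to memblock */
	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
		memblock_reserve(start, end - start);

This is the same shape the call sites in this patch take: amode31
placement uses physmem_alloc_range, while the boot vmem code relies on
physmem_alloc_top_down.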
Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 30 +++++- arch/s390/boot/ipl_report.c | 106 +++++++++----------- arch/s390/boot/kaslr.c | 113 ++------------------- arch/s390/boot/pgm_check_info.c | 5 +- arch/s390/boot/physmem_info.c | 188 +++++++++++++++++++++++++++++------ arch/s390/boot/startup.c | 86 +++++++++------- arch/s390/boot/vmem.c | 69 ++----------- arch/s390/boot/vmlinux.lds.S | 2 + arch/s390/include/asm/physmem_info.h | 112 +++++++++++++++++---- arch/s390/include/asm/setup.h | 9 -- arch/s390/kernel/setup.c | 76 ++++---------- arch/s390/mm/kasan_init.c | 39 ++++---- 12 files changed, 430 insertions(+), 405 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index d39895d5796e..2b4048106418 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,8 @@ #ifndef __ASSEMBLY__ +#include + struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; @@ -33,21 +35,34 @@ struct vmlinux_info { }; void startup_kernel(void); -unsigned long detect_memory(unsigned long *safe_addr); +unsigned long detect_max_physmem_end(void); +void detect_physmem_online_ranges(unsigned long max_physmem_end); void physmem_set_usable_limit(unsigned long limit); +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); +void physmem_free(enum reserved_range_type type); +/* for continuous/multiple allocations per type */ +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align); +/* for single allocations, 1 per type */ +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom); +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); -unsigned long read_ipl_report(unsigned long safe_addr); +int read_ipl_report(void); +void save_ipl_cert_comp_list(void); void setup_boot_command_line(void); void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); void print_pgm_check_info(void); -unsigned long get_random_base(unsigned long safe_addr); +unsigned long get_random_base(void); void setup_vmem(unsigned long asce_limit); -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total); void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void print_stacktrace(unsigned long sp); void error(char *m); extern struct machine_info machine; @@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _stack_start[], _stack_end[]; -extern char _end[]; +extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; @@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info; #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff 
--git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 74b5cd264862..1803035e68d2 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "boot.h" @@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size); unsigned long __bootdata(early_ipl_comp_list_addr); unsigned long __bootdata(early_ipl_comp_list_size); +static struct ipl_rb_certificates *certs; +static struct ipl_rb_components *comps; +static bool ipl_report_needs_saving; + #define for_each_rb_entry(entry, rb) \ for (entry = rb->entries; \ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ entry++) -static inline bool intersects(unsigned long addr0, unsigned long size0, - unsigned long addr1, unsigned long size1) -{ - return addr0 + size0 > addr1 && addr1 + size1 > addr0; -} - -static unsigned long find_bootdata_space(struct ipl_rb_components *comps, - struct ipl_rb_certificates *certs, - unsigned long safe_addr) +static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; @@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, ipl_cert_list_size = 0; for_each_rb_entry(cert, certs) ipl_cert_list_size += sizeof(unsigned int) + cert->len; - size = ipl_cert_list_size + early_ipl_comp_list_size; + return ipl_cert_list_size + early_ipl_comp_list_size; +} - /* - * Start from safe_addr to find a free memory area large - * enough for the IPL report boot data. This area is used - * for ipl_cert_list_addr/ipl_cert_list_size and - * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must - * not overlap with any component or any certificate. - */ -repeat: - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && - intersects(initrd_data.start, initrd_data.size, safe_addr, size)) - safe_addr = initrd_data.start + initrd_data.size; - if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) { - safe_addr = (unsigned long)comps + comps->len; - goto repeat; - } - for_each_rb_entry(comp, comps) - if (intersects(safe_addr, size, comp->addr, comp->len)) { - safe_addr = comp->addr + comp->len; - goto repeat; +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + struct ipl_rb_certificate_entry *cert; + + if (!ipl_report_needs_saving) + return false; + + for_each_rb_entry(cert, certs) { + if (intersects(addr, size, cert->addr, cert->len)) { + *intersection_start = cert->addr; + return true; } - if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) { - safe_addr = (unsigned long)certs + certs->len; - goto repeat; } - for_each_rb_entry(cert, certs) - if (intersects(safe_addr, size, cert->addr, cert->len)) { - safe_addr = cert->addr + cert->len; - goto repeat; - } - early_ipl_comp_list_addr = safe_addr; - ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size; - - return safe_addr + size; + return false; } -static void copy_components_bootdata(struct ipl_rb_components *comps) +static void copy_components_bootdata(void) { struct ipl_rb_component_entry *comp, *ptr; @@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps) memcpy(ptr++, comp, sizeof(*ptr)); } -static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) +static void copy_certificates_bootdata(void) { struct ipl_rb_certificate_entry *cert; void *ptr; @@ -104,10 +84,8 @@ static void 
copy_certificates_bootdata(struct ipl_rb_certificates *certs) } } -unsigned long read_ipl_report(unsigned long safe_addr) +int read_ipl_report(void) { - struct ipl_rb_certificates *certs; - struct ipl_rb_components *comps; struct ipl_pl_hdr *pl_hdr; struct ipl_rl_hdr *rl_hdr; struct ipl_rb_hdr *rb_hdr; @@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) */ if (!ipl_block_valid || !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) - return safe_addr; + return -1; ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); /* * There is an IPL report, to find it load the pointer to the @@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr) * With either the component list or the certificate list * missing the kernel will stay ignorant of secure IPL. */ - if (!comps || !certs) - return safe_addr; + if (!comps || !certs) { + certs = NULL; + return -1; + } - /* - * Copy component and certificate list to a safe area - * where the decompressed kernel can find them. - */ - safe_addr = find_bootdata_space(comps, certs, safe_addr); - copy_components_bootdata(comps); - copy_certificates_bootdata(certs); + ipl_report_needs_saving = true; + physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr, + (unsigned long)rl_end - (unsigned long)pl_hdr); + return 0; +} + +void save_ipl_cert_comp_list(void) +{ + unsigned long size; + + if (!ipl_report_needs_saving) + return; + + size = get_cert_comp_list_size(); + early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; - return safe_addr; + copy_components_bootdata(); + copy_certificates_bootdata(); + physmem_free(RR_IPLREPORT); + ipl_report_needs_saving = false; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 22b7c5d8e94a..71f75f03f800 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } -/* - * To randomize kernel base address we have to consider several facts: - * 1. physical online memory might not be continuous and have holes. physmem - * info contains list of online memory ranges we should consider. - * 2. we have several memory regions which are occupied and we should not - * overlap and destroy them. Currently safe_addr tells us the border below - * which all those occupied regions are. We are safe to use anything above - * safe_addr. - * 3. the upper limit might apply as well, even if memory above that limit is - * online. Currently those limitations are: - * 3.1. Limit set by "mem=" kernel command line option - * 3.2. memory reserved at the end for kasan initialization. - * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size). - * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages - * (16 pages when the kernel is built with kasan enabled) - * Assumptions: - * 1. kernel size (including .bss size) and upper memory limit are page aligned. - * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE - * aligned (in practice memory configurations granularity on z/VM and LPAR - * is 1mb). - * - * To guarantee uniform distribution of kernel base address among all suitable - * addresses we generate random value just once. For that we need to build a - * continuous range in which every value would be suitable. 
We can build this - * range by simply counting all suitable addresses (let's call them positions) - * which would be valid as kernel base address. To count positions we iterate - * over online memory ranges. For each range which is big enough for the - * kernel image we count all suitable addresses we can put the kernel image at - * that is - * (end - start - kernel_size) / THREAD_SIZE + 1 - * Two functions count_valid_kernel_positions and position_to_address help - * to count positions in memory range given and then convert position back - * to address. - */ -static unsigned long count_valid_kernel_positions(unsigned long kernel_size, - unsigned long _min, - unsigned long _max) -{ - unsigned long start, end, pos = 0; - int i; - - for_each_physmem_usable_range(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - pos += (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return pos; -} - -static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size, - unsigned long _min, unsigned long _max) -{ - unsigned long start, end; - int i; - - for_each_physmem_usable_range(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos) - return start + (pos - 1) * THREAD_SIZE; - pos -= (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return 0; -} - -unsigned long get_random_base(unsigned long safe_addr) +unsigned long get_random_base(void) { - unsigned long usable_total = get_physmem_usable_total(); - unsigned long memory_limit = get_physmem_usable_end(); - unsigned long base_pos, max_pos, kernel_size; - int i; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size; + unsigned long random; - /* - * Avoid putting kernel in the end of physical memory - * which vmem and kasan code will use for shadow memory and - * pgtable mapping allocations. 
- */ - memory_limit -= kasan_estimate_memory_needs(usable_total); - memory_limit -= vmem_estimate_memory_needs(usable_total); - - safe_addr = ALIGN(safe_addr, THREAD_SIZE); - kernel_size = vmlinux.image_size + vmlinux.bss_size; - if (safe_addr + kernel_size > memory_limit) + /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */ + if (get_random(physmem_info.usable - minimal_pos, &random)) return 0; - max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); - if (!max_pos) { - sclp_early_printk("KASLR disabled: not enough memory\n"); - return 0; - } - - /* we need a value in the range [1, base_pos] inclusive */ - if (get_random(max_pos, &base_pos)) - return 0; - return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit); + return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE, + vmlinux.default_lma, minimal_pos + random, false); } diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index c2a1defc79da..0861e3c403f8 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -123,11 +123,10 @@ out: sclp_early_printk(buf); } -static noinline void print_stacktrace(void) +void print_stacktrace(unsigned long sp) { struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, (unsigned long)_stack_end }; - unsigned long sp = S390_lowcore.gpregs_save_area[15]; bool first = true; decompressor_printk("Call Trace:\n"); @@ -173,7 +172,7 @@ void print_pgm_check_info(void) gpregs[8], gpregs[9], gpregs[10], gpregs[11]); decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(); + print_stacktrace(S390_lowcore.gpregs_save_area[15]); decompressor_printk("Last Breaking-Event-Address:\n"); decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, (void *)S390_lowcore.pgm_last_break); diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index dc2e4d0abfab..4ee9b7381142 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,16 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include +#include #include "decompressor.h" #include "boot.h" struct physmem_info __bootdata(physmem_info); +static unsigned int physmem_alloc_ranges; +static unsigned long physmem_alloc_pos; /* up to 256 storage elements, 1020 subincrements each */ #define ENTRIES_EXTENDED_MAX \ @@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) { if (n < MEM_INLINED_ENTRIES) return &physmem_info.online[n]; + if (unlikely(!physmem_info.online_extended)) { + physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( + RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + physmem_alloc_pos, true); + } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; } @@ -143,49 +153,171 @@ static unsigned long search_mem_end(void) return (offset + 1) << 20; } -unsigned long detect_memory(unsigned long *safe_addr) +unsigned long detect_max_physmem_end(void) { unsigned long max_physmem_end = 0; - sclp_early_get_memsize(&max_physmem_end); - physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64)); + if (!sclp_early_get_memsize(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + max_physmem_end = search_mem_end(); + 
physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + return max_physmem_end; +} +void detect_physmem_online_ranges(unsigned long max_physmem_end) +{ if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (!diag260()) { physmem_info.info_source = MEM_DETECT_DIAG260; - max_physmem_end = max_physmem_end ?: get_physmem_usable_end(); } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); - physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; - } else { - max_physmem_end = search_mem_end(); - add_physmem_online_range(0, max_physmem_end); - physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } +} - if (physmem_info.range_count > MEM_INLINED_ENTRIES) { - *safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) * - sizeof(struct physmem_range); +void physmem_set_usable_limit(unsigned long limit) +{ + physmem_info.usable = limit; + physmem_alloc_pos = limit; +} + +static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) +{ + unsigned long start, end, total_mem = 0, total_reserved_mem = 0; + struct reserved_range *range; + enum reserved_range_type t; + int i; + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + size, align, min, max); + decompressor_printk("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + total_reserved_mem += end - start; + } + decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n", + get_physmem_info_source(), physmem_info.info_source); + for_each_physmem_usable_range(i, &start, &end) { + decompressor_printk("%016lx %016lx\n", start, end); + total_mem += end - start; } + decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? 
total_mem - total_reserved_mem : 0); + print_stacktrace(current_frame_address()); + sclp_early_printk("\n\n -- System halted\n"); + disabled_wait(); +} - return max_physmem_end; +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + physmem_info.reserved[type].start = addr; + physmem_info.reserved[type].end = addr + size; } -void physmem_set_usable_limit(unsigned long limit) +void physmem_free(enum reserved_range_type type) { - struct physmem_range *range; - int i; + physmem_info.reserved[type].start = 0; + physmem_info.reserved[type].end = 0; +} - /* make sure mem_detect.usable ends up within online memory block */ - for (i = 0; i < physmem_info.range_count; i++) { - range = __get_physmem_range_ptr(i); - if (range->start >= limit) - break; - if (range->end >= limit) { - physmem_info.usable = limit; +static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + unsigned long res_addr, res_size; + int t; + + for (t = 0; t < RR_MAX; t++) { + if (!get_physmem_reserved(t, &res_addr, &res_size)) + continue; + if (intersects(addr, size, res_addr, res_size)) { + *intersection_start = res_addr; + return true; + } + } + return ipl_report_certs_intersects(addr, size, intersection_start); +} + +static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max, + unsigned int from_ranges, unsigned int *ranges_left, + bool die_on_oom) +{ + unsigned int nranges = from_ranges ?: physmem_info.range_count; + unsigned long range_start, range_end; + unsigned long intersection_start; + unsigned long addr, pos = max; + + align = max(align, 8UL); + while (nranges) { + __get_physmem_range(nranges - 1, &range_start, &range_end, false); + pos = min(range_end, pos); + + if (round_up(min, align) + size > pos) break; + addr = round_down(pos - size, align); + if (range_start > addr) { + nranges--; + continue; + } + if (__physmem_alloc_intersects(addr, size, &intersection_start)) { + pos = intersection_start; + continue; + } + + if (ranges_left) + *ranges_left = nranges; + return addr; + } + if (die_on_oom) + die_oom(size, align, min, max); + return 0; +} + +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom) +{ + unsigned long addr; + + max = min(max, physmem_alloc_pos); + addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); + if (addr) + physmem_reserve(type, addr, size); + return addr; +} + +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + struct reserved_range *range = &physmem_info.reserved[type]; + struct reserved_range *new_range; + unsigned int ranges_left; + unsigned long addr; + + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + /* if not a consecutive allocation of the same type or first allocation */ + if (range->start != addr + size) { + if (range->end) { + physmem_alloc_pos = __physmem_alloc_range( + sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, + physmem_alloc_ranges, &ranges_left, true); + new_range = (struct reserved_range *)physmem_alloc_pos; + *new_range = *range; + range->chain = new_range; + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, + ranges_left, &ranges_left, true); } - physmem_info.usable = range->end; + range->end = addr + size; } + range->start = 
addr; + physmem_alloc_pos = addr; + physmem_alloc_ranges = ranges_left; + return addr; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 50475bf25ecd..bc07e24329b9 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__memcpy_real_area); pte_t *__bootdata_preserved(memcpy_real_ptep); -unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); struct page *__bootdata_preserved(vmemmap); @@ -29,7 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); -struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); @@ -75,17 +73,20 @@ unsigned long mem_safe_offset(void) } #endif -static unsigned long rescue_initrd(unsigned long safe_addr) +static void rescue_initrd(unsigned long min, unsigned long max) { + unsigned long old_addr, addr, size; + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) - return safe_addr; - if (!initrd_data.start || !initrd_data.size) - return safe_addr; - if (initrd_data.start < safe_addr) { - memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size); - initrd_data.start = safe_addr; - } - return initrd_data.start + initrd_data.size; + return; + if (!get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + if (addr >= min && addr + size <= max) + return; + old_addr = addr; + physmem_free(RR_INITRD); + addr = physmem_alloc_top_down(RR_INITRD, size, 0); + memmove((void *)addr, (void *)old_addr, size); } static void copy_bootdata(void) @@ -267,46 +268,52 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.invalid_pg_dir_off += offset; } -static unsigned long reserve_amode31(unsigned long safe_addr) -{ - __amode31_base = PAGE_ALIGN(safe_addr); - return __amode31_base + vmlinux.amode31_size; -} - void startup_kernel(void) { unsigned long max_physmem_end; unsigned long random_lma; - unsigned long safe_addr; unsigned long asce_limit; + unsigned long safe_addr; void *img; psw_t psw; - initrd_data.start = parmarea.initrd_start; - initrd_data.size = parmarea.initrd_size; + setup_lpp(); + safe_addr = mem_safe_offset(); + /* + * reserve decompressor memory together with decompression heap, buffer and + * memory which might be occupied by uncompressed kernel at default 1Mb + * position (if KASLR is off or failed). 
+ */ + physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) + physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size); oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - setup_lpp(); store_ipl_parmblock(); - safe_addr = mem_safe_offset(); - safe_addr = reserve_amode31(safe_addr); - safe_addr = read_ipl_report(safe_addr); + read_ipl_report(); uv_query_info(); - safe_addr = rescue_initrd(safe_addr); sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); sanitize_prot_virt_host(); - max_physmem_end = detect_memory(&safe_addr); + max_physmem_end = detect_max_physmem_end(); setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); asce_limit = setup_kernel_memory_layout(); + /* got final ident_map_size, physmem allocations could be performed now */ physmem_set_usable_limit(ident_map_size); + detect_physmem_online_ranges(max_physmem_end); + save_ipl_cert_comp_list(); + rescue_initrd(safe_addr, ident_map_size); +#ifdef CONFIG_KASAN + physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()), + _SEGMENT_SIZE); +#endif if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { - random_lma = get_random_base(safe_addr); + random_lma = get_random_base(); if (random_lma) { __kaslr_offset = random_lma - vmlinux.default_lma; img = (void *)vmlinux.default_lma; @@ -317,8 +324,16 @@ void startup_kernel(void) if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); - } else if (__kaslr_offset) + } else if (__kaslr_offset) { memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + memset(img, 0, vmlinux.image_size); + } + + /* vmlinux decompression is done, shrink reserved low memory */ + physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); + if (!__kaslr_offset) + physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size); + physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true); /* * The order of the following operations is important: @@ -338,16 +353,11 @@ void startup_kernel(void) setup_vmem(asce_limit); copy_bootdata(); - if (__kaslr_offset) { - /* - * Save KASLR offset for early dumps, before vmcore_info is set. - * Mark as uneven to distinguish from real vmcore_info pointer. - */ - S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; - /* Clear non-relocated kernel */ - if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) - memset(img, 0, vmlinux.image_size); - } + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0; /* * Jump to the decompressed kernel entry point and switch DAT mode on. 
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index b89a6893f398..8f16e6f9fb20 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -10,6 +10,8 @@ #include "decompressor.h" #include "boot.h" +unsigned long __bootdata_preserved(s390_invalid_asce); + #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) #define swapper_pg_dir vmlinux.swapper_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off @@ -22,77 +24,27 @@ static inline pte_t *__virt_to_kpte(unsigned long va) return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); } -unsigned long __bootdata_preserved(s390_invalid_asce); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); - enum populate_mode { POPULATE_NONE, POPULATE_ONE2ONE, POPULATE_ABS_LOWCORE, }; -static void boot_check_oom(void) -{ - if (pgalloc_pos < pgalloc_low) - error("out of memory on boot\n"); -} - -static void pgtable_populate_init(void) -{ - unsigned long initrd_end; - unsigned long kernel_end; - - kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; - pgalloc_low = round_up(kernel_end, PAGE_SIZE); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { - initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE); - pgalloc_low = max(pgalloc_low, initrd_end); - } - - pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE); - pgalloc_pos = pgalloc_end; - - boot_check_oom(); -} - -static void *boot_alloc_pages(unsigned int order) -{ - unsigned long size = PAGE_SIZE << order; - - pgalloc_pos -= size; - pgalloc_pos = round_down(pgalloc_pos, size); - - boot_check_oom(); - - return (void *)pgalloc_pos; -} - static void *boot_crst_alloc(unsigned long val) { + unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = boot_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + crst_table_init(table, val); return table; } static pte_t *boot_pte_alloc(void) { - static void *pte_leftover; pte_t *pte; - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - - if (!pte_leftover) { - pte_leftover = boot_alloc_pages(0); - pte = pte_leftover + _PAGE_TABLE_SIZE; - } else { - pte = pte_leftover; - pte_leftover = NULL; - } + pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE); memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } @@ -126,7 +78,6 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next; pte_t *pte, entry; pte = pte_offset_kernel(pmd, addr); @@ -250,7 +201,6 @@ void setup_vmem(unsigned long asce_limit) * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. 
*/ - pgtable_populate_init(); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); for_each_physmem_usable_range(i, &start, &end) pgtable_populate(start, end, POPULATE_ONE2ONE); @@ -269,10 +219,3 @@ void setup_vmem(unsigned long asce_limit) init_mm.context.asce = S390_lowcore.kernel_asce; } - -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total) -{ - unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE); - - return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; -} diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index fa9d33b01b85..389df0e0d9e5 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -93,6 +93,8 @@ SECTIONS _decompressor_syms_end = .; } + _decompressor_end = .; + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index d5e65a5d06e7..27234fa1da8e 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -17,6 +17,27 @@ struct physmem_range { u64 end; }; +enum reserved_range_type { + RR_DECOMPRESSOR, + RR_INITRD, + RR_VMLINUX, + RR_AMODE31, + RR_IPLREPORT, + RR_CERT_COMP_LIST, + RR_MEM_DETECT_EXTENDED, + RR_VMEM, +#ifdef CONFIG_KASAN + RR_KASAN, +#endif + RR_MAX +}; + +struct reserved_range { + unsigned long start; + unsigned long end; + struct reserved_range *chain; +}; + /* * Storage element id is defined as 1 byte (up to 256 storage elements). * In practise only storage element id 0 and 1 are used). @@ -31,6 +52,7 @@ struct physmem_info { u32 range_count; u8 info_source; unsigned long usable; + struct reserved_range reserved[RR_MAX]; struct physmem_range online[MEM_INLINED_ENTRIES]; struct physmem_range *online_extended; }; @@ -80,6 +102,70 @@ static inline int __get_physmem_range(u32 n, unsigned long *start, #define for_each_physmem_online_range(i, p_start, p_end) \ for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) +static inline const char *get_physmem_info_source(void) +{ + switch (physmem_info.info_source) { + case MEM_DETECT_SCLP_STOR_INFO: + return "sclp storage info"; + case MEM_DETECT_DIAG260: + return "diag260"; + case MEM_DETECT_SCLP_READ_INFO: + return "sclp read info"; + case MEM_DETECT_BIN_SEARCH: + return "binary search"; + } + return "none"; +} + +#define RR_TYPE_NAME(t) case RR_ ## t: return #t +static inline const char *get_rr_type_name(enum reserved_range_type t) +{ + switch (t) { + RR_TYPE_NAME(DECOMPRESSOR); + RR_TYPE_NAME(INITRD); + RR_TYPE_NAME(VMLINUX); + RR_TYPE_NAME(AMODE31); + RR_TYPE_NAME(IPLREPORT); + RR_TYPE_NAME(CERT_COMP_LIST); + RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(VMEM); +#ifdef CONFIG_KASAN + RR_TYPE_NAME(KASAN); +#endif + default: + return "UNKNOWN"; + } +} + +#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ + for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ + range && range->end; range = range->chain, \ + *p_start = range ? range->start : 0, *p_end = range ? 
range->end : 0) + +static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, + struct reserved_range *range) +{ + if (!range) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + if (range->chain) + return range->chain; + while (++*t < RR_MAX) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + return NULL; +} + +#define for_each_physmem_reserved_range(t, range, p_start, p_end) \ + for (t = 0, range = __physmem_reserved_next(&t, NULL), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0; \ + range; range = __physmem_reserved_next(&t, range), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) + static inline unsigned long get_physmem_usable_total(void) { unsigned long start, end, total = 0; @@ -91,28 +177,12 @@ static inline unsigned long get_physmem_usable_total(void) return total; } -static inline void get_physmem_reserved(unsigned long *start, unsigned long *size) +static inline unsigned long get_physmem_reserved(enum reserved_range_type type, + unsigned long *addr, unsigned long *size) { - *start = (unsigned long)physmem_info.online_extended; - if (physmem_info.range_count > MEM_INLINED_ENTRIES) - *size = (physmem_info.range_count - MEM_INLINED_ENTRIES) * - sizeof(struct physmem_range); - else - *size = 0; -} - -static inline unsigned long get_physmem_usable_end(void) -{ - unsigned long start; - unsigned long end; - - if (physmem_info.usable) - return physmem_info.usable; - if (physmem_info.range_count) { - __get_physmem_range(physmem_info.range_count - 1, &start, &end, false); - return end; - } - return 0; + *addr = physmem_info.reserved[type].start; + *size = physmem_info.reserved[type].end - physmem_info.reserved[type].start; + return *size; } #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index fc887e3e76f8..966d569f49b7 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; -extern unsigned long pgalloc_pos; -extern unsigned long pgalloc_end; -extern unsigned long pgalloc_low; -extern unsigned long __amode31_base; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; @@ -150,11 +146,6 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } -struct initrd_data { - unsigned long start; - unsigned long size; -}; -extern struct initrd_data initrd_data; struct oldmem_data { unsigned long start; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f909a2dc8a5a..d25425b8d0c0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -148,13 +148,8 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); -struct initrd_data __bootdata(initrd_data); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); unsigned long __bootdata_preserved(__kaslr_offset); -unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = { */ static void __init reserve_pgtables(void) { - memblock_reserve(pgalloc_pos, pgalloc_end 
- pgalloc_pos); + unsigned long start, end; + struct reserved_range *range; + + for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end) + memblock_reserve(start, end - start); } /* @@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void) */ static void __init reserve_initrd(void) { -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_data.start || !initrd_data.size) + unsigned long addr, size; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size)) return; - initrd_start = (unsigned long)__va(initrd_data.start); - initrd_end = initrd_start + initrd_data.size; - memblock_reserve(initrd_data.start, initrd_data.size); -#endif + initrd_start = (unsigned long)__va(addr); + initrd_end = initrd_start + size; + memblock_reserve(addr, size); } /* @@ -732,35 +731,18 @@ static void __init reserve_certificate_list(void) static void __init reserve_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_physmem_reserved(&start, &size); - if (size) - memblock_reserve(start, size); + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_reserve(addr, size); } static void __init free_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_physmem_reserved(&start, &size); - if (size) - memblock_phys_free(start, size); -} - -static const char * __init get_mem_info_source(void) -{ - switch (physmem_info.info_source) { - case MEM_DETECT_SCLP_STOR_INFO: - return "sclp storage info"; - case MEM_DETECT_DIAG260: - return "diag260"; - case MEM_DETECT_SCLP_READ_INFO: - return "sclp read info"; - case MEM_DETECT_BIN_SEARCH: - return "binary search"; - } - return "none"; + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_phys_free(addr, size); } static void __init memblock_add_physmem_info(void) @@ -769,7 +751,7 @@ static void __init memblock_add_physmem_info(void) int i; pr_debug("physmem info source: %s (%hhd)\n", - get_mem_info_source(), physmem_info.info_source); + get_physmem_info_source(), physmem_info.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_physmem_usable_range(i, &start, &end) @@ -780,21 +762,6 @@ static void __init memblock_add_physmem_info(void) memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } -/* - * Check for initrd being in usable memory - */ -static void __init check_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_data.start && initrd_data.size && - !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { - pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_phys_free(initrd_data.start, initrd_data.size); - initrd_start = initrd_end = 0; - } -#endif -} - /* * Reserve memory used for lowcore/command line/kernel image. 
*/ @@ -803,7 +770,7 @@ static void __init reserve_kernel(void) memblock_reserve(0, STARTUP_NORMAL_OFFSET); memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); - memblock_reserve(__amode31_base, __eamode31 - __samode31); + memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); } @@ -825,13 +792,13 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { unsigned long amode31_size = __eamode31 - __samode31; - long amode31_offset = __amode31_base - __samode31; + long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31; long *ptr; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); /* Move original AMODE31 section to the new one */ - memmove((void *)__amode31_base, (void *)__samode31, amode31_size); + memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ memset((void *)__samode31, 0, amode31_size); @@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p) if (MACHINE_HAS_EDAT2) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - check_initrd(); reserve_crashkernel(); #ifdef CONFIG_CRASH_DUMP /* diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index b0658136264f..2b20382f1bd8 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -1,19 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include +#include #include -#include -#include +#include #include #include -#include #include -#include -#include -#include +#include +#include +static unsigned long pgalloc_pos __initdata; static unsigned long segment_pos __initdata; -static unsigned long segment_low __initdata; static bool has_edat __initdata; static bool has_nx __initdata; @@ -28,19 +24,20 @@ static void __init kasan_early_panic(const char *reason) static void * __init kasan_early_alloc_segment(void) { - segment_pos -= _SEGMENT_SIZE; + unsigned long addr = segment_pos; - if (segment_pos < segment_low) + segment_pos += _SEGMENT_SIZE; + if (segment_pos > pgalloc_pos) kasan_early_panic("out of memory during initialisation\n"); - return __va(segment_pos); + return __va(addr); } static void * __init kasan_early_alloc_pages(unsigned int order) { pgalloc_pos -= (PAGE_SIZE << order); - if (pgalloc_pos < pgalloc_low) + if (segment_pos > pgalloc_pos) kasan_early_panic("out of memory during initialisation\n"); return __va(pgalloc_pos); @@ -225,8 +222,8 @@ void __init kasan_early_init(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long pgalloc_pos_initial, segment_pos_initial; unsigned long untracked_end = MODULES_VADDR; - unsigned long shadow_alloc_size; unsigned long start, end; int i; @@ -243,13 +240,11 @@ void __init kasan_early_init(void) crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); - if (has_edat) { - shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; - segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE); - segment_low = segment_pos - shadow_alloc_size; - segment_low = round_down(segment_low, _SEGMENT_SIZE); - pgalloc_pos = segment_low; - } + 
/* segment allocations go bottom up -> <- pgalloc go top down */
+	segment_pos_initial = physmem_info.reserved[RR_KASAN].start;
+	segment_pos = segment_pos_initial;
+	pgalloc_pos_initial = physmem_info.reserved[RR_KASAN].end;
+	pgalloc_pos = pgalloc_pos_initial;
 	/*
 	 * Current memory layout:
 	 * +- 0 -------------+	   +- shadow start -+
@@ -298,4 +293,6 @@ void __init kasan_early_init(void)
 	/* enable kasan */
 	init_task.kasan_depth = 0;
 	sclp_early_printk("KernelAddressSanitizer initialized\n");
+	memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial);
+	memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos);
 }
-- cgit v1.2.3

From e4c31004d3348202d4bc0bcdf662bf9d9d3e11cb Mon Sep 17 00:00:00 2001
From: Vasily Gorbik
Date: Tue, 14 Feb 2023 09:39:24 +0100
Subject: s390/mm,pageattr: allow KASAN shadow memory

Allow changing page table attributes for KASAN shadow memory ranges.

Acked-by: Heiko Carstens
Reviewed-by: Alexander Gordeev
Signed-off-by: Vasily Gorbik
Signed-off-by: Heiko Carstens
---
 arch/s390/mm/pageattr.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 85195c18b2e8..7838e9c70000 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -300,8 +300,6 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 	if (addr == end)
 		return 0;
-	if (end >= MODULES_END)
-		return -EINVAL;
 	mutex_lock(&cpa_mutex);
 	pgdp = pgd_offset_k(addr);
 	do {
-- cgit v1.2.3

From 557b19709da97bc93ea5cf61926ca05800c15a13 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik
Date: Thu, 9 Feb 2023 22:05:11 +0100
Subject: s390/kasan: move shadow mapping to decompressor

Since the regular paging structures are already initialized in the
decompressor, move the KASAN shadow mapping to the decompressor as
well. This helps avoid allocating the memory KASAN requires in one
large chunk, de-duplicates the paging structure creation code, and
starts the uncompressed kernel with KASAN instrumentation right away.
It also avoids the pitfalls of accidentally calling KASAN-instrumented
code during KASAN initialization.
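For reference, the shadow translation the decompressor now performs when
populating the shadow is the generic one (this is the stock
kasan_mem_to_shadow() helper from include/linux/kasan.h, quoted here only
for context; with generic KASAN one shadow byte covers eight bytes of
memory):

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}

The __sha() macro introduced below simply wraps this helper to map an
address range to its shadow range before calling pgtable_populate().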
Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/boot/boot.h | 7 + arch/s390/boot/startup.c | 11 +- arch/s390/boot/vmem.c | 227 ++++++++++++++++++++++++-- arch/s390/include/asm/kasan.h | 31 +--- arch/s390/include/asm/physmem_info.h | 17 -- arch/s390/kernel/early.c | 9 ++ arch/s390/kernel/head64.S | 3 - arch/s390/kernel/vmlinux.lds.S | 7 + arch/s390/mm/Makefile | 3 - arch/s390/mm/kasan_init.c | 298 ----------------------------------- arch/s390/mm/vmem.c | 11 ++ 11 files changed, 258 insertions(+), 366 deletions(-) delete mode 100644 arch/s390/mm/kasan_init.c (limited to 'arch') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2b4048106418..872963c8a0ab 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -32,6 +32,13 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; +#ifdef CONFIG_KASAN + unsigned long kasan_early_shadow_page_off; + unsigned long kasan_early_shadow_pte_off; + unsigned long kasan_early_shadow_pmd_off; + unsigned long kasan_early_shadow_pud_off; + unsigned long kasan_early_shadow_p4d_off; +#endif }; void startup_kernel(void); diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bc07e24329b9..bdf305a93987 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -266,6 +266,13 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; +#ifdef CONFIG_KASAN + vmlinux.kasan_early_shadow_page_off += offset; + vmlinux.kasan_early_shadow_pte_off += offset; + vmlinux.kasan_early_shadow_pmd_off += offset; + vmlinux.kasan_early_shadow_pud_off += offset; + vmlinux.kasan_early_shadow_p4d_off += offset; +#endif } void startup_kernel(void) @@ -307,10 +314,6 @@ void startup_kernel(void) detect_physmem_online_ranges(max_physmem_end); save_ipl_cert_comp_list(); rescue_initrd(safe_addr, ident_map_size); -#ifdef CONFIG_KASAN - physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()), - _SEGMENT_SIZE); -#endif if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { random_lma = get_random_base(); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 8f16e6f9fb20..b01ea2abda03 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -16,6 +17,182 @@ unsigned long __bootdata_preserved(s390_invalid_asce); #define swapper_pg_dir vmlinux.swapper_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off +enum populate_mode { + POPULATE_NONE, + POPULATE_ONE2ONE, + POPULATE_ABS_LOWCORE, +#ifdef CONFIG_KASAN + POPULATE_KASAN_MAP_SHADOW, + POPULATE_KASAN_ZERO_SHADOW, + POPULATE_KASAN_SHALLOW +#endif +}; + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); + +#ifdef CONFIG_KASAN + +#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off +#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) +#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) +#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) +#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pte_t pte_z; + +static void kasan_populate_shadow(void) +{ + 
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long untracked_end; + unsigned long start, end; + int i; + + pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); + if (!machine.has_nx) + pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); + crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + + /* + * Current memory layout: + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan | + * | | / | zero page | + * +- vmalloc area -+ / | mapping | + * | vmalloc_size | / | (untracked) | + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ | unmapped | allocated per module + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan zero page| (untracked) + * | | / | mapping | + * +- vmalloc area -+ / +----------------+ + * | vmalloc_size | / |shallow populate| + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ |shallow populate| + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + */ + + for_each_physmem_usable_range(i, &start, &end) + pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_end = VMALLOC_START; + /* shallowly populate kasan shadow for vmalloc and modules */ + pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + } else { + untracked_end = MODULES_VADDR; + } + /* populate kasan shadow for untracked memory */ + pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); + pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); +} + +static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); + return true; + } + return false; +} + +static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { + p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); + return true; + } + return false; +} + +static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pud_populate(&init_mm, pud, kasan_early_shadow_pmd); + return true; + } + return false; +} + +static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + 
unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); + return true; + } + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + pte_t entry; + + if (mode == POPULATE_KASAN_ZERO_SHADOW) { + set_pte(pte, pte_z); + return true; + } + return false; +} +#else + +static inline void kasan_populate_shadow(void) {} + +static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + return false; +} + +#endif + /* * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. */ @@ -24,12 +201,6 @@ static inline pte_t *__virt_to_kpte(unsigned long va) return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); } -enum populate_mode { - POPULATE_NONE, - POPULATE_ONE2ONE, - POPULATE_ABS_LOWCORE, -}; - static void *boot_crst_alloc(unsigned long val) { unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; @@ -42,14 +213,26 @@ static void *boot_crst_alloc(unsigned long val) static pte_t *boot_pte_alloc(void) { + static void *pte_leftover; pte_t *pte; - pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE); + /* + * handling pte_leftovers this way helps to avoid memory fragmentation + * during POPULATE_KASAN_MAP_SHADOW when EDAT is off + */ + if (!pte_leftover) { + pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte = pte_leftover + _PAGE_TABLE_SIZE; + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } -static unsigned long _pa(unsigned long addr, enum populate_mode mode) +static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) { switch (mode) { case POPULATE_NONE: @@ -58,6 +241,12 @@ static unsigned long _pa(unsigned long addr, enum populate_mode mode) return addr; case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + addr = physmem_alloc_top_down(RR_VMEM, size, size); + memset((void *)addr, 0, size); + return addr; +#endif default: return -1; } @@ -83,7 +272,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (pte_none(*pte)) { - entry = __pte(_pa(addr, mode)); + if (kasan_pte_populate_zero_shadow(pte, mode)) + continue; + entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); set_pte(pte, entry); } @@ -101,8 +292,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) { + if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) + 
continue; if (can_large_pmd(pmd, addr, next)) { - entry = __pmd(_pa(addr, mode)); + entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); set_pmd(pmd, entry); continue; @@ -127,8 +320,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); if (pud_none(*pud)) { + if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) + continue; if (can_large_pud(pud, addr, next)) { - entry = __pud(_pa(addr, mode)); + entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); set_pud(pud, entry); continue; @@ -153,6 +348,8 @@ static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long e for (; addr < end; addr = next, p4d++) { next = p4d_addr_end(addr, end); if (p4d_none(*p4d)) { + if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) + continue; pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); p4d_populate(&init_mm, p4d, pud); } @@ -170,9 +367,15 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); if (pgd_none(*pgd)) { + if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) + continue; p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); pgd_populate(&init_mm, pgd, p4d); } +#ifdef CONFIG_KASAN + if (mode == POPULATE_KASAN_SHALLOW) + continue; +#endif pgtable_p4d_populate(pgd, addr, next, mode); } } @@ -210,6 +413,8 @@ void setup_vmem(unsigned long asce_limit) POPULATE_NONE); memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + kasan_populate_shadow(); + S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; S390_lowcore.user_asce = s390_invalid_asce; diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e5cfc81d5b61..0cffead0f2f2 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#include +#include #ifdef CONFIG_KASAN @@ -13,35 +13,6 @@ #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -extern void kasan_early_init(void); - -/* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the shadow memory region. - * To keep page tables estimates simple take the double of - * combined ptes size. - * - * physmem parameter has to be already adjusted if not entire physical memory - * would be used (e.g. due to effect of "mem=" option). 
- */ -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) -{ - unsigned long kasan_needs; - unsigned long pages; - /* for shadow memory */ - kasan_needs = round_up(physmem / 8, PAGE_SIZE); - /* for paging structures */ - pages = DIV_ROUND_UP(kasan_needs, PAGE_SIZE); - kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; - - return kasan_needs; -} -#else -static inline void kasan_early_init(void) { } -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 27234fa1da8e..8e9c582592b3 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,9 +26,6 @@ enum reserved_range_type { RR_CERT_COMP_LIST, RR_MEM_DETECT_EXTENDED, RR_VMEM, -#ifdef CONFIG_KASAN - RR_KASAN, -#endif RR_MAX }; @@ -129,9 +126,6 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(CERT_COMP_LIST); RR_TYPE_NAME(MEM_DETECT_EXTENDED); RR_TYPE_NAME(VMEM); -#ifdef CONFIG_KASAN - RR_TYPE_NAME(KASAN); -#endif default: return "UNKNOWN"; } @@ -166,17 +160,6 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range range; range = __physmem_reserved_next(&t, range), \ *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) -static inline unsigned long get_physmem_usable_total(void) -{ - unsigned long start, end, total = 0; - int i; - - for_each_physmem_usable_range(i, &start, &end) - total += end - start; - - return total; -} - static inline unsigned long get_physmem_reserved(enum reserved_range_type type, unsigned long *addr, unsigned long *size) { diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8225a4c1f2e2..2dd5976a55ac 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -51,6 +51,14 @@ decompressor_handled_param(nokaslr); decompressor_handled_param(prot_virt); #endif +static void __init kasan_early_init(void) +{ +#ifdef CONFIG_KASAN + init_task.kasan_depth = 0; + sclp_early_printk("KernelAddressSanitizer initialized\n"); +#endif +} + static void __init reset_tod_clock(void) { union tod_clock clk; @@ -293,6 +301,7 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { + kasan_early_init(); reset_tod_clock(); time_early_init(); init_kernel_storage_key(); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3b3bf8329e6c..f68be3951103 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,9 +26,6 @@ ENTRY(startup_continue) stg %r14,__LC_CURRENT larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE brasl %r14,sclp_early_adjust_va # allow sclp_early_printk -#ifdef CONFIG_KASAN - brasl %r14,kasan_early_init -#endif brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code # diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b653ba8d51e6..8d2288a5ba25 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -219,6 +219,13 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) +#ifdef CONFIG_KASAN + QUAD(kasan_early_shadow_page) + QUAD(kasan_early_shadow_pte) + QUAD(kasan_early_shadow_pmd) + QUAD(kasan_early_shadow_pud) + QUAD(kasan_early_shadow_p4d) +#endif } :NONE /* Debugging sections. 
*/ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 57e4f3a24829..d90db06a8af5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,6 +10,3 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o - -KASAN_SANITIZE_kasan_init.o := n -obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c deleted file mode 100644 index 2b20382f1bd8..000000000000 --- a/arch/s390/mm/kasan_init.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned long pgalloc_pos __initdata; -static unsigned long segment_pos __initdata; -static bool has_edat __initdata; -static bool has_nx __initdata; - -#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) - -static void __init kasan_early_panic(const char *reason) -{ - sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n"); - sclp_early_printk(reason); - disabled_wait(); -} - -static void * __init kasan_early_alloc_segment(void) -{ - unsigned long addr = segment_pos; - - segment_pos += _SEGMENT_SIZE; - if (segment_pos > pgalloc_pos) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(addr); -} - -static void * __init kasan_early_alloc_pages(unsigned int order) -{ - pgalloc_pos -= (PAGE_SIZE << order); - - if (segment_pos > pgalloc_pos) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(pgalloc_pos); -} - -static void * __init kasan_early_crst_alloc(unsigned long val) -{ - unsigned long *table; - - table = kasan_early_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); - return table; -} - -static pte_t * __init kasan_early_pte_alloc(void) -{ - static void *pte_leftover; - pte_t *pte; - - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - - if (!pte_leftover) { - pte_leftover = kasan_early_alloc_pages(0); - pte = pte_leftover + _PAGE_TABLE_SIZE; - } else { - pte = pte_leftover; - pte_leftover = NULL; - } - memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); - return pte; -} - -enum populate_mode { - POPULATE_MAP, - POPULATE_ZERO_SHADOW, - POPULATE_SHALLOW -}; - -static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit) -{ - return __pgprot(pgprot_val(pgprot) & ~bit); -} - -static void __init kasan_early_pgtable_populate(unsigned long address, - unsigned long end, - enum populate_mode mode) -{ - pgprot_t pgt_prot_zero = PAGE_KERNEL_RO; - pgprot_t pgt_prot = PAGE_KERNEL; - pgprot_t sgt_prot = SEGMENT_KERNEL; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - pmd_t pmd; - pte_t pte; - - if (!has_nx) { - pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC); - pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC); - sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC); - } - - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PGDIR_SIZE) && - end - address >= PGDIR_SIZE) { - pgd_populate(&init_mm, pg_dir, - kasan_early_shadow_p4d); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - continue; - } - p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY); - pgd_populate(&init_mm, pg_dir, p4_dir); - } - - if (mode == POPULATE_SHALLOW) { - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - - p4_dir = 
p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, P4D_SIZE) && - end - address >= P4D_SIZE) { - p4d_populate(&init_mm, p4_dir, - kasan_early_shadow_pud); - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY); - p4d_populate(&init_mm, p4_dir, pu_dir); - } - - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PUD_SIZE) && - end - address >= PUD_SIZE) { - pud_populate(&init_mm, pu_dir, - kasan_early_shadow_pmd); - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY); - pud_populate(&init_mm, pu_dir, pm_dir); - } - - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - if (IS_ALIGNED(address, PMD_SIZE) && - end - address >= PMD_SIZE) { - if (mode == POPULATE_ZERO_SHADOW) { - pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } else if (has_edat) { - void *page = kasan_early_alloc_segment(); - - memset(page, 0, _SEGMENT_SIZE); - pmd = __pmd(__pa(page)); - pmd = set_pmd_bit(pmd, sgt_prot); - set_pmd(pm_dir, pmd); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - } - pt_dir = kasan_early_pte_alloc(); - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *page; - - switch (mode) { - case POPULATE_MAP: - page = kasan_early_alloc_pages(0); - memset(page, 0, PAGE_SIZE); - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot); - set_pte(pt_dir, pte); - break; - case POPULATE_ZERO_SHADOW: - page = kasan_early_shadow_page; - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot_zero); - set_pte(pt_dir, pte); - break; - case POPULATE_SHALLOW: - /* should never happen */ - break; - } - } - address += PAGE_SIZE; - } -} - -static void __init kasan_early_detect_facilities(void) -{ - if (test_facility(8)) { - has_edat = true; - __ctl_set_bit(0, 23); - } - if (!noexec_disabled && test_facility(130)) { - has_nx = true; - __ctl_set_bit(0, 20); - } -} - -void __init kasan_early_init(void) -{ - pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); - pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); - p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); - unsigned long pgalloc_pos_initial, segment_pos_initial; - unsigned long untracked_end = MODULES_VADDR; - unsigned long start, end; - int i; - - kasan_early_detect_facilities(); - if (!has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); - - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - - /* init kasan zero shadow */ - crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); - memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); - - /* segment allocations go bottom up -> <- pgalloc go top down */ - segment_pos_initial = physmem_info.reserved[RR_KASAN].start; - segment_pos = segment_pos_initial; - pgalloc_pos_initial = 
physmem_info.reserved[RR_KASAN].end; - pgalloc_pos = pgalloc_pos_initial; - /* - * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan | - * | | / | zero page | - * +- vmalloc area -+ / | mapping | - * | vmalloc_size | / | (untracked) | - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ | unmapped | allocated per module - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - * - * Current memory layout (KASAN_VMALLOC): - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan zero page| (untracked) - * | | / | mapping | - * +- vmalloc area -+ / +----------------+ - * | vmalloc_size | / |shallow populate| - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ |shallow populate| - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - */ - /* populate kasan shadow (for identity mapping and zero page mapping) */ - for_each_physmem_usable_range(i, &start, &end) - kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), - POPULATE_SHALLOW); - } - /* populate kasan shadow for untracked memory */ - kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end), - POPULATE_ZERO_SHADOW); - kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), - POPULATE_ZERO_SHADOW); - /* enable kasan */ - init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); - memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial); - memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos); -} diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 4113a7ffa149..242f95aa9801 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -664,6 +665,9 @@ static void __init memblock_region_swap(void *a, void *b, int size) swap(*(struct memblock_region *)a, *(struct memblock_region *)b); } +#ifdef CONFIG_KASAN +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) +#endif /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -733,6 +737,13 @@ void __init vmem_map_init(void) SET_MEMORY_RW | SET_MEMORY_NX); } +#ifdef CONFIG_KASAN + for_each_mem_range(i, &base, &end) + __set_memory(__sha(base), + (__sha(end) - __sha(base)) >> PAGE_SHIFT, + SET_MEMORY_RW | SET_MEMORY_NX); +#endif + __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); -- cgit v1.2.3 From 6ef55060a1cc29dd54ff390f22cb3de266dab2b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2023 13:50:35 +0100 Subject: s390: make use of CONFIG_FUNCTION_ALIGNMENT Make use of CONFIG_FUNCTION_ALIGNMENT which was introduced with commit d49a0626216b ("arch: Introduce CONFIG_FUNCTION_ALIGNMENT"). Select FUNCTION_ALIGNMENT_8B for gcc in order to reflect gcc's default function alignment. 
For all other compilers, which currently means only clang, select a function alignment of 16 bytes, which reflects the default function alignment for clang. Also change the __ALIGN define to follow whatever the value of CONFIG_FUNCTION_ALIGNMENT is. This makes sure that the alignment of C and assembler functions is the same. As a result, everything still uses the default function alignment for both compilers. In addition, however, this is now also true for all assembly functions, so that all functions have a consistent alignment. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 2 ++ arch/s390/include/asm/linkage.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9809c74e1240..8f1c6da2ae4d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -131,6 +131,8 @@ config S390 select CLONE_BACKWARDS2 select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER + select FUNCTION_ALIGNMENT_8B if CC_IS_GCC + select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC select GCC12_NO_ARRAY_BOUNDS select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index c76777b15fec..df3fb7d8227b 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -4,7 +4,7 @@ #include -#define __ALIGN .align 16, 0x07 +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x07 #define __ALIGN_STR __stringify(__ALIGN) #endif -- cgit v1.2.3 From dfa2a72cdbfc3b7cda196b3389579cff88201ce6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2023 13:50:36 +0100 Subject: s390/ftrace: move hotpatch trampolines to mcount.S Move the ftrace hotpatch trampolines to mcount.S. This makes it possible to use the standard SYM_CODE macros, which in turn makes sure that the hotpatch trampolines follow the function alignment rules of the rest of the kernel. Signed-off-by: Heiko Carstens Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 20 -------------------- arch/s390/kernel/mcount.S | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 416b5a94353d..6f6c44b7af89 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -49,26 +49,6 @@ struct ftrace_insn { s32 disp; } __packed; -asm( - " .align 16\n" - "ftrace_shared_hotpatch_trampoline_br:\n" - " lmg %r0,%r1,2(%r1)\n" - " br %r1\n" - "ftrace_shared_hotpatch_trampoline_br_end:\n" -); - -#ifdef CONFIG_EXPOLINE -asm( - " .align 16\n" - "ftrace_shared_hotpatch_trampoline_exrl:\n" - " lmg %r0,%r1,2(%r1)\n" - " exrl %r0,0f\n" - " j .\n" - "0: br %r1\n" - "ftrace_shared_hotpatch_trampoline_exrl_end:\n" -); -#endif /* CONFIG_EXPOLINE */ - #ifdef CONFIG_MODULES static char *ftrace_plt; #endif /* CONFIG_MODULES */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 43ff91073d2a..4c4ee762f515 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -135,6 +135,22 @@ SYM_FUNC_END(return_to_handler) #endif #endif /* CONFIG_FUNCTION_TRACER */ +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br) + lmg %r0,%r1,2(%r1) + br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br) + +#ifdef CONFIG_EXPOLINE +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl) + lmg %r0,%r1,2(%r1) + exrl %r0,0f + j .
+0: br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl) +#endif /* CONFIG_EXPOLINE */ + #ifdef CONFIG_RETHOOK SYM_FUNC_START(arch_rethook_trampoline) -- cgit v1.2.3 From 91a0117dce08a635d48b6016c5af354edde2082b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2023 13:50:37 +0100 Subject: s390/expoline: use __ALIGN instead of open coded .align Use __ALIGN instead of open coded .align statement to make sure that external expoline thunks follow global function alignment rules. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/nospec-insn.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 7e9e99523e95..7a946c42ad13 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_NOSPEC_ASM_H #define _ASM_S390_NOSPEC_ASM_H +#include #include #ifdef __ASSEMBLY__ @@ -16,7 +17,7 @@ .macro __THUNK_PROLOG_NAME name #ifdef CONFIG_EXPOLINE_EXTERN .pushsection .text,"ax",@progbits - .align 16,0x07 + __ALIGN #else .pushsection .text.\name,"axG",@progbits,\name,comdat #endif -- cgit v1.2.3 From c2272b2d3b164a02929c0ce9ffed35c4fe8fbe40 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Mar 2023 13:50:38 +0100 Subject: s390/vdso: use __ALIGN instead of open coded .align Use __ALIGN instead of open coded .align statement to make sure that vdso code follows global kernel function alignment rules. Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso32/vdso_user_wrapper.S | 3 ++- arch/s390/kernel/vdso64/vdso_user_wrapper.S | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S index 3f42f27f978c..2e645003fdaf 100644 --- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include .macro vdso_syscall func,syscall .globl __kernel_compat_\func .type __kernel_compat_\func,@function - .align 8 + __ALIGN __kernel_compat_\func: CFI_STARTPROC svc \syscall diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index 97f0c0a669a5..57f62596e53b 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -16,7 +17,7 @@ .macro vdso_func func .globl __kernel_\func .type __kernel_\func,@function - .align 8 + __ALIGN __kernel_\func: CFI_STARTPROC aghi %r15,-WRAPPER_FRAME_SIZE @@ -41,7 +42,7 @@ vdso_func getcpu .macro vdso_syscall func,syscall .globl __kernel_\func .type __kernel_\func,@function - .align 8 + __ALIGN __kernel_\func: CFI_STARTPROC svc \syscall -- cgit v1.2.3 From 003d248fee72eb8d86aefaf3b6e47fe8acfda0b6 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Sun, 29 Jan 2023 19:45:25 +0100 Subject: s390/zcrypt: make psmid unsigned long instead of long long Since s390 kernel build does not support 32 bit build any more there is no difference between long and long long. So this patch reworks all occurrences of psmid (a 64 bit value) to use unsigned long now. 
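For reference, the psmid layout itself is unchanged by this patch: the pid of the requesting process goes into the upper 32 bits and a per-request sequence counter into the lower 32 bits, as the zcrypt hunks below show. A minimal user-space sketch of that packing (the helper name is made up for the example):

#include <stdint.h>
#include <stdio.h>

/* build a 64 bit message id: pid in bits 63-32, step counter in bits 31-0 */
static uint64_t make_psmid(uint32_t pid, uint32_t step)
{
	return ((uint64_t)pid << 32) + step;
}

int main(void)
{
	uint64_t psmid = make_psmid(4711, 1);

	printf("psmid=0x%016llx pid=%u step=%u\n",
	       (unsigned long long)psmid,
	       (uint32_t)(psmid >> 32), (uint32_t)psmid);
	return 0;
}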
Signed-off-by: Harald Freudenberger Acked-by: Heiko Carstens Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 4 ++-- drivers/s390/crypto/ap_bus.h | 6 +++--- drivers/s390/crypto/ap_queue.c | 8 ++++---- drivers/s390/crypto/zcrypt_cex2c.c | 6 +++--- drivers/s390/crypto/zcrypt_msgtype50.c | 4 ++-- drivers/s390/crypto/zcrypt_msgtype6.c | 10 +++++----- 6 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c699f251a464..0744a00f92a5 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -386,7 +386,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, * *resgr0 is to be used instead of qid to further process this entry. */ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, - unsigned long long *psmid, + unsigned long *psmid, void *msg, size_t length, size_t *reslength, unsigned long *resgr0) @@ -429,7 +429,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, if (resgr0) *resgr0 = reg0; } else { - *psmid = (((unsigned long long)rp1.even) << 32) + rp1.odd; + *psmid = (rp1.even << 32) + rp1.odd; if (resgr0) *resgr0 = 0; } diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 0f17933954fb..4ef8e6ac6323 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -242,7 +242,7 @@ enum ap_fi_flags { struct ap_message { struct list_head list; /* Request queueing. */ - unsigned long long psmid; /* Message id. */ + unsigned long psmid; /* Message id. */ void *msg; /* Pointer to message buffer. */ unsigned int len; /* actual msg len in msg buffer */ unsigned int bufsize; /* allocated msg buffer size */ @@ -285,8 +285,8 @@ static inline void ap_release_message(struct ap_message *ap_msg) * for the first time. Otherwise the ap message queue will get * confused. */ -int ap_send(ap_qid_t, unsigned long long, void *, size_t); -int ap_recv(ap_qid_t, unsigned long long *, void *, size_t); +int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length); +int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t length); enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event); enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 2637fe1df727..2fe8cbf72091 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -68,7 +68,7 @@ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) * because a segment boundary was reached. The NQAP is repeated. 
*/ static inline struct ap_queue_status -__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, +__ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length, int special) { if (special) @@ -76,7 +76,7 @@ __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, return ap_nqap(qid, psmid, msg, length); } -int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length) { struct ap_queue_status status; @@ -95,7 +95,7 @@ int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) } EXPORT_SYMBOL(ap_send); -int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) +int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t length) { struct ap_queue_status status; @@ -177,7 +177,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) break; } if (!found) { - AP_DBF_WARN("%s unassociated reply psmid=0x%016llx on 0x%02x.%04x\n", + AP_DBF_WARN("%s unassociated reply psmid=0x%016lx on 0x%02x.%04x\n", __func__, aq->reply->psmid, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); } diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index cb7849defce3..9cabe3937c9a 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -181,7 +181,7 @@ static const struct attribute_group cca_queue_attr_grp = { static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) { struct ap_message ap_msg; - unsigned long long psmid; + unsigned long psmid; unsigned int domain; struct { struct type86_hdr hdr; @@ -209,7 +209,7 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) msg = ap_msg.msg; msg->cprbx.domain = AP_QID_QUEUE(aq->qid); - rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.msg, ap_msg.len); + rc = ap_send(aq->qid, 0x0102030405060708UL, ap_msg.msg, ap_msg.len); if (rc) goto out_free; @@ -217,7 +217,7 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) for (i = 0; i < 2 * HZ; i++) { msleep(1000 / HZ); rc = ap_recv(aq->qid, &psmid, ap_msg.msg, 4096); - if (rc == 0 && psmid == 0x0102030405060708ULL) + if (rc == 0 && psmid == 0x0102030405060708UL) break; } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 7d245645fdd5..1b6b60b7215b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -476,7 +476,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_cex2a_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &work; rc = ICAMEX_msg_to_type50MEX_msg(zq, ap_msg, mex); @@ -527,7 +527,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_cex2a_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &work; rc = ICACRT_msg_to_type50CRT_msg(zq, ap_msg, crt); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index a2e7fe33ba62..6c874808c356 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1034,7 +1034,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, return -ENOMEM; 
ap_msg->bufsize = PAGE_SIZE; ap_msg->receive = zcrypt_msgtype6_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &resp_type; rc = icamex_msg_to_type6mex_msgx(zq, ap_msg, mex); @@ -1084,7 +1084,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, return -ENOMEM; ap_msg->bufsize = PAGE_SIZE; ap_msg->receive = zcrypt_msgtype6_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = &resp_type; rc = icacrt_msg_to_type6crt_msgx(zq, ap_msg, crt); @@ -1135,7 +1135,7 @@ int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) @@ -1216,7 +1216,7 @@ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive_ep11; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) @@ -1326,7 +1326,7 @@ int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code, if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; - ap_msg->psmid = (((unsigned long long)current->pid) << 32) + + ap_msg->psmid = (((unsigned long)current->pid) << 32) + atomic_inc_return(&zcrypt_step); ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) -- cgit v1.2.3 From 8794c5961394b7fb8a69f43eaad9566e5496c0c8 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 14 Feb 2023 17:13:18 +0100 Subject: s390/zcrypt: rework length information for dqap The inline ap_dqap function does not return the number of bytes actually written into the message buffer. The calling code inspects the AP message header to figure out what kind of AP message has been received and pulls the length information from this header. This processing may not work correctly in cases where only a fragment of the reply is received. With this patch the ap_dqap inline function now returns the number of actually written bytes in the *length parameter. So the calling function has a chance to compare the number of received bytes against what the AP message header length field states. This is especially useful in cases where a message could only get partially received. The low level reply processing functions needed some rework to be able to catch this new length information and compare it the right way. The rework also deals with some situations where until now the reply length was not correctly calculated and/or set. All this has been heavily tested as the modifications on the reply length information may affect crypto load. 
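The core of the reworked check can be summarized in a small user-space sketch (struct and names simplified from the hunks below): a reply is only accepted if the length claimed by the AP message header fits both buffers and matches the number of bytes ap_dqap() actually stored.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct reply {
	size_t bufsize;	/* allocated reply buffer size */
	size_t len;	/* bytes actually stored by ap_dqap() */
};

/* simplified version of the length check added to the receive callbacks */
static int check_reply_len(const struct reply *r, size_t msg_bufsize, size_t hdr_len)
{
	if (hdr_len > r->bufsize || hdr_len > msg_bufsize || hdr_len != r->len)
		return -EMSGSIZE;	/* truncated or oversized reply */
	return 0;
}

int main(void)
{
	struct reply r = { .bufsize = 4096, .len = 512 };

	/* header claims more data than the DQAP delivered -> reject */
	printf("%d\n", check_reply_len(&r, 4096, 1024));	/* -EMSGSIZE */
	printf("%d\n", check_reply_len(&r, 4096, 512));	/* 0 */
	return 0;
}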
Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 18 ++++++++++----- drivers/s390/crypto/ap_bus.h | 8 +++---- drivers/s390/crypto/ap_queue.c | 16 ++++++------- drivers/s390/crypto/zcrypt_cex2c.c | 3 ++- drivers/s390/crypto/zcrypt_msgtype50.c | 11 +++++---- drivers/s390/crypto/zcrypt_msgtype6.c | 41 ++++++++++++++++++++++------------ 6 files changed, 60 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 0744a00f92a5..6bb536e87897 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -359,10 +359,11 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, * ap_dqap(): Receive message from adjunct processor queue. * @qid: The AP queue number * @psmid: Pointer to program supplied message identifier - * @msg: The message text - * @length: The message length - * @reslength: Resitual length on return - * @resgr0: input: gr0 value (only used if != 0), output: resitual gr0 content + * @msg: Pointer to message buffer + * @msglen: Message buffer size + * @length: Pointer to length of actually written bytes + * @reslength: Residual length on return + * @resgr0: input: gr0 value (only used if != 0), output: residual gr0 content * * Returns AP queue status structure. * Condition code 1 on DQAP means the receive has taken place @@ -387,7 +388,8 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid, */ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, unsigned long *psmid, - void *msg, size_t length, + void *msg, size_t msglen, + size_t *length, size_t *reslength, unsigned long *resgr0) { @@ -399,7 +401,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, rp1.even = 0UL; rp1.odd = 0UL; rp2.even = (unsigned long)msg; - rp2.odd = (unsigned long)length; + rp2.odd = (unsigned long)msglen; asm volatile( " lgr 0,%[reg0]\n" /* qid param into gr0 */ @@ -434,6 +436,10 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, *resgr0 = 0; } + /* update *length with the nr of bytes stored into the msg buffer */ + if (length) + *length = msglen - rp2.odd; + return reg1.status; } diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 4ef8e6ac6323..b5d7ccbc0784 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -244,8 +244,8 @@ struct ap_message { struct list_head list; /* Request queueing. */ unsigned long psmid; /* Message id. */ void *msg; /* Pointer to message buffer. */ - unsigned int len; /* actual msg len in msg buffer */ - unsigned int bufsize; /* allocated msg buffer size */ + size_t len; /* actual msg len in msg buffer */ + size_t bufsize; /* allocated msg buffer size */ u16 flags; /* Flags, see AP_MSG_FLAG_xxx */ struct ap_fi fi; /* Failure Injection cmd */ int rc; /* Return code for this message */ @@ -285,8 +285,8 @@ static inline void ap_release_message(struct ap_message *ap_msg) * for the first time. Otherwise the ap message queue will get * confused. 
*/ -int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length); -int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t length); +int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen); +int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen); enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event); enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 2fe8cbf72091..bbd314918a5d 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -59,7 +59,7 @@ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) * @qid: The AP queue number * @psmid: The program supplied message identifier * @msg: The message text - * @length: The message length + * @msglen: The message length * @special: Special Bit * * Returns AP queue status structure. @@ -68,19 +68,19 @@ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) * because a segment boundary was reached. The NQAP is repeated. */ static inline struct ap_queue_status -__ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length, +__ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen, int special) { if (special) qid |= 0x400000UL; - return ap_nqap(qid, psmid, msg, length); + return ap_nqap(qid, psmid, msg, msglen); } -int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length) +int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen) { struct ap_queue_status status; - status = __ap_send(qid, psmid, msg, length, 0); + status = __ap_send(qid, psmid, msg, msglen, 0); switch (status.response_code) { case AP_RESPONSE_NORMAL: return 0; @@ -95,13 +95,13 @@ int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t length) } EXPORT_SYMBOL(ap_send); -int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t length) +int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen) { struct ap_queue_status status; if (!msg) return -EINVAL; - status = ap_dqap(qid, psmid, msg, length, NULL, NULL); + status = ap_dqap(qid, psmid, msg, msglen, NULL, NULL, NULL); switch (status.response_code) { case AP_RESPONSE_NORMAL: return 0; @@ -150,7 +150,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) do { status = ap_dqap(aq->qid, &aq->reply->psmid, aq->reply->msg, aq->reply->bufsize, - &reslen, &resgr0); + &aq->reply->len, &reslen, &resgr0); parts++; } while (status.response_code == 0xFF && resgr0 != 0); diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 9cabe3937c9a..4dacf5f6461f 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -203,6 +203,7 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) ap_msg.msg = (void *)get_zeroed_page(GFP_KERNEL); if (!ap_msg.msg) return -ENOMEM; + ap_msg.bufsize = PAGE_SIZE; rng_type6cprb_msgx(&ap_msg, 4, &domain); @@ -216,7 +217,7 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) /* Wait for the test message to complete. 
*/ for (i = 0; i < 2 * HZ; i++) { msleep(1000 / HZ); - rc = ap_recv(aq->qid, &psmid, ap_msg.msg, 4096); + rc = ap_recv(aq->qid, &psmid, ap_msg.msg, ap_msg.bufsize); if (rc == 0 && psmid == 0x0102030405060708UL) break; } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 1b6b60b7215b..05ace18c12b0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -441,14 +441,17 @@ static void zcrypt_cex2a_receive(struct ap_queue *aq, t80h = reply->msg; if (t80h->type == TYPE80_RSP_CODE) { len = t80h->len; - if (len > reply->bufsize || len > msg->bufsize) { + if (len > reply->bufsize || len > msg->bufsize || + len != reply->len) { + ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__); msg->rc = -EMSGSIZE; - } else { - memcpy(msg->msg, reply->msg, len); - msg->len = len; + goto out; } + memcpy(msg->msg, reply->msg, len); + msg->len = len; } else { memcpy(msg->msg, reply->msg, sizeof(error_reply)); + msg->len = sizeof(error_reply); } out: complete((struct completion *)msg->private); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 6c874808c356..914151c03753 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -938,28 +938,37 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq, t86r->cprbx.cprb_ver_id == 0x02) { switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_ICA: - len = sizeof(struct type86x_reply) + t86r->length - 2; - if (len > reply->bufsize || len > msg->bufsize) { + len = sizeof(struct type86x_reply) + t86r->length; + if (len > reply->bufsize || len > msg->bufsize || + len != reply->len) { + ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__); msg->rc = -EMSGSIZE; - } else { - memcpy(msg->msg, reply->msg, len); - msg->len = len; + goto out; } + memcpy(msg->msg, reply->msg, len); + msg->len = len; break; case CEXXC_RESPONSE_TYPE_XCRB: - len = t86r->fmt2.offset2 + t86r->fmt2.count2; - if (len > reply->bufsize || len > msg->bufsize) { + if (t86r->fmt2.count2) + len = t86r->fmt2.offset2 + t86r->fmt2.count2; + else + len = t86r->fmt2.offset1 + t86r->fmt2.count1; + if (len > reply->bufsize || len > msg->bufsize || + len != reply->len) { + ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__); msg->rc = -EMSGSIZE; - } else { - memcpy(msg->msg, reply->msg, len); - msg->len = len; + goto out; } + memcpy(msg->msg, reply->msg, len); + msg->len = len; break; default: memcpy(msg->msg, &error_reply, sizeof(error_reply)); + msg->len = sizeof(error_reply); } } else { memcpy(msg->msg, reply->msg, sizeof(error_reply)); + msg->len = sizeof(error_reply); } out: complete(&resp_type->work); @@ -994,18 +1003,22 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq, switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_EP11: len = t86r->fmt2.offset1 + t86r->fmt2.count1; - if (len > reply->bufsize || len > msg->bufsize) { + if (len > reply->bufsize || len > msg->bufsize || + len != reply->len) { + ZCRYPT_DBF_DBG("%s len mismatch => EMSGSIZE\n", __func__); msg->rc = -EMSGSIZE; - } else { - memcpy(msg->msg, reply->msg, len); - msg->len = len; + goto out; } + memcpy(msg->msg, reply->msg, len); + msg->len = len; break; default: memcpy(msg->msg, &error_reply, sizeof(error_reply)); + msg->len = sizeof(error_reply); } } else { memcpy(msg->msg, reply->msg, sizeof(error_reply)); + msg->len = sizeof(error_reply); } out: complete(&resp_type->work); -- cgit v1.2.3 From f6047040217babeb9655b855c83d61cb1c375ac9 Mon Sep 
17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 20 Sep 2022 15:12:01 +0200 Subject: s390/ap: exploit new B bit from QCI config info This patch introduces an update to the ap_config_info struct which is filled with the QCI subfunction. There is a new bit apsb (short 'B') showing if the AP secure bind facility is available. The patch also includes a simple function ap_sb_available() wrapping this bit test. Signed-off-by: Harald Freudenberger Reviewed-by: Tony Krowiak Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 13 +++++++------ drivers/s390/crypto/ap_bus.c | 20 ++++++++++++++++---- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/vfio_ap_ops.c | 6 +++--- 4 files changed, 27 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 6bb536e87897..5e4a88460a57 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -180,15 +180,16 @@ struct ap_config_info { unsigned int apxa : 1; /* N bit */ unsigned int qact : 1; /* C bit */ unsigned int rc8a : 1; /* R bit */ - unsigned char _reserved1 : 4; - unsigned char _reserved2[3]; - unsigned char Na; /* max # of APs - 1 */ - unsigned char Nd; /* max # of Domains - 1 */ - unsigned char _reserved3[10]; + unsigned int : 4; + unsigned int apsb : 1; /* B bit */ + unsigned int : 23; + unsigned char na; /* max # of APs - 1 */ + unsigned char nd; /* max # of Domains - 1 */ + unsigned char _reserved0[10]; unsigned int apm[8]; /* AP ID mask */ unsigned int aqm[8]; /* AP (usage) queue mask */ unsigned int adm[8]; /* AP (control) domain mask */ - unsigned char _reserved4[16]; + unsigned char _reserved1[16]; } __aligned(8); /** diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 2bc184ee8952..b3c350e9fc1d 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -200,6 +200,18 @@ static inline int ap_qact_available(void) return 0; } +/* + * ap_sb_available(): Test if the AP secure binding facility is available. + * + * Returns 1 if secure binding facility is available. 
+ */ +int ap_sb_available(void) +{ + if (ap_qci_info) + return ap_qci_info->apsb; + return 0; +} + /* * ap_fetch_qci_info(): Fetch cryptographic config info * @@ -248,13 +260,13 @@ static void __init ap_init_qci_info(void) AP_DBF_INFO("%s successful fetched initial qci info\n", __func__); if (ap_qci_info->apxa) { - if (ap_qci_info->Na) { - ap_max_adapter_id = ap_qci_info->Na; + if (ap_qci_info->na) { + ap_max_adapter_id = ap_qci_info->na; AP_DBF_INFO("%s new ap_max_adapter_id is %d\n", __func__, ap_max_adapter_id); } - if (ap_qci_info->Nd) { - ap_max_domain_id = ap_qci_info->Nd; + if (ap_qci_info->nd) { + ap_max_domain_id = ap_qci_info->nd; AP_DBF_INFO("%s new ap_max_domain_id is %d\n", __func__, ap_max_domain_id); } diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index b5d7ccbc0784..5ce020879a38 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -296,6 +296,7 @@ void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg); void ap_flush_queue(struct ap_queue *aq); void *ap_airq_ptr(void); +int ap_sb_available(void); void ap_wait(enum ap_sm_wait wait); void ap_request_timeout(struct timer_list *t); void ap_bus_force_rescan(void); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 72e10abb103a..bfe995116a6a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -599,9 +599,9 @@ out_unlock: static void vfio_ap_matrix_init(struct ap_config_info *info, struct ap_matrix *matrix) { - matrix->apm_max = info->apxa ? info->Na : 63; - matrix->aqm_max = info->apxa ? info->Nd : 15; - matrix->adm_max = info->apxa ? info->Nd : 15; + matrix->apm_max = info->apxa ? info->na : 63; + matrix->aqm_max = info->apxa ? info->nd : 15; + matrix->adm_max = info->apxa ? info->nd : 15; } static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev) -- cgit v1.2.3 From 211c06d845a7a5d856e2a99276ceb02aaa11c618 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 12 Sep 2022 18:02:44 +0200 Subject: s390/ap: make tapq gr2 response a struct This patch introduces a new struct ap_tapq_gr2 which covers the response in GR2 on TAPQ invocation. This makes it much easier and less error-prone for the calling functions to access the right field without shifting and masking. Signed-off-by: Harald Freudenberger Reviewed-by: Tony Krowiak Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 39 +++++++++++++++++++++++++++++++++------ drivers/s390/crypto/ap_bus.c | 23 ++++++----------------- drivers/s390/crypto/vfio_ap_ops.c | 8 +++----- 3 files changed, 42 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 5e4a88460a57..ba8da3f02f35 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -86,6 +86,34 @@ static inline bool ap_instructions_available(void) return reg1 != 0; } +/* TAPQ register GR2 response struct */ +struct ap_tapq_gr2 { + union { + unsigned long value; + struct { + unsigned int fac : 32; /* facility bits */ + unsigned int apinfo : 32; /* ap type, ... 
*/ + }; + struct { + unsigned int s : 1; /* APSC */ + unsigned int m : 1; /* AP4KM */ + unsigned int c : 1; /* AP4KC */ + unsigned int mode : 3; + unsigned int n : 1; /* APXA */ + unsigned int : 1; + unsigned int class : 8; + unsigned int bs : 2; /* SE bind/assoc */ + unsigned int : 14; + unsigned int at : 8; /* ap type */ + unsigned int nd : 8; /* nr of domains */ + unsigned int : 4; + unsigned int ml : 4; /* apxl ml */ + unsigned int : 4; + unsigned int qd : 4; /* queue depth */ + }; + }; +}; + /** * ap_tapq(): Test adjunct processor queue. * @qid: The AP queue number @@ -93,7 +121,7 @@ static inline bool ap_instructions_available(void) * * Returns AP queue status structure. */ -static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) +static inline struct ap_queue_status ap_tapq(ap_qid_t qid, struct ap_tapq_gr2 *info) { union ap_queue_status_reg reg1; unsigned long reg2; @@ -108,7 +136,7 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) : [qid] "d" (qid) : "cc", "0", "1", "2"); if (info) - *info = reg2; + info->value = reg2; return reg1.status; } @@ -116,13 +144,12 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number * @tbit: Test facilities bit - * @info: Pointer to queue descriptor + * @info: Ptr to tapq gr2 struct * * Returns AP queue status structure. */ -static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info) +static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit, + struct ap_tapq_gr2 *info) { if (tbit) qid |= 1UL << 23; /* set T bit*/ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4e1926cf7a3d..ab37818faeab 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -343,18 +343,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, int *q_depth, int *q_ml, bool *q_decfg, bool *q_cstop) { struct ap_queue_status status; - union { - unsigned long value; - struct { - unsigned int fac : 32; /* facility bits */ - unsigned int at : 8; /* ap type */ - unsigned int _res1 : 8; - unsigned int _res2 : 4; - unsigned int ml : 4; /* apxl ml */ - unsigned int _res3 : 4; - unsigned int qd : 4; /* queue depth */ - } tapq_gr2; - } tapq_info; + struct ap_tapq_gr2 tapq_info; tapq_info.value = 0; @@ -364,7 +353,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, return false; /* call TAPQ on this APQN */ - status = ap_test_queue(qid, ap_apft_available(), &tapq_info.value); + status = ap_test_queue(qid, ap_apft_available(), &tapq_info); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: @@ -378,10 +367,10 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, */ if (WARN_ON_ONCE(!tapq_info.value)) return false; - *q_type = tapq_info.tapq_gr2.at; - *q_fac = tapq_info.tapq_gr2.fac; - *q_depth = tapq_info.tapq_gr2.qd; - *q_ml = tapq_info.tapq_gr2.ml; + *q_type = tapq_info.at; + *q_fac = tapq_info.fac; + *q_depth = tapq_info.qd; + *q_ml = tapq_info.ml; *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; switch (*q_type) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index bfe995116a6a..31de464e4bb2 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -2115,8 +2115,8 @@ static 
void vfio_ap_filter_apid_by_qtype(unsigned long *apm, unsigned long *aqm) { bool apid_cleared; struct ap_queue_status status; - unsigned long apid, apqi, info; - int qtype, qtype_mask = 0xff000000; + unsigned long apid, apqi; + struct ap_tapq_gr2 info; for_each_set_bit_inv(apid, apm, AP_DEVICES) { apid_cleared = false; @@ -2133,15 +2133,13 @@ static void vfio_ap_filter_apid_by_qtype(unsigned long *apm, unsigned long *aqm) case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: case AP_RESPONSE_BUSY: - qtype = info & qtype_mask; - /* * The vfio_ap device driver only * supports CEX4 and newer adapters, so * remove the APID if the adapter is * older than a CEX4. */ - if (qtype < AP_DEVICE_TYPE_CEX4) { + if (info.at < AP_DEVICE_TYPE_CEX4) { clear_bit_inv(apid, apm); apid_cleared = true; } -- cgit v1.2.3 From 4bdf3c3956d863b6823daa185ee3ce3e3a432cf3 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 7 Sep 2022 18:04:03 +0200 Subject: s390/ap: provide F bit parameter for ap_rapq() and ap_zapq() Extend the ap inline functions ap_rapq() (calls PQAP(RAPQ)) and ap_zapq() (calls PQAP(ZAPQ)) with a new parameter to enable the newly architected F bit, which forces an unassociate and/or unbind on a secure execution associated and/or bound queue. Signed-off-by: Harald Freudenberger Reviewed-by: Tony Krowiak Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 12 ++++++++++-- drivers/s390/crypto/ap_queue.c | 4 ++-- drivers/s390/crypto/vfio_ap_ops.c | 2 +- drivers/s390/crypto/zcrypt_cex2c.c | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index ba8da3f02f35..3442e76d5e3d 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -159,14 +159,18 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit, /** * ap_pqap_rapq(): Reset adjunct processor queue. * @qid: The AP queue number + * @fbit: if != 0 set F bit * * Returns AP queue status structure. */ -static inline struct ap_queue_status ap_rapq(ap_qid_t qid) +static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit) { unsigned long reg0 = qid | (1UL << 24); /* fc 1UL is RAPQ */ union ap_queue_status_reg reg1; + if (fbit) + reg0 |= 1UL << 22; + asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */ @@ -180,14 +184,18 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) /** * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. * @qid: The AP queue number + * @fbit: if != 0 set F bit + * * Returns AP queue status structure. 
*/ -static inline struct ap_queue_status ap_zapq(ap_qid_t qid) +static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit) { unsigned long reg0 = qid | (2UL << 24); /* fc 2UL is ZAPQ */ union ap_queue_status_reg reg1; + if (fbit) + reg0 |= 1UL << 22; + asm volatile( " lgr 0,%[reg0]\n" /* qid arg into gr0 */ " .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 1c08b282987c..8517e1c54149 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -322,7 +322,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) { struct ap_queue_status status; - status = ap_rapq(aq->qid); + status = ap_rapq(aq->qid, 0); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: @@ -936,7 +936,7 @@ void ap_queue_remove(struct ap_queue *aq) * to the initial value AP_DEV_STATE_UNINITIATED. */ spin_lock_bh(&aq->lock); - ap_zapq(aq->qid); + ap_zapq(aq->qid, 0); aq->dev_state = AP_DEV_STATE_UNINITIATED; spin_unlock_bh(&aq->lock); } diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 31de464e4bb2..cfbcb864ab63 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1657,7 +1657,7 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) if (!q) return 0; retry_zapq: - status = ap_zapq(q->apqn); + status = ap_zapq(q->apqn, 0); q->reset_rc = status.response_code; switch (status.response_code) { case AP_RESPONSE_NORMAL: diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 78f8d68ffcb2..251b5bd3d19c 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -338,7 +338,7 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev) zq->queue = aq; zq->online = 1; atomic_set(&zq->load, 0); - ap_rapq(aq->qid); + ap_rapq(aq->qid, 0); rc = zcrypt_cex2c_rng_supported(aq); if (rc < 0) { zcrypt_queue_free(zq); -- cgit v1.2.3 From c81cf436e47c34d2ff29e1b3db296f00d55b1c6f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 7 Sep 2022 19:05:18 +0200 Subject: s390/ap: new low level inline functions ap_bapq() and ap_aapq() Introduce two new low level functions ap_bapq() (calls PQAP(BAPQ)) and ap_aapq() (calls PQAP(AAPQ)). Both functions are only meant to be used in an SE environment with the SE AP binding facility available. Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 3442e76d5e3d..92b04d66d69a 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -354,6 +354,59 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, return reg1.status; } +/* + * ap_bapq(): SE bind AP queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + * + * Invoking this function in a non-SE environment + * may cause a specification exception. 
+ */ +static inline struct ap_queue_status ap_bapq(ap_qid_t qid) +{ + unsigned long reg0 = qid | (7UL << 24); /* fc 7 is BAPQ */ + union ap_queue_status_reg reg1; + + asm volatile( + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(BAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg1] "=&d" (reg1.value) + : [reg0] "d" (reg0) + : "cc", "0", "1"); + + return reg1.status; +} + +/* + * ap_aapq(): SE associate AP queue. + * @qid: The AP queue number + * @sec_idx: The secret index + * + * Returns AP queue status structure. + * + * Invoking this function in a non-SE environment + * may cause a specification exception. + */ +static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx) +{ + unsigned long reg0 = qid | (8UL << 24); /* fc 8 is AAPQ */ + unsigned long reg2 = sec_idx; + union ap_queue_status_reg reg1; + + asm volatile( + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " lgr 2,%[reg2]\n" /* secret index into gr2 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(AAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + : [reg1] "=&d" (reg1.value) + : [reg0] "d" (reg0), [reg2] "d" (reg2) + : "cc", "0", "1", "2"); + + return reg1.status; +} + /** * ap_nqap(): Send message to adjunct processor queue. * @qid: The AP queue number -- cgit v1.2.3 From 2d72eaf036d2f2b7ec16cda2d0e7ce292537dad9 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 10 Mar 2023 17:46:49 +0100 Subject: s390/ap: implement SE AP bind, unbind and associate Implementation of the new functions for SE AP support: bind, unbind and associate. There are two new sysfs attributes for this: /sys/devices/ap/cardxx/xx.yyyy/se_bind /sys/devices/ap/cardxx/xx.yyyy/se_associate Writing a 1 into the se_bind attribute triggers the SE AP bind for this AP queue; writing a 0 into it does an unbind, which is a reset (RAPQ) with the F bit enabled. The se_associate attribute expects an integer value in the range 0...2^16-1 to be written in. This is the index into a secrets table fed into the ultravisor. For more details please see the Architecture documents. Both of these new ap queue attributes are only visible inside an SE guest with SB (Secure Binding) available. Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ap.h | 13 +- drivers/s390/crypto/ap_bus.h | 46 +++++--- drivers/s390/crypto/ap_queue.c | 262 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 302 insertions(+), 19 deletions(-) (limited to 'arch')
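The bs field from the TAPQ GR2 response steers what the se_bind attribute reports; roughly, as in the following sketch (se_status() is a hypothetical helper invented for illustration, the authoritative logic is in the ap_queue.c hunks of this patch):

static const char *se_status(unsigned int bs)
{
	switch (bs) {
	case AP_BS_Q_USABLE:			/* bound, secure key usable */
	case AP_BS_Q_USABLE_NO_SECURE_KEY:	/* bound, not yet associated */
		return "bound";
	case AP_BS_Q_AVAIL_FOR_BINDING:		/* BAPQ may be issued */
	case AP_BS_Q_UNUSABLE:
	default:
		return "unbound";
	}
}

diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 92b04d66d69a..d5d967166bac 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -43,10 +43,11 @@ struct ap_queue_status { unsigned int queue_empty : 1; unsigned int replies_waiting : 1; unsigned int queue_full : 1; - unsigned int _pad1 : 4; + unsigned int : 3; + unsigned int async : 1; unsigned int irq_enabled : 1; unsigned int response_code : 8; - unsigned int _pad2 : 16; + unsigned int : 16; }; /* @@ -114,6 +115,14 @@ struct ap_tapq_gr2 { }; }; +/* + * Convenience defines to be used with the bs field from struct ap_tapq_gr2 + */ +#define AP_BS_Q_USABLE 0 +#define AP_BS_Q_USABLE_NO_SECURE_KEY 1 +#define AP_BS_Q_AVAIL_FOR_BINDING 2 +#define AP_BS_Q_UNUSABLE 3 + /** * ap_tapq(): Test adjunct processor queue. 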
* @qid: The AP queue number diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index f14323c278a3..101fb324476f 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -39,22 +39,32 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) return (*ptr & (0x80000000u >> nr)) != 0; } -#define AP_RESPONSE_NORMAL 0x00 -#define AP_RESPONSE_Q_NOT_AVAIL 0x01 -#define AP_RESPONSE_RESET_IN_PROGRESS 0x02 -#define AP_RESPONSE_DECONFIGURED 0x03 -#define AP_RESPONSE_CHECKSTOPPED 0x04 -#define AP_RESPONSE_BUSY 0x05 -#define AP_RESPONSE_INVALID_ADDRESS 0x06 -#define AP_RESPONSE_OTHERWISE_CHANGED 0x07 -#define AP_RESPONSE_INVALID_GISA 0x08 -#define AP_RESPONSE_Q_FULL 0x10 -#define AP_RESPONSE_NO_PENDING_REPLY 0x10 -#define AP_RESPONSE_INDEX_TOO_BIG 0x11 -#define AP_RESPONSE_NO_FIRST_PART 0x13 -#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 -#define AP_RESPONSE_REQ_FAC_NOT_INST 0x16 -#define AP_RESPONSE_INVALID_DOMAIN 0x42 +#define AP_RESPONSE_NORMAL 0x00 +#define AP_RESPONSE_Q_NOT_AVAIL 0x01 +#define AP_RESPONSE_RESET_IN_PROGRESS 0x02 +#define AP_RESPONSE_DECONFIGURED 0x03 +#define AP_RESPONSE_CHECKSTOPPED 0x04 +#define AP_RESPONSE_BUSY 0x05 +#define AP_RESPONSE_INVALID_ADDRESS 0x06 +#define AP_RESPONSE_OTHERWISE_CHANGED 0x07 +#define AP_RESPONSE_INVALID_GISA 0x08 +#define AP_RESPONSE_Q_BOUND_TO_ANOTHER 0x09 +#define AP_RESPONSE_STATE_CHANGE_IN_PROGRESS 0x0A +#define AP_RESPONSE_Q_NOT_BOUND 0x0B +#define AP_RESPONSE_Q_FULL 0x10 +#define AP_RESPONSE_NO_PENDING_REPLY 0x10 +#define AP_RESPONSE_INDEX_TOO_BIG 0x11 +#define AP_RESPONSE_NO_FIRST_PART 0x13 +#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 +#define AP_RESPONSE_REQ_FAC_NOT_INST 0x16 +#define AP_RESPONSE_Q_BIND_ERROR 0x30 +#define AP_RESPONSE_Q_NOT_AVAIL_FOR_ASSOC 0x31 +#define AP_RESPONSE_Q_NOT_EMPTY 0x32 +#define AP_RESPONSE_BIND_LIMIT_EXCEEDED 0x33 +#define AP_RESPONSE_INVALID_ASSOC_SECRET 0x34 +#define AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE 0x35 +#define AP_RESPONSE_ASSOC_FAILED 0x36 +#define AP_RESPONSE_INVALID_DOMAIN 0x42 /* * Known device types @@ -92,6 +102,7 @@ enum ap_sm_state { AP_SM_STATE_IDLE, AP_SM_STATE_WORKING, AP_SM_STATE_QUEUE_FULL, + AP_SM_STATE_ASSOC_WAIT, NR_AP_SM_STATES }; @@ -189,6 +200,7 @@ struct ap_card { }; #define TAPQ_CARD_FUNC_CMP_MASK 0xFFFF0000 +#define ASSOC_IDX_INVALID 0x10000 #define to_ap_card(x) container_of((x), struct ap_card, ap_dev.device) @@ -202,6 +214,7 @@ struct ap_queue { bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ bool interrupt; /* indicate if interrupts are enabled */ + unsigned int assoc_idx; /* SE association index */ int queue_count; /* # messages currently on AP queue. */ int pendingq_count; /* # requests on pendingq list. */ int requestq_count; /* # requests on requestq list. */ @@ -212,6 +225,7 @@ struct ap_queue { struct list_head requestq; /* List of message yet to be sent. */ struct ap_message *reply; /* Per device reply message. 
*/ enum ap_sm_state sm_state; /* ap queue state machine state */ + int rapq_fbit; /* fbit arg for next rapq invocation */ int last_err_rc; /* last error state response code */ }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 60dbabec25cf..2be63f2554bd 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -18,6 +18,21 @@ static void __ap_flush_queue(struct ap_queue *aq); +/* + * some AP queue helper functions + */ + +static inline bool ap_q_supports_bind(struct ap_queue *aq) +{ + return ap_test_bit(&aq->card->functions, AP_FUNC_EP11) || + ap_test_bit(&aq->card->functions, AP_FUNC_ACCEL); +} + +static inline bool ap_q_supports_assoc(struct ap_queue *aq) +{ + return ap_test_bit(&aq->card->functions, AP_FUNC_EP11); +} + /** * ap_queue_enable_irq(): Enable interrupt support on this AP queue. * @aq: The AP queue @@ -322,12 +337,13 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) { struct ap_queue_status status; - status = ap_rapq(aq->qid, 0); + status = ap_rapq(aq->qid, aq->rapq_fbit); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; aq->interrupt = false; + aq->rapq_fbit = 0; return AP_SM_WAIT_LOW_TIMEOUT; default: aq->dev_state = AP_DEV_STATE_ERROR; @@ -423,6 +439,59 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) } } +/** + * ap_sm_assoc_wait(): Test queue for completion of a pending + * association request. + * @aq: pointer to the AP queue + */ +static enum ap_sm_wait ap_sm_assoc_wait(struct ap_queue *aq) +{ + struct ap_queue_status status; + struct ap_tapq_gr2 info; + + status = ap_test_queue(aq->qid, 1, &info); + /* handle asynchronous error on this queue */ + if (status.async && status.response_code) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s asynch RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return AP_SM_WAIT_NONE; + } + if (status.response_code > AP_RESPONSE_BUSY) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return AP_SM_WAIT_NONE; + } + + /* check bs bits */ + switch (info.bs) { + case AP_BS_Q_USABLE: + /* association is through */ + aq->sm_state = AP_SM_STATE_IDLE; + AP_DBF_DBG("%s queue 0x%02x.%04x associated with %u\n", + __func__, AP_QID_CARD(aq->qid), + AP_QID_QUEUE(aq->qid), aq->assoc_idx); + return AP_SM_WAIT_NONE; + case AP_BS_Q_USABLE_NO_SECURE_KEY: + /* association still pending */ + return AP_SM_WAIT_LOW_TIMEOUT; + default: + /* reset from 'outside' happened or no idea at all */ + aq->assoc_idx = ASSOC_IDX_INVALID; + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s bs 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, info.bs, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return AP_SM_WAIT_NONE; + } +} + /* * AP state machine jump table */ @@ -451,6 +520,10 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { [AP_SM_EVENT_POLL] = ap_sm_read, [AP_SM_EVENT_TIMEOUT] = ap_sm_reset, }, + [AP_SM_STATE_ASSOC_WAIT] = { + [AP_SM_EVENT_POLL] = ap_sm_assoc_wait, + [AP_SM_EVENT_TIMEOUT] = ap_sm_reset, + }, }; enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) @@ -696,6 +769,9 @@ 
static ssize_t states_show(struct device *dev, case AP_SM_STATE_QUEUE_FULL: rc += sysfs_emit_at(buf, rc, " [FULL]\n"); break; + case AP_SM_STATE_ASSOC_WAIT: + rc += sysfs_emit_at(buf, rc, " [ASSOC_WAIT]\n"); + break; default: rc += sysfs_emit_at(buf, rc, " [UNKNOWN]\n"); } @@ -780,6 +856,186 @@ static struct device_type ap_queue_type = { .groups = ap_queue_dev_attr_groups, }; +static ssize_t se_bind_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; + struct ap_tapq_gr2 info; + + if (!ap_q_supports_bind(aq)) + return sysfs_emit(buf, "-\n"); + + status = ap_test_queue(aq->qid, 1, &info); + if (status.response_code > AP_RESPONSE_BUSY) { + AP_DBF_DBG("%s RC 0x%02x on tapq(0x%02x.%04x)\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return -EIO; + } + switch (info.bs) { + case AP_BS_Q_USABLE: + case AP_BS_Q_USABLE_NO_SECURE_KEY: + return sysfs_emit(buf, "bound\n"); + default: + return sysfs_emit(buf, "unbound\n"); + } +} + +static ssize_t se_bind_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; + bool value; + int rc; + + if (!ap_q_supports_bind(aq)) + return -EINVAL; + + /* only 0 (unbind) and 1 (bind) allowed */ + rc = kstrtobool(buf, &value); + if (rc) + return rc; + + if (value) { + /* bind, do BAPQ */ + spin_lock_bh(&aq->lock); + if (aq->sm_state < AP_SM_STATE_IDLE) { + spin_unlock_bh(&aq->lock); + return -EBUSY; + } + status = ap_bapq(aq->qid); + spin_unlock_bh(&aq->lock); + if (status.response_code) { + AP_DBF_WARN("%s RC 0x%02x on bapq(0x%02x.%04x)\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), + AP_QID_QUEUE(aq->qid)); + return -EIO; + } + } else { + /* unbind, set F bit arg and trigger RAPQ */ + spin_lock_bh(&aq->lock); + __ap_flush_queue(aq); + aq->rapq_fbit = 1; + aq->assoc_idx = ASSOC_IDX_INVALID; + aq->sm_state = AP_SM_STATE_RESET_START; + ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); + spin_unlock_bh(&aq->lock); + } + + return count; +} + +static DEVICE_ATTR_RW(se_bind); + +static ssize_t se_associate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; + struct ap_tapq_gr2 info; + + if (!ap_q_supports_assoc(aq)) + return sysfs_emit(buf, "-\n"); + + status = ap_test_queue(aq->qid, 1, &info); + if (status.response_code > AP_RESPONSE_BUSY) { + AP_DBF_DBG("%s RC 0x%02x on tapq(0x%02x.%04x)\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return -EIO; + } + + switch (info.bs) { + case AP_BS_Q_USABLE: + if (aq->assoc_idx == ASSOC_IDX_INVALID) { + AP_DBF_WARN("%s AP_BS_Q_USABLE but invalid assoc_idx\n", __func__); + return -EIO; + } + return sysfs_emit(buf, "associated %u\n", aq->assoc_idx); + case AP_BS_Q_USABLE_NO_SECURE_KEY: + if (aq->assoc_idx != ASSOC_IDX_INVALID) + return sysfs_emit(buf, "association pending\n"); + fallthrough; + default: + return sysfs_emit(buf, "unassociated\n"); + } +} + +static ssize_t se_associate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; + unsigned int value; + int rc; + + if (!ap_q_supports_assoc(aq)) + return -EINVAL; + + /* association index needs to be >= 0 */ + rc = kstrtouint(buf, 0, &value); + if (rc) + return rc; + if (value >= 
ASSOC_IDX_INVALID) + return -EINVAL; + + spin_lock_bh(&aq->lock); + + /* sm should be in idle state */ + if (aq->sm_state != AP_SM_STATE_IDLE) { + spin_unlock_bh(&aq->lock); + return -EBUSY; + } + + /* already associated or association pending ? */ + if (aq->assoc_idx != ASSOC_IDX_INVALID) { + spin_unlock_bh(&aq->lock); + return -EINVAL; + } + + /* trigger the asynchronous association request */ + status = ap_aapq(aq->qid, value); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS: + aq->sm_state = AP_SM_STATE_ASSOC_WAIT; + aq->assoc_idx = value; + ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); + spin_unlock_bh(&aq->lock); + break; + default: + spin_unlock_bh(&aq->lock); + AP_DBF_WARN("%s RC 0x%02x on aapq(0x%02x.%04x)\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); + return -EIO; + } + + return count; +} + +static DEVICE_ATTR_RW(se_associate); + +static struct attribute *ap_queue_dev_sb_attrs[] = { + &dev_attr_se_bind.attr, + &dev_attr_se_associate.attr, + NULL +}; + +static struct attribute_group ap_queue_dev_sb_attr_group = { + .attrs = ap_queue_dev_sb_attrs +}; + +static const struct attribute_group *ap_queue_dev_sb_attr_groups[] = { + &ap_queue_dev_sb_attr_group, + NULL +}; + static void ap_queue_device_release(struct device *dev) { struct ap_queue *aq = to_ap_queue(dev); @@ -801,6 +1057,9 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) aq->ap_dev.device.release = ap_queue_device_release; aq->ap_dev.device.type = &ap_queue_type; aq->ap_dev.device_type = device_type; + // add optional SE secure binding attributes group + if (ap_sb_available() && is_prot_virt_guest()) + aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups; aq->qid = qid; aq->interrupt = false; spin_lock_init(&aq->lock); @@ -947,6 +1206,7 @@ void ap_queue_init_state(struct ap_queue *aq) aq->dev_state = AP_DEV_STATE_OPERATING; aq->sm_state = AP_SM_STATE_RESET_START; aq->last_err_rc = 0; + aq->assoc_idx = ASSOC_IDX_INVALID; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } -- cgit v1.2.3
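The equivalence the pgalloc change relies on is that atomic_fetch_xor() returns the old value, so xor-ing the bits in once more reconstructs exactly the value that was stored. A stand-alone sketch with C11 atomics (not the kernel's atomic_t API, whose argument order is atomic_fetch_xor(bits, v)):

#include <stdatomic.h>

/*
 * Why "fetch_xor ^ bits" equals the new value: fetch_xor returns
 * the OLD value, and (old ^ bits) is exactly what was stored.
 */
static unsigned int xor_bits(atomic_uint *v, unsigned int bits)
{
	return atomic_fetch_xor(v, bits) ^ bits;
}

From af90d7b69c69b9b8ab91acc2ceab3706a2798373 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 16 Mar 2023 16:34:46 +0100 Subject: s390/cpum_sf: remove flag PERF_CPUM_SF_FULL_BLOCKS This flag is used to process only fully populated sampling buffers when a sampling event is stopped on a CPU. 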
By default the last sampling buffer is also scanned for samples even if the sampling block full indicator is not set in the trailer entry of a sampling buffer page. This flag can be set via the perf_event_attr::config1 field. It was never used and never documented. It is useless now. With PERF_CPUM_SF_FULL_BLOCKS: When a process is scheduled off the CPU, the sampling is stopped and the samples are copied to the perf ring buffer and marked invalid. When stopped at the last full sample buffer page (which is achieved with the PERF_CPUM_SF_FULL_BLOCKS option), the hardware sampling will resume at the first free sample entry in the current, partially filled sample buffer. Without PERF_CPUM_SF_FULL_BLOCKS (default behavior): The partially filled last sample buffer is scanned and valid samples are saved to the perf ring buffer. The valid samples are marked invalid. The sampling is resumed when the process is scheduled on this CPU. Again the hardware sampling will resume at the first free sample entry in the current, partially filled sample buffer. Now the next interrupt handler invocation scans the full sample block and saves the valid samples to the ring buffer. It omits the invalid samples at the top of the buffer. The default behavior is fully sufficient, therefore remove this feature. Signed-off-by: Thomas Richter Acked-by: Hendrik Brueckner Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/perf_event.h | 2 -- arch/s390/kernel/perf_cpum_sf.c | 14 ++------------ 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index b9da71632827..9917e2717b2b 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -60,7 +60,6 @@ struct perf_sf_sde_regs { #define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ #define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ PERF_CPUM_SF_DIAG_MODE) -#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */ #define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */ #define REG_NONE 0 @@ -71,7 +70,6 @@ struct perf_sf_sde_regs { #define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) -#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) #define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE) #define perf_arch_fetch_caller_regs(regs, __ip) do { \ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index e7b867e2f73f..7ef72f5ff52e 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -882,10 +882,6 @@ static int __hw_perf_event_init(struct perf_event *event) SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; } - /* Check and set other sampling flags */ - if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) - SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; - err = __hw_perf_event_init_rate(event, &si); if (err) goto out; @@ -1293,11 +1289,8 @@ static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t * The sampling buffer position are retrieved and saved in the TEAR_REG * register of the specified perf event. * - * Only full sample-data-blocks are processed. Specify the flash_all flag - * to also walk through partially filled sample-data-blocks. It is ignored - * if PERF_CPUM_SF_FULL_BLOCKS is set. 
The PERF_CPUM_SF_FULL_BLOCKS flag - enforces the processing of full sample-data-blocks only (trailer entries - with the block-full-indicator bit set). + * Only full sample-data-blocks are processed. Specify the flush_all flag + * to also walk through partially filled sample-data-blocks. */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { @@ -1315,9 +1308,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) if (SAMPL_DIAG_MODE(&event->hw)) return; - if (flush_all && SDB_FULL_BLOCKS(hwc)) - flush_all = 0; - sdbt = (unsigned long *) TEAR_REG(hwc); done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { -- cgit v1.2.3 From e7615c9225d7c7562d1cd95ba8cc2cbf38c3d29e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Mar 2023 10:31:40 +0100 Subject: s390: enable ARCH_HAS_MEMBARRIER_SYNC_CORE s390 trivially supports the ARCH_HAS_MEMBARRIER_SYNC_CORE requirements, since the lpswe(y) instruction, which is used to return from any kernel context to user space, performs CPU serialization. This is very similar to arm, arm64 and powerpc. See commit 70216e18e519 ("membarrier: Provide core serializing command, *_SYNC_CORE") for further details. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- Documentation/features/sched/membarrier-sync-core/arch-support.txt | 4 ++-- arch/s390/Kconfig | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt index 1e51614c136e..23260ca44946 100644 --- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt +++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt @@ -5,7 +5,7 @@ # # Architecture requirements # -# * arm/arm64/powerpc +# * arm/arm64/powerpc/s390 # # Rely on implicit context synchronization as a result of exception return # when returning from IPI handler, and when returning to user-space. @@ -45,7 +45,7 @@ | parisc: | TODO | | powerpc: | ok | | riscv: | TODO | - | s390: | TODO | + | s390: | ok | | sh: | TODO | | sparc: | TODO | | um: | TODO | diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8f1c6da2ae4d..d610f911677e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -76,6 +76,7 @@ config S390 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL -- cgit v1.2.3 From 10679e4d9848a04694eb99ec7390a6585d1b7ca9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:18 +0100 Subject: s390/uaccess: use symbolic names for inline assembly operands Improve readability of the uaccess inline assemblies by using symbolic names for all input and output operands. Except for debug sections the output of "objdump -Dr" of the uaccess object file is identical before/after this change. 
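A minimal sketch of positional versus symbolic operand names (the add_val() helpers are invented for illustration and are not part of the patch):

/* positional operands: the reader has to count %0, %1, ... */
static inline unsigned long add_val(unsigned long a, unsigned long b)
{
	asm("	agr	%0,%1\n" : "+d" (a) : "d" (b));
	return a;
}

/* symbolic operands: each use site documents itself */
static inline unsigned long add_val_named(unsigned long a, unsigned long b)
{
	asm("	agr	%[a],%[b]\n" : [a] "+d" (a) : [b] "d" (b));
	return a;
}

With symbolic names the large mvcos blocks below no longer require the reader to match operand numbers against the operand lists by hand.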
Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 105 +++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index d44214072779..0ebd522ccd6f 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -44,25 +44,26 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, tmp1 = -4096UL; asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%2),0(%1),%0\n" - "6: jz 4f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: mvcos 0(%2),0(%1),%4\n" - "7: slgr %0,%4\n" - " j 5f\n" - "4: slgr %0,%0\n" + " lr 0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "6: jz 4f\n" + "1: algr %[size],%[tmp1]\n" + " slgr %[from],%[tmp1]\n" + " slgr %[to],%[tmp1]\n" + " j 0b\n" + "2: la %[tmp2],4095(%[from])\n"/* tmp2 = from + 4095 */ + " nr %[tmp2],%[tmp1]\n" /* tmp2 = (from + 4095) & -4096 */ + " slgr %[tmp2],%[from]\n" + " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" + "7: slgr %[size],%[tmp2]\n" + " j 5f\n" + "4: slgr %[size],%[size]\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : [size] "+a" (size), [from] "+a" (from), [to] "+a" (to), + [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; @@ -104,25 +105,26 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, tmp1 = -4096UL; asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%1),0(%2),%0\n" - "6: jz 4f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: mvcos 0(%1),0(%2),%4\n" - "7: slgr %0,%4\n" - " j 5f\n" - "4: slgr %0,%0\n" + " lr 0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "6: jz 4f\n" + "1: algr %[size],%[tmp1]\n" + " slgr %[to],%[tmp1]\n" + " slgr %[from],%[tmp1]\n" + " j 0b\n" + "2: la %[tmp2],4095(%[to])\n" /* tmp2 = to + 4095 */ + " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ + " slgr %[tmp2],%[to]\n" + " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" + "7: slgr %[size],%[tmp2]\n" + " j 5f\n" + "4: slgr %[size],%[size]\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) + : [size] "+a" (size), [to] "+a" (to), [from] "+a" (from), + [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; @@ -155,25 +157,26 @@ unsigned long __clear_user(void __user *to, unsigned long size) tmp1 = -4096UL; asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%1),0(%4),%0\n" - "6: jz 4f\n" - "1: algr %0,%2\n" - " slgr %1,%2\n" - " j 0b\n" - "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ - " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ - " slgr %3,%1\n" - " clgr %0,%3\n" /* copy crosses next page boundary? 
*/ - " jnh 5f\n" - "3: mvcos 0(%1),0(%4),%3\n" - "7: slgr %0,%3\n" - " j 5f\n" - "4: slgr %0,%0\n" + " lr 0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" + "6: jz 4f\n" + "1: algr %[size],%[tmp1]\n" + " slgr %[to],%[tmp1]\n" + " j 0b\n" + "2: la %[tmp2],4095(%[to])\n" /* tmp2 = to + 4095 */ + " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ + " slgr %[tmp2],%[to]\n" + " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: mvcos 0(%[to]),0(%[zeropg]),%[tmp2]\n" + "7: slgr %[size],%[tmp2]\n" + " j 5f\n" + "4: slgr %[size],%[size]\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) - : "+&a" (size), "+&a" (to), "+a" (tmp1), "=&a" (tmp2) - : "a" (empty_zero_page), [spec] "d" (spec.val) + : [size] "+&a" (size), [to] "+&a" (to), + [tmp1] "+a" (tmp1), [tmp2] "=&a" (tmp2) + : [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -- cgit v1.2.3 From b96adf0d03e01fa10483f38e302a5f93bb733ed0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:19 +0100 Subject: s390/uaccess: remove unused label in inline assemblies Remove an unused label in all three uaccess inline assemblies. Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 0ebd522ccd6f..d0bbdcbd7baf 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -47,7 +47,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" "6: jz 4f\n" - "1: algr %[size],%[tmp1]\n" + " algr %[size],%[tmp1]\n" " slgr %[from],%[tmp1]\n" " slgr %[to],%[tmp1]\n" " j 0b\n" @@ -108,7 +108,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" "6: jz 4f\n" - "1: algr %[size],%[tmp1]\n" + " algr %[size],%[tmp1]\n" " slgr %[to],%[tmp1]\n" " slgr %[from],%[tmp1]\n" " j 0b\n" @@ -160,7 +160,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" "6: jz 4f\n" - "1: algr %[size],%[tmp1]\n" + " algr %[size],%[tmp1]\n" " slgr %[to],%[tmp1]\n" " j 0b\n" "2: la %[tmp2],4095(%[to])\n" /* tmp2 = to + 4095 */ -- cgit v1.2.3 From 4e0b0ad45c6616d85564cb27dbae04a962d068bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:20 +0100 Subject: s390/uaccess: rename/sort labels in inline assemblies Rename and sort labels in uaccess inline assemblies to increase readability. In addition have only one EX_TABLE entry per line - also to increase readability. Except for debug sections the output of "objdump -Dr" of the uaccess object file is identical before/after this change. 
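Each EX_TABLE(fault, target) entry pairs one potentially faulting instruction with its fixup label, so one entry per line keeps that pairing visible. A schematic sketch of the mechanism (try_load_user() is a hypothetical example; real user-space accessors additionally have to set up the secondary address space, as the surrounding patches do):

static inline int try_load_user(unsigned long *dst,
				const unsigned long __user *src)
{
	int rc = 0;

	asm volatile(
		"0:	lg	0,0(%[src])\n"	/* may fault */
		"	stg	0,0(%[dst])\n"
		"	j	2f\n"
		"1:	lhi	%[rc],-14\n"	/* fixup target: -EFAULT */
		"2:\n"
		EX_TABLE(0b, 1b)		/* fault at 0: resumes at 1: */
		: [rc] "+d" (rc)
		: [src] "a" (src), [dst] "a" (dst)
		: "cc", "memory", "0");
	return rc;
}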
Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 51 +++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index d0bbdcbd7baf..4dec9fc12f73 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -46,7 +46,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "6: jz 4f\n" + "1: jz 5f\n" " algr %[size],%[tmp1]\n" " slgr %[from],%[tmp1]\n" " slgr %[to],%[tmp1]\n" @@ -55,13 +55,16 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, " nr %[tmp2],%[tmp1]\n" /* tmp2 = (from + 4095) & -4096 */ " slgr %[tmp2],%[from]\n" " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ - " jnh 5f\n" + " jnh 6f\n" "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" - "7: slgr %[size],%[tmp2]\n" - " j 5f\n" - "4: slgr %[size],%[size]\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + "4: slgr %[size],%[tmp2]\n" + " j 6f\n" + "5: slgr %[size],%[size]\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(1b, 2b) + EX_TABLE(4b, 6b) : [size] "+a" (size), [from] "+a" (from), [to] "+a" (to), [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) : [spec] "d" (spec.val) @@ -107,7 +110,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "6: jz 4f\n" + "1: jz 5f\n" " algr %[size],%[tmp1]\n" " slgr %[to],%[tmp1]\n" " slgr %[from],%[tmp1]\n" @@ -116,13 +119,16 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ " slgr %[tmp2],%[to]\n" " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ - " jnh 5f\n" + " jnh 6f\n" "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" - "7: slgr %[size],%[tmp2]\n" - " j 5f\n" - "4: slgr %[size],%[size]\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + "4: slgr %[size],%[tmp2]\n" + " j 6f\n" + "5: slgr %[size],%[size]\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(1b, 2b) + EX_TABLE(4b, 6b) : [size] "+a" (size), [to] "+a" (to), [from] "+a" (from), [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) : [spec] "d" (spec.val) @@ -159,7 +165,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" - "6: jz 4f\n" + "1: jz 5f\n" " algr %[size],%[tmp1]\n" " slgr %[to],%[tmp1]\n" " j 0b\n" @@ -167,13 +173,16 @@ unsigned long __clear_user(void __user *to, unsigned long size) " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ " slgr %[tmp2],%[to]\n" " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? 
*/ - " jnh 5f\n" + " jnh 6f\n" "3: mvcos 0(%[to]),0(%[zeropg]),%[tmp2]\n" - "7: slgr %[size],%[tmp2]\n" - " j 5f\n" - "4: slgr %[size],%[size]\n" - "5:\n" - EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) + "4: slgr %[size],%[tmp2]\n" + " j 6f\n" + "5: slgr %[size],%[size]\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) : [size] "+&a" (size), [to] "+&a" (to), [tmp1] "+a" (tmp1), [tmp2] "=&a" (tmp2) : [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) -- cgit v1.2.3 From afdcc2ce3970ccad0ec4fb2d2c614e444c59f770 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:21 +0100 Subject: s390/uaccess: sort EX_TABLE list for inline assemblies Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 4dec9fc12f73..7f33eb4dffba 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -62,8 +62,8 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, "5: slgr %[size],%[size]\n" "6:\n" EX_TABLE(0b, 2b) - EX_TABLE(3b, 6b) EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) : [size] "+a" (size), [from] "+a" (from), [to] "+a" (to), [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) @@ -126,8 +126,8 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, "5: slgr %[size],%[size]\n" "6:\n" EX_TABLE(0b, 2b) - EX_TABLE(3b, 6b) EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) : [size] "+a" (size), [to] "+a" (to), [from] "+a" (from), [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) -- cgit v1.2.3 From 7f65d18329a2546ce6f64ac6bf141c38b15c2ab3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:22 +0100 Subject: s390/uaccess: rename tmp1 and tmp2 variables Rename tmp1 and tmp2 variables to more meaningful val (for value) and rem (for remainder). Except for debug sections the output of "objdump -Dr" of the uaccess object file is identical before/after this change. Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 7f33eb4dffba..a89f6639818a 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -34,7 +34,7 @@ void debug_user_asce(int exit) static unsigned long raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) { - unsigned long tmp1, tmp2; + unsigned long val, rem; union oac spec = { .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, @@ -42,22 +42,22 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, .oac2.a = 1, }; - tmp1 = -4096UL; + val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" "1: jz 5f\n" - " algr %[size],%[tmp1]\n" - " slgr %[from],%[tmp1]\n" - " slgr %[to],%[tmp1]\n" + " algr %[size],%[val]\n" + " slgr %[from],%[val]\n" + " slgr %[to],%[val]\n" " j 0b\n" - "2: la %[tmp2],4095(%[from])\n"/* tmp2 = from + 4095 */ - " nr %[tmp2],%[tmp1]\n" /* tmp2 = (from + 4095) & -4096 */ - " slgr %[tmp2],%[from]\n" - " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? 
*/ + "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ + " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ + " slgr %[rem],%[from]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" - "4: slgr %[size],%[tmp2]\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" " j 6f\n" "5: slgr %[size],%[size]\n" "6:\n" @@ -66,7 +66,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) : [size] "+a" (size), [from] "+a" (from), [to] "+a" (to), - [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) + [val] "+a" (val), [rem] "=a" (rem) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; @@ -98,7 +98,7 @@ EXPORT_SYMBOL(_copy_from_user_key); static unsigned long raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) { - unsigned long tmp1, tmp2; + unsigned long val, rem; union oac spec = { .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, @@ -106,22 +106,22 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, .oac1.a = 1, }; - tmp1 = -4096UL; + val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" "1: jz 5f\n" - " algr %[size],%[tmp1]\n" - " slgr %[to],%[tmp1]\n" - " slgr %[from],%[tmp1]\n" + " algr %[size],%[val]\n" + " slgr %[to],%[val]\n" + " slgr %[from],%[val]\n" " j 0b\n" - "2: la %[tmp2],4095(%[to])\n" /* tmp2 = to + 4095 */ - " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ - " slgr %[tmp2],%[to]\n" - " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ + "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ + " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ + " slgr %[rem],%[to]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[tmp2]\n" - "4: slgr %[size],%[tmp2]\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" " j 6f\n" "5: slgr %[size],%[size]\n" "6:\n" @@ -130,7 +130,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) : [size] "+a" (size), [to] "+a" (to), [from] "+a" (from), - [tmp1] "+a" (tmp1), [tmp2] "=a" (tmp2) + [val] "+a" (val), [rem] "=a" (rem) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; @@ -155,27 +155,27 @@ EXPORT_SYMBOL(_copy_to_user_key); unsigned long __clear_user(void __user *to, unsigned long size) { - unsigned long tmp1, tmp2; + unsigned long val, rem; union oac spec = { .oac1.as = PSW_BITS_AS_SECONDARY, .oac1.a = 1, }; - tmp1 = -4096UL; + val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" "1: jz 5f\n" - " algr %[size],%[tmp1]\n" - " slgr %[to],%[tmp1]\n" + " algr %[size],%[val]\n" + " slgr %[to],%[val]\n" " j 0b\n" - "2: la %[tmp2],4095(%[to])\n" /* tmp2 = to + 4095 */ - " nr %[tmp2],%[tmp1]\n" /* tmp2 = (to + 4095) & -4096 */ - " slgr %[tmp2],%[to]\n" - " clgr %[size],%[tmp2]\n" /* copy crosses next page boundary? */ + "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ + " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ + " slgr %[rem],%[to]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[zeropg]),%[tmp2]\n" - "4: slgr %[size],%[tmp2]\n" + "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n" + "4: slgr %[size],%[rem]\n" " j 6f\n" "5: slgr %[size],%[size]\n" "6:\n" @@ -184,7 +184,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) : [size] "+&a" (size), [to] "+&a" (to), - [tmp1] "+a" (tmp1), [tmp2] "=&a" (tmp2) + [val] "+a" (val), [rem] "=&a" (rem) : [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; -- cgit v1.2.3 From c3bd834328a6b642cfebc8a1a6cd5e5447cbbd12 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:23 +0100 Subject: s390/uaccess: get rid of not needed local variable Get rid of the not needed val local variable and pass the constant value directly as operand value. In addition this turns the val operand into an input operand, since it is not changed within the inline assemblies. This in turn requires also to add the earlyclobber contraint modifier to all output operands, since the (former) val operand is used after all output variants have been modified. The usercopy kunit tests still pass after this change. Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index a89f6639818a..1cd5a3768fcd 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -34,7 +34,7 @@ void debug_user_asce(int exit) static unsigned long raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) { - unsigned long val, rem; + unsigned long rem; union oac spec = { .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, @@ -42,7 +42,6 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, .oac2.a = 1, }; - val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" @@ -65,9 +64,8 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, EX_TABLE(1b, 2b) EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) - : [size] "+a" (size), [from] "+a" (from), [to] "+a" (to), - [val] "+a" (val), [rem] "=a" (rem) - : [spec] "d" (spec.val) + : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } @@ -98,7 +96,7 @@ EXPORT_SYMBOL(_copy_from_user_key); static unsigned long raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) { - unsigned long val, rem; + unsigned long rem; union oac spec = { .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, @@ -106,7 +104,6 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, .oac1.a = 1, }; - val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[from]),%[size]\n" @@ -129,9 +126,8 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, EX_TABLE(1b, 2b) EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) - : [size] "+a" (size), [to] "+a" (to), [from] "+a" (from), - [val] "+a" (val), [rem] "=a" (rem) - : [spec] "d" (spec.val) + : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } @@ -155,13 +151,12 @@ EXPORT_SYMBOL(_copy_to_user_key); unsigned long __clear_user(void __user *to, unsigned long size) { - 
unsigned long val, rem; + unsigned long rem; union oac spec = { .oac1.as = PSW_BITS_AS_SECONDARY, .oac1.a = 1, }; - val = -4096UL; asm volatile( " lr 0,%[spec]\n" "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" @@ -183,9 +178,8 @@ unsigned long __clear_user(void __user *to, unsigned long size) EX_TABLE(1b, 2b) EX_TABLE(3b, 6b) EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), - [val] "+a" (val), [rem] "=&a" (rem) - : [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) + : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -- cgit v1.2.3 From 49d6e68f66132ee521d587ce97645ed2d3183d90 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Mar 2023 15:00:24 +0100 Subject: s390/uaccess: remove extra blank line In order to get uaccess.c (nearly) checkpatch warning free remove an extra blank line: CHECK: Blank lines aren't necessary before a close brace '}' + +} Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/lib/uaccess.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 1cd5a3768fcd..e4a13d7cab6e 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -27,7 +27,6 @@ void debug_user_asce(int exit) "kernel: %016llx user: %016llx\n", exit ? "exit" : "entry", cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); - } #endif /*CONFIG_DEBUG_ENTRY */ -- cgit v1.2.3 From 385bf43c48ffe44af881039779a6be09ac8a77c8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 28 Mar 2023 11:09:09 +0200 Subject: s390/entry: rely on long-displacement facility Since commit 4efd417f298b ("s390: raise minimum supported machine generation to z10"), the long-displacement facility is assumed and required for the kernel. Clean up a couple of places in the entry code, where long-displacement could be used directly instead of using a base register. However, there are still a few other places where a base register has to be used to extend short-displacement for the second lowcore page access. Notably, boot/head.S still has to be built for z900, and in mcck_int_handler, spt and lbear, which don't have long-displacements, but need to access save areas at the second lowcore page. 
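A short displacement is a 12-bit unsigned value (0-4095), while the long-displacement facility adds instruction formats with a 20-bit signed displacement. An illustration of the two forms (4608 is an invented offset standing in for any field on the second lowcore page; this is not code from the patch):

/* old form: base register extends the short displacement */
static inline unsigned long lowcore_load_base_reg(void)
{
	unsigned long val;

	asm volatile(
		"	la	1,4095\n"		/* base register trick */
		"	lg	%[val],4608-4095(1)\n"	/* 12-bit displacement */
		: [val] "=d" (val) : : "1");
	return val;
}

/* new form: long displacement reaches the field directly */
static inline unsigned long lowcore_load_long_disp(void)
{
	unsigned long val;

	asm volatile(
		"	lg	%[val],4608\n"		/* 20-bit signed displacement */
		: [val] "=d" (val));
	return val;
}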
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 58b85aedca22..c41ec5a03d07 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -489,14 +489,13 @@ ENTRY(mcck_int_handler) la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA # validate gprs lmg %r8,%r9,__LC_MCK_OLD_PSW TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID jno .Lmcck_panic # control registers invalid -> panic - la %r14,4095 - lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA # validate ctl regs ptlb lghi %r14,__LC_CPU_TIMER_SAVE_AREA mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) @@ -594,8 +593,7 @@ ENTRY(restart_int_handler) stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f - la %r15,4095 - lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15) + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA 0: larl %r15,.Lstosm_tmp stosm 0(%r15),0x04 # turn dat on, keep irqs off lg %r15,__LC_RESTART_STACK -- cgit v1.2.3 From b46650d56bd31dca8b09b5670ee9441a8f7b8869 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 28 Mar 2023 10:00:33 +0200 Subject: s390: make extables read-only Currently, exception tables are marked as ro_after_init. However, since they are sorted during compile time using scripts/sorttable, they can be moved to RO_DATA using the RO_EXCEPTION_TABLE_ALIGN macro, which is specifically designed for this purpose. Suggested-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vmlinux.lds.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 8d2288a5ba25..2ae201ebf90b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -14,6 +14,8 @@ #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ *(.bss..invalid_pg_dir) +#define RO_EXCEPTION_TABLE_ALIGN 16 + /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA @@ -66,7 +68,6 @@ SECTIONS *(.data..ro_after_init) JUMP_TABLE_DATA } :data - EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; -- cgit v1.2.3 From e6badee94027a4e9586e6b5d087bc96e9e4d554c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:21 +0200 Subject: s390/dumpstack: simplify in stack logic code The pattern for all in_<xyz>_stack() functions is the same; in particular the size of all stacks is the same. Simplify the code by passing only the stack address to the generic in_stack() helper, which then can assume a THREAD_SIZE sized stack. 
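The simplification boils down to one half-open interval test per stack; a stand-alone sketch of the idea (the THREAD_SIZE value is assumed for the sketch, and struct stack_info is omitted):

#include <stdbool.h>

#define THREAD_SIZE (4UL * 4096)	/* assumed value for the sketch */

/* sp lies on the stack iff it falls into [stack, stack + THREAD_SIZE) */
static bool on_stack(unsigned long sp, unsigned long stack)
{
	return sp >= stack && sp < stack + THREAD_SIZE;
}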
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dumpstack.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index f257058d0828..d2012635b093 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -41,60 +41,50 @@ const char *stack_type_name(enum stack_type type) EXPORT_SYMBOL_GPL(stack_type_name); static inline bool in_stack(unsigned long sp, struct stack_info *info, - enum stack_type type, unsigned long low, - unsigned long high) + enum stack_type type, unsigned long stack) { - if (sp < low || sp >= high) + if (sp < stack || sp >= stack + THREAD_SIZE) return false; info->type = type; - info->begin = low; - info->end = high; + info->begin = stack; + info->end = stack + THREAD_SIZE; return true; } static bool in_task_stack(unsigned long sp, struct task_struct *task, struct stack_info *info) { - unsigned long stack; + unsigned long stack = (unsigned long)task_stack_page(task); - stack = (unsigned long) task_stack_page(task); - return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE); + return in_stack(sp, info, STACK_TYPE_TASK, stack); } static bool in_irq_stack(unsigned long sp, struct stack_info *info) { - unsigned long frame_size, top; + unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET; - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - top = S390_lowcore.async_stack + frame_size; - return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top); + return in_stack(sp, info, STACK_TYPE_IRQ, stack); } static bool in_nodat_stack(unsigned long sp, struct stack_info *info) { - unsigned long frame_size, top; + unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET; - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - top = S390_lowcore.nodat_stack + frame_size; - return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top); + return in_stack(sp, info, STACK_TYPE_NODAT, stack); } static bool in_mcck_stack(unsigned long sp, struct stack_info *info) { - unsigned long frame_size, top; + unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - top = S390_lowcore.mcck_stack + frame_size; - return in_stack(sp, info, STACK_TYPE_MCCK, top - THREAD_SIZE, top); + return in_stack(sp, info, STACK_TYPE_MCCK, stack); } static bool in_restart_stack(unsigned long sp, struct stack_info *info) { - unsigned long frame_size, top; + unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET; - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - top = S390_lowcore.restart_stack + frame_size; - return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top); + return in_stack(sp, info, STACK_TYPE_RESTART, stack); } int get_stack_info(unsigned long sp, struct task_struct *task, -- cgit v1.2.3 From c2c3258fb55a0f51ea4a3c2cd4c29b23a64b151e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:22 +0200 Subject: s390/stack: use STACK_INIT_OFFSET where possible Make STACK_INIT_OFFSET also available for assembler code, and use it everywhere instead of open-coding it at several places. 
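One wrinkle worth noting: making STACK_INIT_OFFSET usable from assembler code means thread_info.h needs __PT_SIZE from the generated asm-offsets.h, which is why the new include is wrapped in an ASM_OFFSETS_C guard. Presumably asm-offsets.c defines that macro itself to break the circular dependency; roughly (an assumed sketch, not the literal file):

	/* arch/s390/kernel/asm-offsets.c, sketch */
	#define ASM_OFFSETS_C		/* skip the asm-offsets.h include in headers */
	#include <asm/thread_info.h>	/* safe even before asm-offsets.h exists */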
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/thread_info.h | 8 +++++--- arch/s390/kernel/entry.S | 12 ++++-------- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/setup.c | 3 +-- arch/s390/kernel/smp.c | 5 ++--- 5 files changed, 13 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f19e6f5ec367..c7c97921ed8d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -9,6 +9,9 @@ #define _ASM_THREAD_INFO_H #include +#ifndef ASM_OFFSETS_C +#include +#endif /* * General size of kernel stacks @@ -21,13 +24,12 @@ #define BOOT_STACK_SIZE (PAGE_SIZE << 2) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) + #ifndef __ASSEMBLY__ #include #include -#define STACK_INIT_OFFSET \ - (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs)) - /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c41ec5a03d07..fc3d1df09736 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,10 +29,6 @@ #include #include -STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER -STACK_SIZE = 1 << STACK_SHIFT -STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE - _LPP_OFFSET = __LC_LPP .macro STBEAR address @@ -53,7 +49,7 @@ _LPP_OFFSET = __LC_LPP .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD lghi %r14,\savearea jz stack_overflow #endif @@ -62,8 +58,8 @@ _LPP_OFFSET = __LC_LPP .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 - nill %r14,0x10000 - STACK_SIZE - oill %r14,STACK_INIT + nill %r14,0x10000 - THREAD_SIZE + oill %r14,STACK_INIT_OFFSET clg %r14,__LC_KERNEL_STACK je \oklabel clg %r14,__LC_ASYNC_STACK @@ -179,7 +175,7 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lghi %r4,__TASK_stack lghi %r1,__TASK_thread - llill %r5,STACK_INIT + llill %r5,STACK_INIT_OFFSET stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lg %r15,0(%r4,%r3) # start of kernel stack of next agr %r15,%r5 # end of kernel stack of next diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index f68be3951103..82399b4b5f8a 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -24,7 +24,7 @@ ENTRY(startup_continue) # larl %r14,init_task stg %r14,__LC_CURRENT - larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE + larl %r15,init_thread_union+STACK_INIT_OFFSET brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d25425b8d0c0..061cc771d764 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -436,8 +436,7 @@ static void __init setup_lowcore(void) lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; - lc->nodat_stack = ((unsigned long) &init_thread_union) - + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->nodat_stack = ((unsigned long)&init_thread_union) + STACK_INIT_OFFSET; lc->current_task = (unsigned 
long)&init_task; lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0126c5f6b904..680b4fbdabb1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -280,9 +280,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; - lc->kernel_stack = (unsigned long) task_stack_page(tsk) - + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->current_task = (unsigned long) tsk; + lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; + lc->current_task = (unsigned long)tsk; lc->lpp = LPP_MAGIC; lc->current_pid = tsk->pid; lc->user_timer = tsk->thread.user_timer; -- cgit v1.2.3 From cfea9bc78bc3f99abcef3efbe7631d3ed3b70406 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:23 +0200 Subject: s390/stack: set lowcore kernel stack pointer early Make sure the lowcore kernel stack pointer reflects the kernel stack of the current task as early as possible, instead of having a NULL pointer there. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 82399b4b5f8a..f67982f562db 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -25,6 +25,7 @@ ENTRY(startup_continue) larl %r14,init_task stg %r14,__LC_CURRENT larl %r15,init_thread_union+STACK_INIT_OFFSET + stg %r15,__LC_KERNEL_STACK brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code -- cgit v1.2.3 From 944c78376a39b86a57c0b36c73d1316bd97846bc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:24 +0200 Subject: s390: use init_thread_union aka initial stack for the first process s390 is the only architecture which switches from the initial stack to a later on allocated different stack for the first process. This is (at least) problematic for the stackleak feature, which instruments functions to save the current stackpointer within the task structure of the running process. The stackleak code compares stack pointers of the current process - and doesn't expect that the kernel stack of a task can change. Even though the stackleak feature itself will not cause any harm, the assumption about kernel stacks being consistent is there, and only s390 doesn't follow that. Therefore switch back to use init_thread_union, just like all other architectures. 
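The stackleak assumption can be seen in the generic tracking code, which records a low-water mark relative to the task's (presumed fixed) stack page; a simplified sketch of the kernel/stackleak.c logic, not the literal code:

	void stackleak_track_stack_sketch(void)
	{
		unsigned long sp = current_stack_pointer;
		unsigned long low = (unsigned long)task_stack_page(current);

		/* only meaningful if current's stack page never changes */
		if (sp < current->lowest_stack && sp >= low + sizeof(long))
			current->lowest_stack = sp;
	}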
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 48 ++++++++++++++++++------------------------------ arch/s390/kernel/smp.c | 2 +- 2 files changed, 19 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 061cc771d764..76cfc7c23e70 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -380,6 +380,12 @@ void stack_free(unsigned long stack) #endif } +void __init arch_call_rest_init(void) +{ + smp_reinit_ipl_cpu(); + rest_init(); +} + int __init arch_early_irq_init(void) { unsigned long stack; @@ -391,28 +397,21 @@ int __init arch_early_irq_init(void) return 0; } -void __init arch_call_rest_init(void) +static unsigned long __init stack_alloc_early(void) { unsigned long stack; - smp_reinit_ipl_cpu(); - stack = stack_alloc(); - if (!stack) - panic("Couldn't allocate kernel stack"); - current->stack = (void *) stack; -#ifdef CONFIG_VMAP_STACK - current->stack_vm_area = (void *) stack; -#endif - set_task_stack_end_magic(current); - stack += STACK_INIT_OFFSET; - S390_lowcore.kernel_stack = stack; - call_on_stack_noreturn(rest_init, stack); + stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); + if (!stack) { + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, THREAD_SIZE, THREAD_SIZE); + } + return stack; } static void __init setup_lowcore(void) { struct lowcore *lc, *abs_lc; - unsigned long mcck_stack; /* * Setup lowcore for boot cpu @@ -436,7 +435,6 @@ static void __init setup_lowcore(void) lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; - lc->nodat_stack = ((unsigned long)&init_thread_union) + STACK_INIT_OFFSET; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; @@ -449,17 +447,14 @@ static void __init setup_lowcore(void) lc->steal_timer = S390_lowcore.steal_timer; lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; - /* * Allocate the global restart stack which is the same for - * all CPUs in cast *one* of them does a PSW restart. + * all CPUs in case *one* of them does a PSW restart. */ - restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!restart_stack) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - restart_stack += STACK_INIT_OFFSET; - + restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET); + lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->kernel_stack = S390_lowcore.kernel_stack; /* * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant * restart data to the absolute zero lowcore. 
This is necessary if @@ -470,13 +465,6 @@ static void __init setup_lowcore(void) lc->restart_data = 0; lc->restart_source = -1U; __ctl_store(lc->cregs_save_area, 0, 15); - - mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!mcck_stack) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; - lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 680b4fbdabb1..cc98ac26e77c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1295,7 +1295,7 @@ int __init smp_reinit_ipl_cpu(void) free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE); memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); - return 0; } -- cgit v1.2.3 From 23be82f0deb8ce02fbb94d9baac7c2434d5e217e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:25 +0200 Subject: s390/stacktrace: remove call_on_stack_noreturn() There is no user left of call_on_stack_noreturn() - remove it. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stacktrace.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 1802be5abb5d..25e833cd883c 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -189,17 +189,4 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task, (rettype)r2; \ }) -#define call_on_stack_noreturn(fn, stack) \ -({ \ - void (*__fn)(void) = fn; \ - \ - asm volatile( \ - " la 15,0(%[_stack])\n" \ - " xc %[_bc](8,15),%[_bc](15)\n" \ - " brasl 14,%[_fn]\n" \ - ::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \ - [_stack] "a" (stack), [_fn] "X" (__fn)); \ - BUG(); \ -}) - #endif /* _ASM_S390_STACKTRACE_H */ -- cgit v1.2.3 From 60afa6d1662c08dd266463231acc129488fffe6e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:26 +0200 Subject: s390: remove arch_early_irq_init() Allocate early async stack like other early stacks and get rid of arch_early_irq_init(). This way the async stack is allocated earlier, and handled like all other stacks. 
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 12 +----------- arch/s390/kernel/smp.c | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 76cfc7c23e70..f2d0d52b3070 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -386,17 +386,6 @@ void __init arch_call_rest_init(void) rest_init(); } -int __init arch_early_irq_init(void) -{ - unsigned long stack; - - stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); - if (!stack) - panic("Couldn't allocate async stack"); - S390_lowcore.async_stack = stack + STACK_INIT_OFFSET; - return 0; -} - static unsigned long __init stack_alloc_early(void) { unsigned long stack; @@ -453,6 +442,7 @@ static void __init setup_lowcore(void) */ restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET); lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET; lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET; lc->kernel_stack = S390_lowcore.kernel_stack; /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cc98ac26e77c..27c710178033 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1293,8 +1293,8 @@ int __init smp_reinit_ipl_cpu(void) local_mcck_enable(); local_irq_restore(flags); - free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl->async_stack - STACK_INIT_OFFSET), THREAD_SIZE); memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE); memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); return 0; -- cgit v1.2.3 From 22ca1e7738025ae38d07c05bae2af934b1b2c11f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:27 +0200 Subject: s390: move on_thread_stack() to processor.h As preparation for the stackleak feature move on_thread_stack() to processor.h like x86. Also make it __always_inline, and slightly optimize it by reading current task's kernel stack pointer from lowcore. 
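A worked example of the masking, assuming THREAD_SIZE is 16KB (0x4000) so the mask is ~0x3fff: with ksp = 0x12345e58 and current_stack_pointer = 0x12347c20, the XOR is 0x2278, which the mask reduces to 0, so both pointers lie on the same THREAD_SIZE-aligned kernel stack and on_thread_stack() returns true.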
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/entry-common.h | 5 ----- arch/s390/include/asm/processor.h | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 000de2b1e67a..fdd319a622b0 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -60,9 +60,4 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare -static inline bool on_thread_stack(void) -{ - return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); -} - #endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 80ac0c1034dc..efffc28cbad8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -226,6 +226,13 @@ static __always_inline unsigned long __current_stack_pointer(void) return sp; } +static __always_inline bool on_thread_stack(void) +{ + unsigned long ksp = S390_lowcore.kernel_stack; + + return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); +} + static __always_inline unsigned short stap(void) { unsigned short cpu_address; -- cgit v1.2.3 From b94c0ebb1ec752016a3e41bfb66bb51ea905e533 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2023 11:37:28 +0200 Subject: s390: enable HAVE_ARCH_STACKLEAK Add support for the stackleak feature. Whenever the kernel returns to user space the kernel stack is filled with a poison value. Enabling this feature is quite expensive: e.g. after instrumenting the getpid() system call function to have a 4kb stack the result is an increased runtime of the system call by a factor of 3. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/kernel/entry.S | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d610f911677e..d4db47b0610c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -155,6 +155,7 @@ config S390 select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index fc3d1df09736..d5f8cd4319a4 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -150,6 +150,12 @@ _LPP_OFFSET = __LC_LPP .endm #endif + .macro STACKLEAK_ERASE +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + brasl %r14,stackleak_erase_on_task_stack +#endif + .endm + GEN_BR_THUNK %r14 .section .kprobes.text, "ax" @@ -300,6 +306,7 @@ ENTRY(system_call) MBEAR %r2 lgr %r3,%r14 brasl %r14,__do_syscall + STACKLEAK_ERASE lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON @@ -315,6 +322,7 @@ ENDPROC(system_call) ENTRY(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork + STACKLEAK_ERASE lctlg %c1,%c1,__LC_USER_ASCE mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON @@ -375,6 +383,7 @@ ENTRY(pgm_check_handler) brasl %r14,__do_pgm_check tmhh %r8,0x0001 # returning to user space? 
jno .Lpgm_exit_kernel + STACKLEAK_ERASE lctlg %c1,%c1,__LC_USER_ASCE BPON stpt __LC_EXIT_TIMER @@ -440,6 +449,7 @@ ENTRY(\name) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tmhh %r8,0x0001 # returning to user ? jno 2f + STACKLEAK_ERASE lctlg %c1,%c1,__LC_USER_ASCE BPON stpt __LC_EXIT_TIMER -- cgit v1.2.3 From e42ac7789df64120d7d3d57433dfc9f37ec0cb99 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Mar 2023 12:55:46 +0200 Subject: s390/checksum: always use cksm instruction Commit dfe843dce775 ("s390/checksum: support GENERIC_CSUM, enable it for KASAN") switched s390 to use the generic checksum functions, so that KASAN instrumentation also works for checksum functions by avoiding architecture-specific inline assemblies. There is however the problem that the generic csum_partial() function returns a 32 bit value with a 16 bit folded checksum, while the original s390 variant does not fold to 16 bit. This in turn causes the ipib_checksum in lowcore to contain different values depending on kernel config options. The ipib_checksum is used by system dumpers to verify if pointers in lowcore point to valid data. Verification is done by comparing checksum values. The system dumpers still use 32 bit checksum values which are not folded, and therefore the checksum verification fails (incorrectly). The symptom is that reboot after dump does not work anymore when a KASAN instrumented kernel is dumped. Fix this by not using the generic checksum implementation. Instead add an explicit kasan_check_read() so that KASAN knows about the read access from within the inline assembly. Reported-by: Alexander Egorenkov Fixes: dfe843dce775 ("s390/checksum: support GENERIC_CSUM, enable it for KASAN") Tested-by: Alexander Egorenkov Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 4 ---- arch/s390/include/asm/checksum.h | 9 ++------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d4db47b0610c..a7568449ca59 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -26,10 +26,6 @@ config GENERIC_BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y -config GENERIC_CSUM - bool - default y if KASAN - config GENERIC_LOCKBREAK def_bool y if PREEMPTION diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d977a3a2f619..1b6b992cf18e 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -12,12 +12,7 @@ #ifndef _S390_CHECKSUM_H #define _S390_CHECKSUM_H -#ifdef CONFIG_GENERIC_CSUM - -#include <asm-generic/checksum.h> - -#else /* CONFIG_GENERIC_CSUM */ - +#include <linux/kasan-checks.h> #include <linux/uaccess.h> #include <linux/in6.h> @@ -40,6 +35,7 @@ static inline __wsum csum_partial(const void *buff, int len, __wsum sum) .odd = (unsigned long) len, }; + kasan_check_read(buff, len); asm volatile( "0: cksm %[sum],%[rp]\n" " jo 0b\n" @@ -135,5 +131,4 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, return csum_fold((__force __wsum)(sum >> 32)); } -#endif /* CONFIG_GENERIC_CSUM */ #endif /* _S390_CHECKSUM_H */ -- cgit v1.2.3 From 11018ef90ce73d9de8ac6e565c00cc8631b46328 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Mar 2023 12:55:47 +0200 Subject: s390/checksum: remove not needed uaccess.h include Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 1b6b992cf18e..69837eec2ff5 100644 ---
a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -13,7 +13,6 @@ #define _S390_CHECKSUM_H #include <linux/kasan-checks.h> -#include <linux/uaccess.h> #include <linux/in6.h> /* -- cgit v1.2.3 From bb87190c9d46c4285696e071d5972a534bb107cc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 31 Mar 2023 15:03:22 +0200 Subject: s390/kaslr: provide kaslr_enabled() function Just like other architectures provide a kaslr_enabled() function, instead of directly accessing a global variable. Also pass the renamed __kaslr_enabled variable from the decompressor to the kernel, so that kaslr_enabled() is available there too. This will be used by a subsequent patch which randomizes the module base load address. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/boot/boot.h | 1 - arch/s390/boot/ipl_parm.c | 6 +++--- arch/s390/boot/pgm_check_info.c | 2 +- arch/s390/boot/startup.c | 6 +++--- arch/s390/include/asm/setup.h | 7 +++++++ arch/s390/kernel/setup.c | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 872963c8a0ab..5baa855c21cb 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -79,7 +79,6 @@ extern const char kernel_version[]; extern unsigned long memory_limit; extern unsigned long vmalloc_size; extern int vmalloc_size_set; -extern int kaslr_enabled; extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index c1f8f7999fed..8753cb0339e5 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -24,11 +24,11 @@ int __bootdata(noexec_disabled); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); +int __bootdata_preserved(__kaslr_enabled); unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; unsigned long memory_limit; int vmalloc_size_set; -int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) { @@ -264,7 +264,7 @@ void parse_boot_command_line(void) char *args; int rc; - kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); + __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); args = strcpy(command_line_buf, early_command_line); while (*args) { args = next_arg(args, &param, &val); @@ -300,7 +300,7 @@ void parse_boot_command_line(void) modify_fac_list(val); if (!strcmp(param, "nokaslr")) - kaslr_enabled = 0; + __kaslr_enabled = 0; #if IS_ENABLED(CONFIG_KVM) if (!strcmp(param, "prot_virt")) { diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 0861e3c403f8..97244cd7a206 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -153,7 +153,7 @@ void print_pgm_check_info(void) decompressor_printk("Kernel command line: %s\n", early_command_line); decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); - if (kaslr_enabled) + if (kaslr_enabled()) decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); decompressor_printk("PSW : %016lx %016lx (%pS)\n", S390_lowcore.psw_save_area.mask, diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bdf305a93987..cc0ca7e0cd6d 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -160,10 +160,10 @@ static void 
setup_ident_map_size(unsigned long max_physmem_end) #ifdef CONFIG_CRASH_DUMP if (oldmem_data.start) { - kaslr_enabled = 0; + __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { - kaslr_enabled = 0; + __kaslr_enabled = 0; if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) ident_map_size = min(ident_map_size, hsa_size); } @@ -315,7 +315,7 @@ void startup_kernel(void) save_ipl_cert_comp_list(); rescue_initrd(safe_addr, ident_map_size); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { + if (kaslr_enabled()) { random_lma = get_random_base(); if (random_lma) { __kaslr_offset = random_lma - vmlinux.default_lma; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b28d250efbaa..f191255c60db 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -146,6 +146,13 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +extern int __kaslr_enabled; +static inline int kaslr_enabled(void) +{ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return __kaslr_enabled; + return 0; +} struct oldmem_data { unsigned long start; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f2d0d52b3070..0903fe356634 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -150,6 +150,7 @@ unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); unsigned long __bootdata_preserved(__kaslr_offset); +int __bootdata_preserved(__kaslr_enabled); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); -- cgit v1.2.3 From 34644cc2e15a7a91ec36b496e218694d17371589 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 31 Mar 2023 15:03:23 +0200 Subject: s390/kaslr: randomize module base load address Randomize the load address of modules in the kernel to make KASLR effective for modules. This is the s390 variant of commit e2b32e678513 ("x86, kaslr: randomize module base load address"). Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/module.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 2d159b32885b..6588f4efe378 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -26,6 +26,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -35,6 +36,24 @@ #define PLT_ENTRY_SIZE 22 +static unsigned long get_module_load_offset(void) +{ + static DEFINE_MUTEX(module_kaslr_mutex); + static unsigned long module_load_offset; + + if (!kaslr_enabled()) + return 0; + /* + * Calculate the module_load_offset the first time this code + * is called. Once calculated it stays the same until reboot. 
+ */ + mutex_lock(&module_kaslr_mutex); + if (!module_load_offset) + module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; + mutex_unlock(&module_kaslr_mutex); + return module_load_offset; +} + void *module_alloc(unsigned long size) { gfp_t gfp_mask = GFP_KERNEL; @@ -42,7 +61,8 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + p = __vmalloc_node_range(size, MODULE_ALIGN, + MODULES_VADDR + get_module_load_offset(), MODULES_END, gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { -- cgit v1.2.3 From bac30ea9ef80624dfe4bf20c0fc2073dcc771a87 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Fri, 10 Mar 2023 12:52:37 +0100 Subject: s390/ipl: fix physical-virtual confusion for diag308 Diag 308 subcodes expect a physical address as their parameter. This currently is not a bug, but in the future physical and virtual addresses might differ. Fix the confusion by doing a virtual-to-physical conversion in the exported diag308() and leave the assembly wrapper __diag308() alone. Note that several callers pass NULL as addr, so check for the case when NULL is passed and pass 0 to hardware since virt_to_phys(0) might be nonzero. Suggested-by: Marc Hartmayer Signed-off-by: Nico Boehr Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 0f91cd401eef..43de939b7af1 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -176,11 +176,11 @@ static bool reipl_fcp_clear; static bool reipl_ccw_clear; static bool reipl_eckd_clear; -static inline int __diag308(unsigned long subcode, void *addr) +static inline int __diag308(unsigned long subcode, unsigned long addr) { union register_pair r1; - r1.even = (unsigned long) addr; + r1.even = addr; r1.odd = 0; asm volatile( " diag %[r1],%[subcode],0x308\n" @@ -195,7 +195,7 @@ static inline int __diag308(unsigned long subcode, void *addr) int diag308(unsigned long subcode, void *addr) { diag_stat_inc(DIAG_STAT_X308); - return __diag308(subcode, addr); + return __diag308(subcode, addr ? virt_to_phys(addr) : 0); } EXPORT_SYMBOL_GPL(diag308); -- cgit v1.2.3 From 22e99fa56443f19b96e2e888854bfc202dd97069 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Apr 2023 20:55:16 +0200 Subject: s390/mm: implement set_memory_rox() Provide the s390 specific native set_memory_rox() implementation to avoid frequent set_memory_ro(); set_memory_x() call pairs. This is the s390 variant of commit 60463628c9e0 ("x86/mm: Implement native set_memory_rox()"). 
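A minimal usage sketch (hypothetical caller; the kprobes and ftrace patches later in this series do exactly this for their instruction pages):

	/* make one freshly written code page read-only and executable in one pass */
	set_memory_rox((unsigned long)page, 1);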
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/set_memory.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 950d87bd997a..f4362ce1e17a 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -34,6 +34,12 @@ static inline int set_memory_x(unsigned long addr, int numpages) return __set_memory(addr, numpages, SET_MEMORY_X); } +#define set_memory_rox set_memory_rox +static inline int set_memory_rox(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X); +} + static inline int set_memory_4k(unsigned long addr, int numpages) { return __set_memory(addr, numpages, SET_MEMORY_4K); -- cgit v1.2.3 From f0a2a7c527849bf1b112a43f0b8d0658a7b4e6ec Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Apr 2023 20:55:17 +0200 Subject: s390/mm: implement set_memory_rwnx() Given that set_memory_rox() is implemented, provide also set_memory_rwnx(). This allows to get rid of all open coded __set_memory() usages in s390 architecture code. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/set_memory.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index f4362ce1e17a..25f2464dbb7e 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -40,6 +40,11 @@ static inline int set_memory_rox(unsigned long addr, int numpages) return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X); } +static inline int set_memory_rwnx(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX); +} + static inline int set_memory_4k(unsigned long addr, int numpages) { return __set_memory(addr, numpages, SET_MEMORY_4K); -- cgit v1.2.3 From f9b2d96c4fa4475929e26cbac4027801d93c6e53 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Apr 2023 20:55:18 +0200 Subject: s390/mm: use set_memory_*() helpers instead of open coding Given that set_memory_rox() and set_memory_rwnx() exist, it is possible to get rid of all open coded __set_memory() usages and replace them with proper helper calls everywhere. 
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/kprobes.c | 2 +- arch/s390/mm/init.c | 5 ++--- arch/s390/mm/vmem.c | 34 ++++++++++++++-------------------- 3 files changed, 17 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 7b41ceecbb25..d4b863ed0aa7 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -41,7 +41,7 @@ void *alloc_insn_page(void) page = module_alloc(PAGE_SIZE); if (!page) return NULL; - __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X); + set_memory_rox((unsigned long)page, 1); return page; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 144447d5cb4c..8d94e29adcdb 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -176,9 +176,8 @@ void __init mem_init(void) void free_initmem(void) { - __set_memory((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, - SET_MEMORY_RW | SET_MEMORY_NX); + set_memory_rwnx((unsigned long)_sinittext, + (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 242f95aa9801..5b22c6e24528 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -732,30 +732,24 @@ void __init vmem_map_init(void) memblock_region_cmp, memblock_region_swap); __for_each_mem_range(i, &memblock.memory, &memory_rwx, NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) { - __set_memory((unsigned long)__va(base), - (end - base) >> PAGE_SHIFT, - SET_MEMORY_RW | SET_MEMORY_NX); + set_memory_rwnx((unsigned long)__va(base), + (end - base) >> PAGE_SHIFT); } #ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) - __set_memory(__sha(base), - (__sha(end) - __sha(base)) >> PAGE_SHIFT, - SET_MEMORY_RW | SET_MEMORY_NX); + for_each_mem_range(i, &base, &end) { + set_memory_rwnx(__sha(base), + (__sha(end) - __sha(base)) >> PAGE_SHIFT); + } #endif - - __set_memory((unsigned long)_stext, - (unsigned long)(_etext - _stext) >> PAGE_SHIFT, - SET_MEMORY_RO | SET_MEMORY_X); - __set_memory((unsigned long)_etext, - (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT, - SET_MEMORY_RO); - __set_memory((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, - SET_MEMORY_RO | SET_MEMORY_X); - __set_memory(__stext_amode31, - (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, - SET_MEMORY_RO | SET_MEMORY_X); + set_memory_rox((unsigned long)_stext, + (unsigned long)(_etext - _stext) >> PAGE_SHIFT); + set_memory_ro((unsigned long)_etext, + (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT); + set_memory_rox((unsigned long)_sinittext, + (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); + set_memory_rox(__stext_amode31, + (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT); /* lowcore must be executable for LPSWE */ if (static_key_enabled(&cpu_has_bear)) -- cgit v1.2.3 From 7c7ab788c0959c363b5cb2ac5ff4822fc12ccba5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Apr 2023 20:55:19 +0200 Subject: s390/ftrace: do not assume module_alloc() returns executable memory The ftrace code assumes at two places that module_alloc() returns executable memory. While this is currently true, this will be changed with a subsequent patch to follow other architectures which implement ARCH_HAS_STRICT_MODULE_RWX. 
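Once module_alloc() returns non-executable memory, the required pattern looks roughly like this (a sketch under that assumption, mirroring the trampoline handling in this patch):

	void *tramp = module_alloc(PAGE_SIZE);		/* RW-, not executable */
	if (!tramp)
		return -ENOMEM;
	memcpy(tramp, start, end - start);		/* populate while still writable */
	set_memory_rox((unsigned long)tramp, 1);	/* then seal read-only + executable */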
Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ftrace.c | 2 +- arch/s390/kernel/module.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 6f6c44b7af89..c46381ea04ec 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -226,7 +226,7 @@ static int __init ftrace_plt_init(void) start = ftrace_shared_hotpatch_trampoline(&end); memcpy(ftrace_plt, start, end - start); - set_memory_ro((unsigned long)ftrace_plt, 1); + set_memory_rox((unsigned long)ftrace_plt, 1); return 0; } device_initcall(ftrace_plt_init); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 6588f4efe378..57c13e30e66d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -509,7 +509,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, start = module_alloc(numpages * PAGE_SIZE); if (!start) return -ENOMEM; - set_memory_ro((unsigned long)start, numpages); + set_memory_rox((unsigned long)start, numpages); end = start + size; me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start; -- cgit v1.2.3 From 1707c1165283a9634717971f17692535c21ab0eb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Apr 2023 20:55:21 +0200 Subject: s390/module: create module allocations without exec permissions This is the s390 variant of commit 7dfac3c5f40e ("arm64: module: create module allocations without exec permissions"): "The core code manages the executable permissions of code regions of modules explicitly. It is no longer necessary to create the module vmalloc regions with RWX permissions. So create them with RW- permissions instead, which is preferred from a security perspective." Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 57c13e30e66d..07a13546980d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -63,7 +63,7 @@ void *module_alloc(unsigned long size) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, - gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, + gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); -- cgit v1.2.3 From 3cdf0269cdc2c4728b71af4199823b6284379d42 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 24 Feb 2023 08:55:21 +0100 Subject: s390/cpum_cf: log bad return code of function cfset_all_copy When function cfset_all_copy() fails, also log the bad return code in the debug statement (when turned on). 
No functional change Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index c9ab971498d6..0b134c220948 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1290,7 +1290,7 @@ static size_t cfset_needspace(unsigned int sets) static int cfset_all_copy(unsigned long arg, cpumask_t *mask) { struct s390_ctrset_read __user *ctrset_read; - unsigned int cpu, cpus, rc; + unsigned int cpu, cpus, rc = 0; void __user *uptr; ctrset_read = (struct s390_ctrset_read __user *)arg; @@ -1304,17 +1304,20 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask) rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, cpuhw->used); - if (rc) - return -EFAULT; + if (rc) { + rc = -EFAULT; + goto out; + } uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; cond_resched(); } cpus = cpumask_weight(mask); if (put_user(cpus, &ctrset_read->no_cpus)) - return -EFAULT; - debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, + rc = -EFAULT; +out: + debug_sprintf_event(cf_dbg, 4, "%s rc %d copied %ld\n", __func__, rc, uptr - (void __user *)ctrset_read->data); - return 0; + return rc; } static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, -- cgit v1.2.3 From 26300860c2be489fa8c919e55fba28fa782cbb1e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 31 Mar 2023 11:42:20 +0200 Subject: s390/cpum_cf: remove unnecessary copy_from_user call Struct s390_ctrset_read userdata is filled by ioctl_read operation using put_user/copy_to_user. However, the ctrset->data value access is not performed anywhere during the ioctl_read operation. Remove unnecessary copy_from_user() call. No functional change. Signed-off-by: Thomas Richter Suggested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 0b134c220948..4c158faa3075 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1388,14 +1388,10 @@ static int cfset_all_read(unsigned long arg, struct cfset_request *req) static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) { - struct s390_ctrset_read read; int ret = -ENODATA; - if (req && req->ctrset) { - if (copy_from_user(&read, (char __user *)arg, sizeof(read))) - return -EFAULT; + if (req && req->ctrset) ret = cfset_all_read(arg, req); - } return ret; } -- cgit v1.2.3 From a1d2d9cbaf0512451471705264bee7b3f50d4c29 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Mar 2023 10:19:57 +0100 Subject: s390/boot: do not change default_lma The current modification of the default_lma is illogical and should be avoided. It would be more appropriate to introduce and utilize a new variable vmlinux_lma instead, so that default_lma remains unchanged and at its original "default" value of 0x100000. 
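With purely illustrative numbers: a randomized vmlinux_lma of 0x22500000 yields __kaslr_offset = 0x22500000 - 0x100000 = 0x22400000, while vmlinux.default_lma itself now stays at 0x100000 throughout.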
Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cc0ca7e0cd6d..e1edb6186bee 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -235,9 +235,9 @@ static unsigned long setup_kernel_memory_layout(void) /* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ -static void clear_bss_section(void) +static void clear_bss_section(unsigned long vmlinux_lma) { - memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); + memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size); } /* @@ -256,7 +256,6 @@ static void setup_vmalloc_size(void) static void offset_vmlinux_info(unsigned long offset) { - vmlinux.default_lma += offset; *(unsigned long *)(&vmlinux.entry) += offset; vmlinux.bootdata_off += offset; vmlinux.bootdata_preserved_off += offset; @@ -278,7 +277,7 @@ void startup_kernel(void) { unsigned long max_physmem_end; - unsigned long random_lma; + unsigned long vmlinux_lma = 0; unsigned long asce_limit; unsigned long safe_addr; void *img; @@ -316,26 +315,26 @@ void startup_kernel(void) rescue_initrd(safe_addr, ident_map_size); if (kaslr_enabled()) { - random_lma = get_random_base(); - if (random_lma) { - __kaslr_offset = random_lma - vmlinux.default_lma; - img = (void *)vmlinux.default_lma; + vmlinux_lma = get_random_base(); + if (vmlinux_lma) { + __kaslr_offset = vmlinux_lma - vmlinux.default_lma; offset_vmlinux_info(__kaslr_offset); } } + vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma; + physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); - memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); + memmove((void *)vmlinux_lma, img, vmlinux.image_size); } else if (__kaslr_offset) { - memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + img = (void *)vmlinux.default_lma; + memmove((void *)vmlinux_lma, img, vmlinux.image_size); memset(img, 0, vmlinux.image_size); } /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); - if (!__kaslr_offset) - physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size); physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true); /* @@ -351,7 +350,7 @@ void startup_kernel(void) * - copy_bootdata() must follow setup_vmem() to propagate changes to * bootdata made by setup_vmem() */ - clear_bss_section(); + clear_bss_section(vmlinux_lma); handle_relocs(__kaslr_offset); setup_vmem(asce_limit); copy_bootdata(); -- cgit v1.2.3 From 898435203c115b164b96f30be7d9c790bbb50338 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Mar 2023 11:00:19 +0100 Subject: s390/boot: pin amode31 default lma The special amode31 part of the kernel must always remain below 2GB. Place it just under vmlinux.default_lma by default, which makes it easier to debug amode31 as its default lma is known: 0x100000 - 0x3000 (currently, amode31's size is 3 pages). This location is always available as it is originally occupied by the vmlinux archive.
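In numbers: 0x100000 - 0x3000 = 0xfd000, so amode31 lands directly below the kernel's default load address, well below the 2GB limit, in memory that only held the compressed vmlinux archive before decompression.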
Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e1edb6186bee..45c30c2b1a7a 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -278,6 +278,7 @@ void startup_kernel(void) { unsigned long max_physmem_end; unsigned long vmlinux_lma = 0; + unsigned long amode31_lma; unsigned long asce_limit; unsigned long safe_addr; void *img; @@ -335,7 +336,8 @@ void startup_kernel(void) /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); - physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true); + amode31_lma = vmlinux.default_lma - vmlinux.amode31_size; + physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); /* * The order of the following operations is important: -- cgit v1.2.3 From 6e259bc5a15ec49693fa3d19fdd4511f7679a20d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 21 Feb 2023 23:08:42 +0100 Subject: s390/kaslr: generalize and improve random base distribution Improve the distribution algorithm of the random base address to ensure uniformity among all suitable addresses. To generate a random value once, and to build a continuous range in which every value is suitable, count all the suitable addresses (referred to as positions) that can be used as a base address. The positions are counted by iterating over the usable memory ranges. For each range that is big enough to accommodate the image, count all the suitable addresses where the image can be placed, while taking reserved memory ranges into consideration. A new function "iterate_valid_positions()" has a dual purpose. Firstly, it is called to count the positions in a given memory range, and secondly, to convert a random position back to an address. "get_random_base()" has been replaced with the more generic "randomize_within_range()", which can now be called for randomizing base addresses not just for the kernel image.
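A small worked example of the counting pass: a usable range [0x100000, 0x200000) with image size 0x40000 and alignment 0x10000 contributes (0x200000 - 0x100000 - 0x40000) / 0x10000 + 1 = 13 positions; an intersecting reserved range simply splits the range, and the counts of the remaining pieces are added. One random number r in [0, positions) is then drawn, and a second identical walk translates position r + 1 back into an address.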
Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/boot/boot.h | 4 +- arch/s390/boot/kaslr.c | 110 ++++++++++++++++++++++++++++++++++++++---- arch/s390/boot/physmem_info.c | 5 ++ arch/s390/boot/startup.c | 4 +- 4 files changed, 112 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 5baa855c21cb..222c6886acf6 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -54,6 +54,7 @@ unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned lon unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); +unsigned long get_physmem_alloc_pos(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -66,7 +67,8 @@ void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); void print_pgm_check_info(void); -unsigned long get_random_base(void); +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max); void setup_vmem(unsigned long asce_limit); void __printf(1, 2) decompressor_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 71f75f03f800..90602101e2ae 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -91,16 +91,108 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } -unsigned long get_random_base(void) +static void sort_reserved_ranges(struct reserved_range *res, unsigned long size) { - unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; - unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size; - unsigned long random; + struct reserved_range tmp; + int i, j; - /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */ - if (get_random(physmem_info.usable - minimal_pos, &random)) - return 0; + for (i = 1; i < size; i++) { + tmp = res[i]; + for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--) + res[j + 1] = res[j]; + res[j + 1] = tmp; + } +} + +static unsigned long iterate_valid_positions(unsigned long size, unsigned long align, + unsigned long _min, unsigned long _max, + struct reserved_range *res, size_t res_count, + bool pos_count, unsigned long find_pos) +{ + unsigned long start, end, tmp_end, range_pos, pos = 0; + struct reserved_range *res_end = res + res_count; + struct reserved_range *skip_res; + int i; + + align = max(align, 8UL); + _min = round_up(_min, align); + for_each_physmem_usable_range(i, &start, &end) { + if (_min >= end) + continue; + start = round_up(start, align); + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + + while (start + size <= end) { + /* skip reserved ranges below the start */ + while (res && res->end <= start) { + res++; + if (res >= res_end) + res = NULL; + } + skip_res = NULL; + tmp_end = end; + /* has intersecting reserved range */ + if (res && res->start < end) { + skip_res = res; + tmp_end = res->start; + } + if (start + size <= tmp_end) { + range_pos = (tmp_end - start - size) / align + 1; + if (pos_count) { + pos += range_pos; + } else { + if (range_pos >= find_pos) + return start + (find_pos - 1) * align; + find_pos -= range_pos; + } + } + if (!skip_res) + break; + start = round_up(skip_res->end, align); + } + } - return 
physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE, - vmlinux.default_lma, minimal_pos + random, false); + return pos_count ? pos : 0; +} + +/* + * Two types of decompressor memory allocations/reserves are considered + * differently. + * + * "Static" or "single" allocations are done via physmem_alloc_range() and + * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each + * type of "static" allocation can only have one allocation per type and + * cannot have chains. + * + * On the other hand, "dynamic" or "repetitive" allocations are done via + * physmem_alloc_top_down(). These allocations are tightly packed together + * top down from the end of online memory. physmem_alloc_pos represents + * current position where those allocations start. + * + * Functions randomize_within_range() and iterate_valid_positions() + * only consider "dynamic" allocations by never looking above + * physmem_alloc_pos. "Static" allocations, however, are explicitly + * considered by checking the "res" (reserves) array. The first + * reserved_range of a "dynamic" allocation may also be checked along the + * way, but it will always be above the maximum value anyway. + */ +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max) +{ + struct reserved_range res[RR_MAX]; + unsigned long max_pos, pos; + + memcpy(res, physmem_info.reserved, sizeof(res)); + sort_reserved_ranges(res, ARRAY_SIZE(res)); + max = min(max, get_physmem_alloc_pos()); + + max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0); + if (!max_pos) + return 0; + if (get_random(max_pos, &pos)) + return 0; + return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1); } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 4ee9b7381142..0cf79826eef9 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -321,3 +321,8 @@ unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned lon physmem_alloc_ranges = ranges_left; return addr; } + +unsigned long get_physmem_alloc_pos(void) +{ + return physmem_alloc_pos; +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 45c30c2b1a7a..638a6cf8a075 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -316,7 +316,9 @@ void startup_kernel(void) rescue_initrd(safe_addr, ident_map_size); if (kaslr_enabled()) { - vmlinux_lma = get_random_base(); + vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size, + THREAD_SIZE, vmlinux.default_lma, + ident_map_size); if (vmlinux_lma) { __kaslr_offset = vmlinux_lma - vmlinux.default_lma; offset_vmlinux_info(__kaslr_offset); -- cgit v1.2.3 From b3e0423c4e76b19f04799e01b6443949f5fecbbc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 15 Mar 2023 13:54:14 +0100 Subject: s390/kaslr: randomize amode31 base address When the KASLR is enabled, randomize the base address of the amode31 image within the first 2 GB, similar to the approach taken for the vmlinux image. This makes it harder to predict the location of amode31 data and code. 
Reviewed-by: Alexander Gordeev
Signed-off-by: Vasily Gorbik
---
 arch/s390/boot/startup.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 638a6cf8a075..64bd7ac3e35d 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -278,7 +278,7 @@ void startup_kernel(void)
 {
	unsigned long max_physmem_end;
	unsigned long vmlinux_lma = 0;
-	unsigned long amode31_lma;
+	unsigned long amode31_lma = 0;
	unsigned long asce_limit;
	unsigned long safe_addr;
	void *img;
@@ -338,7 +338,9 @@ void startup_kernel(void)
	/* vmlinux decompression is done, shrink reserved low memory */
	physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);

-	amode31_lma = vmlinux.default_lma - vmlinux.amode31_size;
+	if (kaslr_enabled())
+		amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+	amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size;
	physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);

	/*
--
cgit v1.2.3

From c01f2a5fe4e3df4ab846fcba5435ca9fdee4f583 Mon Sep 17 00:00:00 2001
From: Thomas Richter
Date: Fri, 24 Feb 2023 09:01:51 +0100
Subject: s390/cpum_cf: simplify pr_err() statement in cpumf_pmu_enable/disable

Simplify the pr_err() statement into one line and omit the return statement.
No functional change.

Signed-off-by: Thomas Richter
Acked-by: Sumanth Korikkar
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/perf_cpum_cf.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 4c158faa3075..96b581967334 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -471,13 +471,10 @@ static void cpumf_pmu_enable(struct pmu *pmu)
		return;

	err = lcctl(cpuhw->state | cpuhw->dev_state);
-	if (err) {
-		pr_err("Enabling the performance measuring unit "
-		       "failed with rc=%x\n", err);
-		return;
-	}
-
-	cpuhw->flags |= PMU_F_ENABLED;
+	if (err)
+		pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags |= PMU_F_ENABLED;
 }

 /*
@@ -497,13 +494,10 @@ static void cpumf_pmu_disable(struct pmu *pmu)
	inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
	inactive |= cpuhw->dev_state;
	err = lcctl(inactive);
-	if (err) {
-		pr_err("Disabling the performance measuring unit "
-		       "failed with rc=%x\n", err);
-		return;
-	}
-
-	cpuhw->flags &= ~PMU_F_ENABLED;
+	if (err)
+		pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags &= ~PMU_F_ENABLED;
 }

 #define PMC_INIT	0UL
--
cgit v1.2.3

From d24e18ef7e13c9cd580ebee771f7ccb9d3f6ee42 Mon Sep 17 00:00:00 2001
From: Marc Hartmayer
Date: Mon, 31 Jan 2022 13:22:31 +0000
Subject: s390/boot: improve install.sh script

Use proper quoting for the variables and explicitly distinguish between
command options and positional arguments.

Acked-by: Heiko Carstens
Signed-off-by: Marc Hartmayer
Signed-off-by: Vasily Gorbik
---
 arch/s390/boot/install.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index 616ba1660f08..a13dd2f2aa1c 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -17,8 +17,8 @@
 echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
	"bootloader config required" >&2

-if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
-if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
+if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
+if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi

-cat $2 > $4/vmlinuz-$1
-cp $3 $4/System.map-$1
+cat -- "$2" > "$4/vmlinuz-$1"
+cp -- "$3" "$4/System.map-$1"
--
cgit v1.2.3

From 07fdd6627f7f9c72ed68d531653b56df81da9996 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 6 Apr 2023 13:31:28 +0200
Subject: s390/mm: rename POPULATE_ONE2ONE to POPULATE_DIRECT

Architectures generally use the "direct map" wording for mapping the whole
physical memory. Use that wording as well in arch/s390/boot/vmem.c, instead
of "one to one", in order to avoid confusion. This also matches what is
already done in arch/s390/mm/vmem.c.

Reviewed-by: Alexander Gordeev
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/boot/vmem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index b01ea2abda03..df91d9065f7e 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -19,7 +19,7 @@ unsigned long __bootdata_preserved(s390_invalid_asce);

 enum populate_mode {
	POPULATE_NONE,
-	POPULATE_ONE2ONE,
+	POPULATE_DIRECT,
	POPULATE_ABS_LOWCORE,
 #ifdef CONFIG_KASAN
	POPULATE_KASAN_MAP_SHADOW,
@@ -237,7 +237,7 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
	switch (mode) {
	case POPULATE_NONE:
		return -1;
-	case POPULATE_ONE2ONE:
+	case POPULATE_DIRECT:
		return addr;
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
@@ -404,9 +404,9 @@ void setup_vmem(unsigned long asce_limit)
	 * To prevent creation of a large page at address 0 first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
-	pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
+	pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
	for_each_physmem_usable_range(i, &start, &end)
-		pgtable_populate(start, end, POPULATE_ONE2ONE);
+		pgtable_populate(start, end, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
--
cgit v1.2.3

From 81e8479649853ffafc714aca4a9c0262efd3160a Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 6 Apr 2023 13:31:29 +0200
Subject: s390/mm: fix direct map accounting

Commit bb1520d581a3 ("s390/mm: start kernel with DAT enabled") did not
implement direct map accounting in the early page table setup code. As a
result, the reported values are bogus now:

$ cat /proc/meminfo
...
DirectMap4k: 5120 kB
DirectMap1M: 18446744073709546496 kB
DirectMap2G: 0 kB

Fix this by adding the missing accounting. The result looks sane again:

$ cat /proc/meminfo
...
DirectMap4k: 6156 kB
DirectMap1M: 2091008 kB
DirectMap2G: 6291456 kB

Fixes: bb1520d581a3 ("s390/mm: start kernel with DAT enabled")
Reviewed-by: Alexander Gordeev
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/boot/vmem.c           | 18 ++++++++++++++++--
 arch/s390/include/asm/pgtable.h |  2 +-
 arch/s390/mm/pageattr.c         |  2 +-
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index df91d9065f7e..acb1f8b53105 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -13,6 +13,10 @@

 unsigned long __bootdata_preserved(s390_invalid_asce);

+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+#endif
+
 #define init_mm		(*(struct mm_struct *)vmlinux.init_mm_off)
 #define swapper_pg_dir	vmlinux.swapper_pg_dir_off
 #define invalid_pg_dir	vmlinux.invalid_pg_dir_off
@@ -267,6 +271,7 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
 static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
 {
+	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
@@ -277,14 +282,17 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
			entry = __pte(_pa(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
			set_pte(pte, entry);
+			pages++;
		}
	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_4K, pages);
 }

 static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
 {
-	unsigned long next;
+	unsigned long next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

@@ -298,6 +306,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
				entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
				entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
				set_pmd(pmd, entry);
+				pages++;
				continue;
			}
			pte = boot_pte_alloc();
@@ -307,12 +316,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_1M, pages);
 }

 static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
 {
-	unsigned long next;
+	unsigned long next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

@@ -326,6 +337,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
				entry = __pud(_pa(addr, _REGION3_SIZE, mode));
				entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
				set_pud(pud, entry);
+				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
@@ -335,6 +347,8 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_2G, pages);
 }

 static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 2c70b4d1263d..acbe1ac2d571 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -34,7 +34,7 @@ enum {
	PG_DIRECT_MAP_MAX
 };

-extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

 static inline void update_page_count(int level, long count)
 {
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 7838e9c70000..77f31791044d 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -41,7 +41,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 }

 #ifdef CONFIG_PROC_FS
-atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

 void arch_report_meminfo(struct seq_file *m)
 {
--
cgit v1.2.3

From 3071e9b39106e1b9576584a372f446c23b258e3f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva"
Date: Thu, 6 Apr 2023 08:28:42 -0600
Subject: s390/diag: replace zero-length array with flexible-array member

Zero-length arrays are deprecated [1] and have to be replaced by C99
flexible-array members. This helps with the ongoing efforts to tighten
the FORTIFY_SOURCE routines on memcpy() and helps to make progress
towards globally enabling -fstrict-flex-arrays=3 [2]

Link: https://github.com/KSPP/linux/issues/78 [1]
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [2]
Signed-off-by: Gustavo A. R. Silva
Link: https://lore.kernel.org/r/ZC7XGpUtVhqlRLhH@work
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/diag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 674a939f16ee..902e0330dd91 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -90,7 +90,7 @@ struct diag8c {
	u8 num_partitions;
	u16 width;
	u16 height;
-	u8 data[0];
+	u8 data[];
 } __packed __aligned(4);

 extern int diag8c(struct diag8c *out, struct ccw_dev_id *devno);
--
cgit v1.2.3

From 6ca87bc4c8eee464cc13259aaf6881d8df3f86f9 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva"
Date: Thu, 6 Apr 2023 08:29:35 -0600
Subject: s390/fcx: replace zero-length array with flexible-array member

Zero-length arrays are deprecated [1] and have to be replaced by C99
flexible-array members. This helps with the ongoing efforts to tighten
the FORTIFY_SOURCE routines on memcpy() and helps to make progress
towards globally enabling -fstrict-flex-arrays=3 [2]

Link: https://github.com/KSPP/linux/issues/78 [1]
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [2]
Signed-off-by: Gustavo A. R. Silva
Link: https://lore.kernel.org/r/ZC7XT5prvoE4Yunm@work
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/fcx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index b8a028a36173..29784b4b44f6 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -286,7 +286,7 @@ struct tccb_tcat {
  */
 struct tccb {
	struct tccb_tcah tcah;
-	u8 tca[0];
+	u8 tca[];
 } __attribute__ ((packed, aligned(8)));

 struct tcw *tcw_get_intrg(struct tcw *tcw);
--
cgit v1.2.3

From ca1382eafae50377088772d6c485ab6b89da2e56 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Tue, 11 Apr 2023 11:13:01 +0200
Subject: s390/debug: replace zero-length array with flexible-array member

There are numerous patches which convert zero-length arrays to
flexible-array members. Convert the remaining s390 occurrences.

Suggested-by: Gustavo A. R. Silva
Link: https://github.com/KSPP/linux/issues/78
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 221c865785c2..a85e0c3e7027 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -60,7 +60,7 @@ typedef struct {
	 * except of floats, and long long (32 bit)
	 *
	 */
-	long args[0];
+	long args[];
 } debug_sprintf_entry_t;

 /* internal function prototyes */
--
cgit v1.2.3

From 46c4d945ea1f9beebf49148b0d4aafb44e41ceb6 Mon Sep 17 00:00:00 2001
From: Thomas Richter
Date: Tue, 4 Apr 2023 14:47:55 +0200
Subject: s390/cpum_cf: introduce static CPU counter facility information

The CPU measurement facility counter information instruction qctri()
retrieves information about the available counter sets.

The information varies between machine generations, but is constant when
running on a particular machine. For example the CPU measurement facility
counter first and second version numbers determine the amount of counters
in a counter set. This information never changes.

The counter sets are identical for all CPUs in the system. It does not
matter which CPU performs the instruction.

Authorization control of the CPU Measurement facility can only be changed
in the activation profile while the LPAR is not running.

Retrieve the CPU measurement counter information at device driver
initialization time and use its constant values.

Function validate_ctr_version() verifies if a user provided CPU
Measurement counter facility counter is valid and defined. It now uses
the newly introduced static CPU counter facility information.

To avoid repeated recalculation of the counter set sizes (numbers of
counters per set), which never changes on a running machine, calculate
the counter set size once at device driver initialization and store the
result in an array. Functions cpum_cf_make_setsize() and
cpum_cf_read_setsize() are introduced.

Finally remove the cpu_cf_events::info member and use the static CPU
counter facility information instead.

Signed-off-by: Thomas Richter
Acked-by: Heiko Carstens
Acked-by: Sumanth Korikkar
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/perf_cpum_cf.c | 132 ++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 67 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 96b581967334..e235b5b83d3c 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -76,7 +76,6 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
 }

 struct cpu_cf_events {
-	struct cpumf_ctr_info	info;
	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
	u64			state;		/* For perf_event_open SVC */
	u64			dev_state;	/* For /dev/hwctr */
@@ -95,6 +94,15 @@ static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
 static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
 static debug_info_t *cf_dbg;

+/*
+ * The CPU Measurement query counter information instruction contains
+ * information which varies per machine generation, but is constant and
+ * does not change when running on a particular machine, such as counter
+ * first and second version number. This is needed to determine the size
+ * of counter sets. Extract this information at device driver initialization.
+ */
+static struct cpumf_ctr_info	cpumf_ctr_info;
+
 #define CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
						/* interval in seconds */
@@ -167,11 +175,10 @@ struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
 /* Create the trailer data at the end of a page. */
 static void cfdiag_trailer(struct cf_trailer_entry *te)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
	struct cpuid cpuid;

-	te->cfvn = cpuhw->info.cfvn;		/* Counter version numbers */
-	te->csvn = cpuhw->info.csvn;
+	te->cfvn = cpumf_ctr_info.cfvn;		/* Counter version numbers */
+	te->csvn = cpumf_ctr_info.csvn;

	get_cpu_id(&cpuid);			/* Machine type */
	te->mach_type = cpuid.machine;
@@ -184,50 +191,60 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
 }

 /*
- * Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
+ * The number of counters per counter set varies between machine generations,
+ * but is constant when running on a particular machine generation.
+ * Determine each counter set size at device driver initialization and
+ * retrieve it later.
  */
-static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
-				  struct cpumf_ctr_info *info)
+static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
+static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
 {
	size_t ctrset_size = 0;

	switch (ctrset) {
	case CPUMF_CTR_SET_BASIC:
-		if (info->cfvn >= 1)
+		if (cpumf_ctr_info.cfvn >= 1)
			ctrset_size = 6;
		break;
	case CPUMF_CTR_SET_USER:
-		if (info->cfvn == 1)
+		if (cpumf_ctr_info.cfvn == 1)
			ctrset_size = 6;
-		else if (info->cfvn >= 3)
+		else if (cpumf_ctr_info.cfvn >= 3)
			ctrset_size = 2;
		break;
	case CPUMF_CTR_SET_CRYPTO:
-		if (info->csvn >= 1 && info->csvn <= 5)
+		if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
			ctrset_size = 16;
-		else if (info->csvn == 6 || info->csvn == 7)
+		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
			ctrset_size = 20;
		break;
	case CPUMF_CTR_SET_EXT:
-		if (info->csvn == 1)
+		if (cpumf_ctr_info.csvn == 1)
			ctrset_size = 32;
-		else if (info->csvn == 2)
+		else if (cpumf_ctr_info.csvn == 2)
			ctrset_size = 48;
-		else if (info->csvn >= 3 && info->csvn <= 5)
+		else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
			ctrset_size = 128;
-		else if (info->csvn == 6 || info->csvn == 7)
+		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
			ctrset_size = 160;
		break;
	case CPUMF_CTR_SET_MT_DIAG:
-		if (info->csvn > 3)
+		if (cpumf_ctr_info.csvn > 3)
			ctrset_size = 48;
		break;
	case CPUMF_CTR_SET_MAX:
		break;
	}
+	cpumf_ctr_setsizes[ctrset] = ctrset_size;
+}

-	return ctrset_size;
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
+{
+	return cpumf_ctr_setsizes[ctrset];
 }

 /* Read a counter set. The counter set number determines the counter set and
@@ -248,14 +265,13 @@ static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
			       size_t room, bool error_ok)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
	size_t ctrset_size, need = 0;
	int rc = 3;				/* Assume write failure */

	ctrdata->def = CF_DIAG_CTRSET_DEF;
	ctrdata->set = ctrset;
	ctrdata->res1 = 0;
-	ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
+	ctrset_size = cpum_cf_read_setsize(ctrset);

	if (ctrset_size) {			/* Save data */
		need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
@@ -269,10 +285,6 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
			need = 0;
	}

-	debug_sprintf_event(cf_dbg, 3,
-			    "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
-			    " need %zd rc %d\n", __func__, ctrset, ctrset_size,
-			    cpuhw->info.cfvn, cpuhw->info.csvn, need, rc);
	return need;
 }

@@ -380,37 +392,34 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
 static int validate_ctr_version(const struct hw_perf_event *hwc,
				enum cpumf_ctr_set set)
 {
-	struct cpu_cf_events *cpuhw;
-	int err = 0;
	u16 mtdiag_ctl;
-
-	cpuhw = &get_cpu_var(cpu_cf_events);
+	int err = 0;

	/* check required version for counter sets */
	switch (set) {
	case CPUMF_CTR_SET_BASIC:
	case CPUMF_CTR_SET_USER:
-		if (cpuhw->info.cfvn < 1)
+		if (cpumf_ctr_info.cfvn < 1)
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_CRYPTO:
-		if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
+		if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
		     hwc->config > 79) ||
-		    (cpuhw->info.csvn >= 6 && hwc->config > 83))
+		    (cpumf_ctr_info.csvn >= 6 && hwc->config > 83))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_EXT:
-		if (cpuhw->info.csvn < 1)
+		if (cpumf_ctr_info.csvn < 1)
			err = -EOPNOTSUPP;
-		if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
-		    (cpuhw->info.csvn == 2 && hwc->config > 175) ||
-		    (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
-		     && hwc->config > 255) ||
-		    (cpuhw->info.csvn >= 6 && hwc->config > 287))
+		if ((cpumf_ctr_info.csvn == 1 && hwc->config > 159) ||
+		    (cpumf_ctr_info.csvn == 2 && hwc->config > 175) ||
+		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+		     hwc->config > 255) ||
+		    (cpumf_ctr_info.csvn >= 6 && hwc->config > 287))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_MT_DIAG:
-		if (cpuhw->info.csvn <= 3)
+		if (cpumf_ctr_info.csvn <= 3)
			err = -EOPNOTSUPP;
		/*
		 * MT-diagnostic counters are read-only. The counter set
@@ -425,35 +434,30 @@ static int validate_ctr_version(const struct hw_perf_event *hwc,
		 * counter set is enabled and active.
		 */
		mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
-		if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
-		      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
-		      (cpuhw->info.act_ctl & mtdiag_ctl)))
+		if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_MAX:
		err = -EOPNOTSUPP;
	}

-	put_cpu_var(cpu_cf_events);
	return err;
 }

 static int validate_ctr_auth(const struct hw_perf_event *hwc)
 {
-	struct cpu_cf_events *cpuhw;
-	int err = 0;
-
-	cpuhw = &get_cpu_var(cpu_cf_events);
+	int err = -ENOENT;

	/* Check authorization for cpu counter sets.
	 * If the particular CPU counter set is not authorized,
	 * return with -ENOENT in order to fall back to other
	 * PMUs that might suffice the event request.
	 */
-	if (!(hwc->config_base & cpuhw->info.auth_ctl))
-		err = -ENOENT;
+	if ((hwc->config_base & cpumf_ctr_info.auth_ctl))
+		err = 0;

-	put_cpu_var(cpu_cf_events);
	return err;
 }

@@ -509,8 +513,6 @@ static void cpum_cf_setup_cpu(void *flags)
	switch ((unsigned long)flags) {
	case PMC_INIT:
-		memset(&cpuhw->info, 0, sizeof(cpuhw->info));
-		qctri(&cpuhw->info);
		cpuhw->flags |= PMU_F_RESERVED;
		break;

@@ -977,7 +979,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,

	/* counter authorization change alert */
	if (alert & CPU_MF_INT_CF_CACA)
-		qctri(&cpuhw->info);
+		qctri(&cpumf_ctr_info);

	/* loss of counter data alert */
	if (alert & CPU_MF_INT_CF_LCDA)
@@ -994,9 +996,14 @@ static int __init cpumf_pmu_init(void)
 {
	int rc;

-	if (!cpum_cf_avail())
+	/* Extract counter measurement facility information */
+	if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
		return -ENODEV;

+	/* Determine and store counter set sizes for later reference */
+	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+		cpum_cf_make_setsize(rc);
+
	/*
	 * Clear bit 15 of cr0 to unauthorize problem-state to
	 * extract measurement counters
@@ -1263,21 +1270,19 @@ static int cfset_all_start(struct cfset_request *req)
  */
 static size_t cfset_needspace(unsigned int sets)
 {
-	struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
	size_t bytes = 0;
	int i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		if (!(sets & cpumf_ctr_ctl[i]))
			continue;
-		bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+		bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
	}
	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
		 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
-	put_cpu_ptr(&cpu_cf_events);
	return bytes;
 }

@@ -1351,7 +1356,7 @@ static void cfset_cpu_read(void *parm)
		if (!(p->sets & cpumf_ctr_ctl[set]))
			continue;	/* Counter set not in list */
-		set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
+		set_size = cpum_cf_read_setsize(set);
		space = sizeof(cpuhw->data) - cpuhw->used;
		space = cfset_cpuset_read(sp, set, set_size, space);
		if (space) {
@@ -1562,16 +1567,13 @@ static void cfdiag_read(struct perf_event *event)

 static int get_authctrsets(void)
 {
-	struct cpu_cf_events *cpuhw;
	unsigned long auth = 0;
	enum cpumf_ctr_set i;

-	cpuhw = &get_cpu_var(cpu_cf_events);
	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
-		if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+		if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
			auth |= cpumf_ctr_ctl[i];
	}
-	put_cpu_var(cpu_cf_events);
	return auth;
 }

@@ -1709,7 +1711,7 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
	enum cpumf_ctr_set i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
-		size_t size = cpum_cf_ctrset_size(i, info);
+		size_t size = cpum_cf_read_setsize(i);

		if (size)
			max_size += size * sizeof(u64) +
@@ -1743,16 +1745,12 @@ static void cfdiag_get_cpu_speed(void)

 static int cfset_init(void)
 {
-	struct cpumf_ctr_info info;
	size_t need;
	int rc;

-	if (qctri(&info))
-		return -ENODEV;
-
	cfdiag_get_cpu_speed();
	/* Make sure the counter set data fits into predefined buffer. */
-	need = cfdiag_maxsize(&info);
+	need = cfdiag_maxsize(&cpumf_ctr_info);
	if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
		pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
		       need);
--
cgit v1.2.3

From 9ae9b868aeaad377f6315d7196a43e8827e24cb3 Mon Sep 17 00:00:00 2001
From: Thomas Richter
Date: Thu, 6 Apr 2023 11:32:18 +0200
Subject: s390/cpum_cf: provide counter number to validate_ctr_version()

The first parameter of validate_ctr_version() is a pointer to a large
structure, but only the hw_perf_event::config member is used. Supply this
structure member value in the function invocation. No functional change.

Signed-off-by: Thomas Richter
Acked-by: Heiko Carstens
Acked-by: Sumanth Korikkar
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/perf_cpum_cf.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index e235b5b83d3c..ecbb89b76906 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -389,8 +389,7 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
	return set;
 }

-static int validate_ctr_version(const struct hw_perf_event *hwc,
-				enum cpumf_ctr_set set)
+static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
 {
	u16 mtdiag_ctl;
	int err = 0;
@@ -404,18 +403,17 @@ static int validate_ctr_version(const struct hw_perf_event *hwc,
		break;
	case CPUMF_CTR_SET_CRYPTO:
		if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
-		     hwc->config > 79) ||
-		    (cpumf_ctr_info.csvn >= 6 && hwc->config > 83))
+		     config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_EXT:
		if (cpumf_ctr_info.csvn < 1)
			err = -EOPNOTSUPP;
-		if ((cpumf_ctr_info.csvn == 1 && hwc->config > 159) ||
-		    (cpumf_ctr_info.csvn == 2 && hwc->config > 175) ||
-		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
-		     hwc->config > 255) ||
-		    (cpumf_ctr_info.csvn >= 6 && hwc->config > 287))
+		if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
+		    (cpumf_ctr_info.csvn == 2 && config > 175) ||
+		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+		     config > 255) ||
+		    (cpumf_ctr_info.csvn >= 6 && config > 287))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_MT_DIAG:
@@ -677,7 +675,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
	/* Finally, validate version and authorization of the counter set */
	err = validate_ctr_auth(hwc);
	if (!err)
-		err = validate_ctr_version(hwc, set);
+		err = validate_ctr_version(hwc->config, set);

	return err;
 }
--
cgit v1.2.3

From 1a33aee1dc2476c5937a0890f2dfc228a165f364 Mon Sep 17 00:00:00 2001
From: Thomas Richter
Date: Thu, 6 Apr 2023 11:40:42 +0200
Subject: s390/cpum_cf: remove function validate_ctr_auth() by inline code

Remove function validate_ctr_auth() and replace this very small
function by its body. No functional change.
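The error-code convention matters here: -ENOENT makes the perf core fall back
to other PMUs, while -EOPNOTSUPP rejects the event outright. A condensed C
sketch of the resulting check in __hw_perf_event_init(), assembled from the
hunk below:

	/* sketch: authorization check inlined into __hw_perf_event_init() */
	if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
		return -ENOENT;				/* let other PMUs take the event */
	return validate_ctr_version(hwc->config, set);	/* may return -EOPNOTSUPP */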
Signed-off-by: Thomas Richter
Acked-by: Heiko Carstens
Acked-by: Sumanth Korikkar
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/perf_cpum_cf.c | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index ecbb89b76906..cf1b6e8a708d 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -444,21 +444,6 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
	return err;
 }

-static int validate_ctr_auth(const struct hw_perf_event *hwc)
-{
-	int err = -ENOENT;
-
-	/* Check authorization for cpu counter sets.
-	 * If the particular CPU counter set is not authorized,
-	 * return with -ENOENT in order to fall back to other
-	 * PMUs that might suffice the event request.
-	 */
-	if ((hwc->config_base & cpumf_ctr_info.auth_ctl))
-		err = 0;
-
-	return err;
-}
-
 /*
  * Change the CPUMF state to active.
  * Enable and activate the CPU-counter sets according
@@ -596,7 +581,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	enum cpumf_ctr_set set;
-	int err = 0;
	u64 ev;

	switch (type) {
@@ -672,12 +656,15 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
	cpumf_hw_inuse();
	event->destroy = hw_perf_event_destroy;

-	/* Finally, validate version and authorization of the counter set */
-	err = validate_ctr_auth(hwc);
-	if (!err)
-		err = validate_ctr_version(hwc->config, set);
-
-	return err;
+	/*
+	 * Finally, validate version and authorization of the counter set.
+	 * If the particular CPU counter set is not authorized,
+	 * return with -ENOENT in order to fall back to other
+	 * PMUs that might suffice the event request.
+	 */
+	if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
+		return -ENOENT;
+	return validate_ctr_version(hwc->config, set);
 }

 /* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
--
cgit v1.2.3

From 7a04d491edf4766e7724671355b1ab27cae81a4a Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Mon, 3 Apr 2023 08:44:36 +0200
Subject: s390/kexec: turn DAT mode off immediately before purgatory

The kernel code is not guaranteed to be DAT-off mode safe. Turn the DAT
mode off immediately before entering the purgatory. Further, to avoid
subtle side effects, reset the system immediately before turning DAT mode
off, while making all necessary preparations in advance.
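A condensed sketch of the resulting order of operations, taken from the hunk
below: all preparations are done first, the system reset happens last, and
DAT is turned off only immediately before jumping into the purgatory:

	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
	diag308_subcode = DIAG308_CLEAR_RESET;		/* prepare in advance */
	if (sclp.has_iplcc)
		diag308_subcode |= DIAG308_FLAG_EI;
	s390_reset_system();				/* reset last */
	__arch_local_irq_stnsm(0xfb);			/* DAT off, then jump */
	(*data_mover)(&image->head, image->start, diag308_subcode);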
Signed-off-by: Alexander Gordeev
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/machine_kexec.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 2a8e73266428..eb473fc835b2 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -254,14 +254,13 @@ static void __do_machine_kexec(void *data)
	relocate_kernel_t data_mover;
	struct kimage *image = data;

-	s390_reset_system();
	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
-
-	__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
-	/* Call the moving routine */
	diag308_subcode = DIAG308_CLEAR_RESET;
	if (sclp.has_iplcc)
		diag308_subcode |= DIAG308_FLAG_EI;
+	s390_reset_system();
+
+	__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
	(*data_mover)(&image->head, image->start, diag308_subcode);

	/* Die if kexec returns */
--
cgit v1.2.3

From 86295cb4530dfbc2315ca5997502dad03748e5dd Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Mon, 3 Apr 2023 08:44:37 +0200
Subject: s390/kdump: cleanup do_start_kdump() prototype and usage

Avoid unnecessary run-time and compile-time type conversions of the
do_start_kdump() function return value and parameter.

Signed-off-by: Alexander Gordeev
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/machine_kexec.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index eb473fc835b2..3e0fd01a192a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -111,9 +111,8 @@ static noinline void __machine_kdump(void *image)
	store_status(__do_machine_kdump, image);
 }

-static unsigned long do_start_kdump(unsigned long addr)
+static int do_start_kdump(struct kimage *image)
 {
-	struct kimage *image = (struct kimage *) addr;
	int (*start_kdump)(int) = (void *)image->start;
	int rc;

@@ -134,8 +133,8 @@ static bool kdump_csum_valid(struct kimage *image)
	int rc;

	preempt_disable();
-	rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump,
-			   unsigned long, (unsigned long)image);
+	rc = call_on_stack(1, S390_lowcore.nodat_stack, int, do_start_kdump,
+			   struct kimage *, image);
	preempt_enable();
	return rc == 0;
 #else
--
cgit v1.2.3

From 39218bcf941ab1a5eabbf350c0f7a1f1325434b0 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Mon, 3 Apr 2023 08:44:38 +0200
Subject: s390/kdump: fix virtual vs physical address confusion

Fix virtual vs physical address confusion (the two are currently the same).
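The underlying convention, sketched in C from the hunk below (on s390 kernel
virtual and physical addresses currently coincide, which is why both variants
worked by accident): use virtual addresses for CPU accesses such as memcpy(),
and physical addresses only for code reached with DAT disabled:

	/* CPU copies want virtual addresses */
	reboot_code_buffer = page_to_virt(image->control_code_page);
	memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);

	/* the DAT-off jump target must be a physical address */
	data_mover = page_to_phys(image->control_code_page);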
Signed-off-by: Alexander Gordeev
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/machine_kexec.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3e0fd01a192a..fb887674e159 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -29,8 +29,7 @@
 #include
 #include

-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
-				  unsigned long);
+typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);

 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
@@ -58,7 +57,7 @@ static void __do_machine_kdump(void *image)
	 * prefix register of this CPU to zero
	 */
	memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
-	       (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
+	       phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);

	__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
	start_kdump = (void *)((struct kimage *) image)->start;
@@ -209,7 +208,7 @@ int machine_kexec_prepare(struct kimage *image)
		return -EINVAL;

	/* Get the destination where the assembler code should be copied to.*/
-	reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+	reboot_code_buffer = page_to_virt(image->control_code_page);

	/* Then copy it */
	memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
@@ -249,18 +248,18 @@ void machine_crash_shutdown(struct pt_regs *regs)
 */
 static void __do_machine_kexec(void *data)
 {
-	unsigned long diag308_subcode;
-	relocate_kernel_t data_mover;
+	unsigned long data_mover, entry, diag308_subcode;
	struct kimage *image = data;

-	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
+	data_mover = page_to_phys(image->control_code_page);
+	entry = virt_to_phys(&image->head);
	diag308_subcode = DIAG308_CLEAR_RESET;
	if (sclp.has_iplcc)
		diag308_subcode |= DIAG308_FLAG_EI;
	s390_reset_system();

	__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
-	(*data_mover)(&image->head, image->start, diag308_subcode);
+	(*(relocate_kernel_t)data_mover)(entry, image->start, diag308_subcode);

	/* Die if kexec returns */
	disabled_wait();
--
cgit v1.2.3

From 82caf7aba107dbc0e70c330786bed9961a098ab0 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Mon, 3 Apr 2023 08:44:39 +0200
Subject: s390/kdump: rework invocation of DAT-off code

Calling the kdump kernel is a two-step process that involves invocation of
the purgatory code: first time to verify the new kernel checksum and
second time to call the new kernel itself.

The purgatory code operates on real addresses and does not expect any
memory protection. Therefore, before the purgatory code is entered the
DAT mode is always turned off. However, it is only restored upon return
from the new kernel checksum verification. In case the purgatory was
called to start the new kernel and failed, the control is returned to the
old kernel, but the DAT mode continues staying off.

The new kernel start failure is unlikely and leads to the disabled wait
state anyway. Still that poses a risk, since the kernel code in general
is not DAT-off safe and even calling the disabled_wait() function might
crash.

Introduce a call_nodat() macro that allows entering DAT-off mode, calling
an arbitrary function and restoring DAT mode back on.
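For example (both calls appear in the conversion below), a one-argument call
returning int and a three-argument call returning void look like this:

	rc = call_nodat(1, int, purgatory, int, 1);

	call_nodat(3, void, (relocate_kernel_t)data_mover,
		   unsigned long, entry,
		   unsigned long, image->start,
		   unsigned long, diag308_subcode);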
Switch all invocations of DAT-off code to that macro and avoid the above
described scenario altogether.

Name the call_nodat() macro in small letters after the already existing
call_on_stack() and put it in the same header file.

Signed-off-by: Alexander Gordeev
Reviewed-by: Heiko Carstens
[hca@linux.ibm.com: some small modifications to call_nodat() macro]
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/stacktrace.h | 47 ++++++++++++++++++++++++++++++++++++++
 arch/s390/kernel/machine_kexec.c   | 28 +++++++++++------------
 2 files changed, 61 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 25e833cd883c..1966422cf030 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -189,4 +189,51 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
	(rettype)r2;						\
 })

+/*
+ * Use call_nodat() to call a function with DAT disabled.
+ * Proper sign and zero extension of function arguments is done.
+ * Usage:
+ *
+ * rc = call_nodat(nr, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - fn is the function to be called, where fn is a physical address.
+ * - rettype is the return type of fn.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ *   argument of fn, t2 the second, etc. a1 is the corresponding
+ *   first function argument (not name), etc.
+ *
+ * fn() is called with standard C function call ABI, with the exception
+ * that no useful stackframe or stackpointer is passed via register 15.
+ * Therefore the called function must not use r15 to access the stack.
+ */
+#define call_nodat(nr, rettype, fn, ...)			\
+({								\
+	rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn);	\
+	psw_t psw_enter, psw_leave;				\
+	CALL_LARGS_##nr(__VA_ARGS__);				\
+	CALL_REGS_##nr;						\
+								\
+	CALL_TYPECHECK_##nr(__VA_ARGS__);			\
+	psw_enter.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;	\
+	psw_enter.addr = (unsigned long)__fn;			\
+	asm volatile(						\
+		"	epsw	0,1\n"				\
+		"	risbg	1,0,0,31,32\n"			\
+		"	larl	7,1f\n"				\
+		"	stg	1,%[psw_leave]\n"		\
+		"	stg	7,8+%[psw_leave]\n"		\
+		"	la	7,%[psw_leave]\n"		\
+		"	lra	7,0(7)\n"			\
+		"	larl	1,0f\n"				\
+		"	lra	14,0(1)\n"			\
+		"	lpswe	%[psw_enter]\n"			\
+		"0:	lpswe	0(7)\n"				\
+		"1:\n"						\
+		: CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave)	\
+		: [psw_enter] "Q" (psw_enter)			\
+		: "7", CALL_CLOBBER_##nr);			\
+	(rettype)r2;						\
+})
+
 #endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index fb887674e159..f5d8abf3b4fb 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -30,6 +30,7 @@
 #include

 typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);

 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
@@ -40,11 +41,14 @@ extern const unsigned long long relocate_kernel_len;
 * Reset the system, copy boot CPU registers to absolute zero,
 * and jump to the kdump image
 */
-static void __do_machine_kdump(void *image)
+static void __do_machine_kdump(void *data)
 {
-	int (*start_kdump)(int);
+	struct kimage *image = data;
+	purgatory_t purgatory;
	unsigned long prefix;

+	purgatory = (purgatory_t)image->start;
+
	/* store_status() saved the prefix register to lowcore */
	prefix = (unsigned long) S390_lowcore.prefixreg_save_area;

@@ -59,11 +63,9 @@ static void __do_machine_kdump(void *data)
	memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
	       phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);

-	__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
-	start_kdump = (void *)((struct kimage *) image)->start;
-	start_kdump(1);
+	call_nodat(1, int, purgatory, int, 1);

-	/* Die if start_kdump returns */
+	/* Die if kdump returns */
	disabled_wait();
 }

@@ -112,13 +114,9 @@ static noinline void __machine_kdump(void *image)

 static int do_start_kdump(struct kimage *image)
 {
-	int (*start_kdump)(int) = (void *)image->start;
-	int rc;
+	purgatory_t purgatory = (purgatory_t)image->start;

-	__arch_local_irq_stnsm(0xfb); /* disable DAT */
-	rc = start_kdump(0);
-	__arch_local_irq_stosm(0x04); /* enable DAT */
-	return rc;
+	return call_nodat(1, int, purgatory, int, 0);
 }

 #endif /* CONFIG_CRASH_DUMP */
@@ -258,8 +256,10 @@ static void __do_machine_kexec(void *data)
		diag308_subcode |= DIAG308_FLAG_EI;
	s390_reset_system();

-	__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
-	(*(relocate_kernel_t)data_mover)(entry, image->start, diag308_subcode);
+	call_nodat(3, void, (relocate_kernel_t)data_mover,
+		   unsigned long, entry,
+		   unsigned long, image->start,
+		   unsigned long, diag308_subcode);

	/* Die if kexec returns */
	disabled_wait();
--
cgit v1.2.3

From 2d1b21eceaf0765d60b543b2b8e26c2f55517259 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Mon, 3 Apr 2023 08:44:40 +0200
Subject: s390/kdump: remove nodat stack restriction for calling nodat functions

To allow calling of DAT-off code from the kernel the stack needs to be
switched to nodat_stack (or another stack mapped 1:1). Before the
call_nodat() macro was introduced that was necessary to provide the very
same memory address for the STNSM and STOSM instructions.

If the kernel were to stay on a random stack (e.g. a virtually mapped one)
then a virtual address provided for the STNSM instruction could differ
from the physical address needed for the corresponding STOSM instruction.

After the call_nodat() macro is introduced the kernel stack does not need
to be mapped 1:1 anymore, since the macro stores the physical memory
address of the return PSW in a register before entering DAT-off mode.
This way the return LPSWE instruction is able to pick the correct memory
location and restore the DAT-on mode. That however might fail in case the
16-byte return PSW happened to cross a page boundary: PSW mask and PSW
address could end up in two separate non-contiguous physical pages.

Align the return PSW on a 16-byte boundary so it always fits into a
single physical page. As a result any stack (including the virtually
mapped one) could be used for calling DAT-off code and prior switching
to nodat_stack becomes unnecessary.

Signed-off-by: Alexander Gordeev
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/stacktrace.h |  4 +++-
 arch/s390/kernel/machine_kexec.c   | 13 ++-----------
 2 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 1966422cf030..78f7b729b65f 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -210,7 +210,9 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
 #define call_nodat(nr, rettype, fn, ...)			\
 ({								\
	rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn);	\
-	psw_t psw_enter, psw_leave;				\
+	/* aligned since psw_leave must not cross page boundary */ \
+	psw_t __aligned(16) psw_leave;				\
+	psw_t psw_enter;					\
	CALL_LARGS_##nr(__VA_ARGS__);				\
	CALL_REGS_##nr;						\
								\
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index f5d8abf3b4fb..6d9276c096a6 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -112,13 +112,6 @@ static noinline void __machine_kdump(void *image)
	store_status(__do_machine_kdump, image);
 }

-static int do_start_kdump(struct kimage *image)
-{
-	purgatory_t purgatory = (purgatory_t)image->start;
-
-	return call_nodat(1, int, purgatory, int, 0);
-}
-
 #endif /* CONFIG_CRASH_DUMP */

 /*
@@ -127,12 +120,10 @@ static int do_start_kdump(struct kimage *image)
 static bool kdump_csum_valid(struct kimage *image)
 {
 #ifdef CONFIG_CRASH_DUMP
+	purgatory_t purgatory = (purgatory_t)image->start;
	int rc;

-	preempt_disable();
-	rc = call_on_stack(1, S390_lowcore.nodat_stack, int, do_start_kdump,
-			   struct kimage *, image);
-	preempt_enable();
+	rc = call_nodat(1, int, purgatory, int, 0);
	return rc == 0;
 #else
	return false;
--
cgit v1.2.3

From e48b6853d81b6b48dc238d328700536b425c8e70 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Sun, 16 Apr 2023 20:15:17 +0200
Subject: s390/kasan: remove override of mem*() functions

The kasan mem*() functions are not used anymore since s390 has switched
to GENERIC_ENTRY and commit 69d4c0d32186 ("entry, kasan, x86: Disallow
overriding mem*() functions").

Therefore remove the now dead code, similar to x86. While at it also use
the SYM* macros in mem.S.
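Note that the double-underscore variants remain available as ordinary exported
symbols. A hypothetical (non-instrumented) caller that wants the plain mem.S
implementations, using the declarations in asm/string.h as shown in the hunk
below, could still do:

	#include <asm/string.h>

	static void copy_raw(void *dst, const void *src, size_t n)
	{
		__memcpy(dst, src, n);	/* plain assembler implementation */
	}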
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/include/asm/string.h | 15 +++------------
 arch/s390/lib/mem.S            | 24 +++++++++++++++---------
 2 files changed, 18 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 3fae93ddb322..351685de53d2 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -55,18 +55,6 @@ char *strstr(const char *s1, const char *s2);

 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)

-extern void *__memcpy(void *dest, const void *src, size_t n);
-extern void *__memset(void *s, int c, size_t n);
-extern void *__memmove(void *dest, const void *src, size_t n);
-
-/*
- * For files that are not instrumented (e.g. mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#define memset(s, c, n) __memset(s, c, n)
 #define strlen(s) __strlen(s)

 #define __no_sanitize_prefix_strfunc(x) __##x
@@ -79,6 +67,9 @@ extern void *__memmove(void *dest, const void *src, size_t n);
 #define __no_sanitize_prefix_strfunc(x) x
 #endif /* defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) */

+void *__memcpy(void *dest, const void *src, size_t n);
+void *__memset(void *s, int c, size_t n);
+void *__memmove(void *dest, const void *src, size_t n);
 void *__memset16(uint16_t *s, uint16_t v, size_t count);
 void *__memset32(uint32_t *s, uint32_t v, size_t count);
 void *__memset64(uint64_t *s, uint64_t v, size_t count);
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index dc0874f2e203..2e0480424f40 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -14,8 +14,7 @@
 /*
 * void *memmove(void *dest, const void *src, size_t n)
 */
-WEAK(memmove)
-ENTRY(__memmove)
+SYM_FUNC_START(__memmove)
	ltgr	%r4,%r4
	lgr	%r1,%r2
	jz	.Lmemmove_exit
@@ -48,7 +47,10 @@
	BR_EX	%r14
 .Lmemmove_mvc:
	mvc	0(1,%r1),0(%r3)
-ENDPROC(__memmove)
+SYM_FUNC_END(__memmove)
+EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS(memmove, __memmove)
 EXPORT_SYMBOL(memmove)

 /*
@@ -66,8 +68,7 @@ EXPORT_SYMBOL(memmove)
 *	return __builtin_memset(s, c, n);
 * }
 */
-WEAK(memset)
-ENTRY(__memset)
+SYM_FUNC_START(__memset)
	ltgr	%r4,%r4
	jz	.Lmemset_exit
	ltgr	%r3,%r3
@@ -111,7 +112,10 @@
	xc	0(1,%r1),0(%r1)
 .Lmemset_mvc:
	mvc	1(1,%r1),0(%r1)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS(memset, __memset)
 EXPORT_SYMBOL(memset)

 /*
@@ -119,8 +123,7 @@ EXPORT_SYMBOL(memset)
 *
 * void *memcpy(void *dest, const void *src, size_t n)
 */
-WEAK(memcpy)
-ENTRY(__memcpy)
+SYM_FUNC_START(__memcpy)
	ltgr	%r4,%r4
	jz	.Lmemcpy_exit
	aghi	%r4,-1
@@ -141,7 +144,10 @@
	j	.Lmemcpy_remainder
 .Lmemcpy_mvc:
	mvc	0(1,%r1),0(%r3)
-ENDPROC(__memcpy)
+SYM_FUNC_END(__memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+SYM_FUNC_ALIAS(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)

 /*
--
cgit v1.2.3

From 45769052ae77c0fb0f846c151bb0a845cf5d4237 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:04 +0200
Subject: s390/lib: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/lib/mem.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 2e0480424f40..5a9a55de2e10 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -158,7 +158,7 @@ EXPORT_SYMBOL(memcpy)
 * void *__memset64(uint64_t *s, uint64_t v, size_t count)
 */
 .macro __MEMSET bits,bytes,insn
-ENTRY(__memset\bits)
+SYM_FUNC_START(__memset\bits)
	ltgr	%r4,%r4
	jz	.L__memset_exit\bits
	cghi	%r4,\bytes
@@ -184,7 +184,7 @@
	BR_EX	%r14
 .L__memset_mvc\bits:
	mvc	\bytes(1,%r1),0(%r1)
-ENDPROC(__memset\bits)
+SYM_FUNC_END(__memset\bits)
 .endm

 __MEMSET 16,2,sth
--
cgit v1.2.3

From ac0c06a1dc8702209e651975ce6f54715f7321c8 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:05 +0200
Subject: s390/amode31: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/text_amode31.S | 75 +++++++++++++++++------------------
 1 file changed, 31 insertions(+), 44 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index e0f01ce251f5..14c6d25c035f 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -27,7 +27,7 @@
 /*
 * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
 */
-ENTRY(_diag14_amode31)
+SYM_FUNC_START(_diag14_amode31)
	lgr	%r1,%r2
	lgr	%r2,%r3
	lgr	%r3,%r4
@@ -42,12 +42,12 @@
	lgfr	%r2,%r5
	BR_EX_AMODE31_r14
	EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
-ENDPROC(_diag14_amode31)
+SYM_FUNC_END(_diag14_amode31)

 /*
 * int _diag210_amode31(struct diag210 *addr)
 */
-ENTRY(_diag210_amode31)
+SYM_FUNC_START(_diag210_amode31)
	lgr	%r1,%r2
	lhi	%r2,-1
	sam31
@@ -60,12 +60,12 @@
	lgfr	%r2,%r2
	BR_EX_AMODE31_r14
	EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
-ENDPROC(_diag210_amode31)
+SYM_FUNC_END(_diag210_amode31)

 /*
 * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
 */
-ENTRY(_diag8c_amode31)
+SYM_FUNC_START(_diag8c_amode31)
	llgf	%r3,0(%r3)
	sam31
	diag	%r2,%r4,0x8c
@@ -74,11 +74,11 @@
	lgfr	%r2,%r3
	BR_EX_AMODE31_r14
	EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
-ENDPROC(_diag8c_amode31)
+SYM_FUNC_END(_diag8c_amode31)
 /*
 * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
 */
-ENTRY(_diag26c_amode31)
+SYM_FUNC_START(_diag26c_amode31)
	lghi	%r5,-EOPNOTSUPP
	sam31
	diag	%r2,%r4,0x26c
@@ -87,42 +87,42 @@
	lgfr	%r2,%r5
	BR_EX_AMODE31_r14
	EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
-ENDPROC(_diag26c_amode31)
+SYM_FUNC_END(_diag26c_amode31)

 /*
 * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
 */
-ENTRY(_diag0c_amode31)
+SYM_FUNC_START(_diag0c_amode31)
	sam31
	diag	%r2,%r2,0x0c
	sam64
	BR_EX_AMODE31_r14
-ENDPROC(_diag0c_amode31)
+SYM_FUNC_END(_diag0c_amode31)

 /*
 * void _diag308_reset_amode31(void)
 *
 * Calls diag 308 subcode 1 and continues execution
 */
-ENTRY(_diag308_reset_amode31)
-	larl	%r4,.Lctlregs		# Save control registers
+SYM_FUNC_START(_diag308_reset_amode31)
+	larl	%r4,ctlregs		# Save control registers
	stctg	%c0,%c15,0(%r4)
	lg	%r2,0(%r4)		# Disable lowcore protection
	nilh	%r2,0xefff
-	larl	%r4,.Lctlreg0
+	larl	%r4,ctlreg0
	stg	%r2,0(%r4)
	lctlg	%c0,%c0,0(%r4)
-	larl	%r4,.Lfpctl		# Floating point control register
+	larl	%r4,fpctl		# Floating point control register
	stfpc	0(%r4)
-	larl	%r4,.Lprefix		# Save prefix register
+	larl	%r4,prefix		# Save prefix register
	stpx	0(%r4)
-	larl	%r4,.Lprefix_zero	# Set prefix register to 0
+	larl	%r4,prefix_zero		# Set prefix register to 0
	spx	0(%r4)
-	larl	%r4,.Lcontinue_psw	# Save PSW flags
+	larl	%r4,continue_psw	# Save PSW flags
	epsw	%r2,%r3
	stm	%r2,%r3,0(%r4)
	larl	%r4,.Lrestart_part2	# Setup restart PSW at absolute 0
-	larl	%r3,.Lrestart_diag308_psw
+	larl	%r3,restart_diag308_psw
	og	%r4,0(%r3)		# Save PSW
	lghi	%r3,0
	sturg	%r4,%r3			# Use sturg, because of large pages
@@ -134,39 +134,26 @@
	lhi	%r1,2			# Use mode 2 = ESAME (dump)
	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to ESAME mode
	sam64					# Switch to 64 bit addressing mode
-	larl	%r4,.Lctlregs		# Restore control registers
+	larl	%r4,ctlregs		# Restore control registers
	lctlg	%c0,%c15,0(%r4)
-	larl	%r4,.Lfpctl		# Restore floating point ctl register
+	larl	%r4,fpctl		# Restore floating point ctl register
	lfpc	0(%r4)
-	larl	%r4,.Lprefix		# Restore prefix register
+	larl	%r4,prefix		# Restore prefix register
	spx	0(%r4)
-	larl	%r4,.Lcontinue_psw	# Restore PSW flags
+	larl	%r4,continue_psw	# Restore PSW flags
	larl	%r2,.Lcontinue
	stg	%r2,8(%r4)
	lpswe	0(%r4)
 .Lcontinue:
	BR_EX_AMODE31_r14
-ENDPROC(_diag308_reset_amode31)
+SYM_FUNC_END(_diag308_reset_amode31)

	.section .amode31.data,"aw",@progbits
-.align 8
-.Lrestart_diag308_psw:
-	.long	0x00080000,0x80000000
-
-.align 8
-.Lcontinue_psw:
-	.quad	0,0
-
-.align 8
-.Lctlreg0:
-	.quad	0
-.Lctlregs:
-	.rept	16
-	.quad	0
-	.endr
-.Lfpctl:
-	.long	0
-.Lprefix:
-	.long	0
-.Lprefix_zero:
-	.long	0
+	.balign 8
+SYM_DATA_LOCAL(restart_diag308_psw, .long 0x00080000,0x80000000)
+SYM_DATA_LOCAL(continue_psw,	    .quad 0,0)
+SYM_DATA_LOCAL(ctlreg0,		    .quad 0)
+SYM_DATA_LOCAL(ctlregs,		    .fill 16,8,0)
+SYM_DATA_LOCAL(fpctl,		    .long 0)
+SYM_DATA_LOCAL(prefix,		    .long 0)
+SYM_DATA_LOCAL(prefix_zero,	    .long 0)
--
cgit v1.2.3

From 3e5e5107b7c6efe319069272a68a4d0ec8bce7fb Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:06 +0200
Subject: s390/crypto,chacha: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Acked-by: Harald Freudenberger
Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/crypto/chacha-s390.S | 47 +++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
index 9b033622191c..37cb63f25b17 100644
--- a/arch/s390/crypto/chacha-s390.S
+++ b/arch/s390/crypto/chacha-s390.S
@@ -13,27 +13,28 @@
 #define SP	%r15
 #define FRAME	(16 * 8 + 4 * 8)

-.data
-.align 32
-
-.Lsigma:
-.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
-.long	1,0,0,0
-.long	2,0,0,0
-.long	3,0,0,0
-.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
-
-.long	0,1,2,3
-.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
-.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
-.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
-.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+	.data
+	.balign	32

-.previous
+SYM_DATA_START_LOCAL(sigma)
+	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
+	.long	1,0,0,0
+	.long	2,0,0,0
+	.long	3,0,0,0
+	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
+
+	.long	0,1,2,3
+	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
+	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
+	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
+	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+SYM_DATA_END(sigma)
+
+	.previous

	GEN_BR_THUNK %r14

-.text
+	.text

 #############################################################################
 # void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
@@ -78,10 +79,10 @@
 #define XT2	%v29
 #define XT3	%v30

-ENTRY(chacha20_vx_4x)
+SYM_FUNC_START(chacha20_vx_4x)
	stmg	%r6,%r7,6*8(SP)

-	larl	%r7,.Lsigma
+	larl	%r7,sigma
	lhi	%r0,10
	lhi	%r1,0

@@ -403,7 +404,7 @@
	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14
-ENDPROC(chacha20_vx_4x)
+SYM_FUNC_END(chacha20_vx_4x)

 #undef	OUT
 #undef	INP
@@ -471,7 +472,7 @@
 #define T2	%v29
 #define T3	%v30

-ENTRY(chacha20_vx)
+SYM_FUNC_START(chacha20_vx)
	clgfi	LEN,256
	jle	chacha20_vx_4x
	stmg	%r6,%r7,6*8(SP)
@@ -481,7 +482,7 @@
	la	SP,0(%r1,SP)
	stg	%r0,0(SP)		# back-chain

-	larl	%r7,.Lsigma
+	larl	%r7,sigma
	lhi	%r0,10

	VLM	K1,K2,0,KEY,0		# load key
@@ -902,6 +903,6 @@
	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14
-ENDPROC(chacha20_vx)
+SYM_FUNC_END(chacha20_vx)

	.previous
--
cgit v1.2.3

From 4b788ac8ed39686f7810733c16521a7b91e005c1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:07 +0200
Subject: s390/crc32be: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/crypto/crc32be-vx.S | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index 6b3d1009c392..6ea17628ea10 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -24,8 +24,8 @@
 #define CONST_RU_POLY		%v13
 #define CONST_CRC_POLY		%v14

-.data
-.align 8
+	.data
+	.balign	8

 /*
 * The CRC-32 constant block contains reduction constants to fold and
@@ -58,19 +58,20 @@
 * P'(x) = 0xEDB88320
 */

-.Lconstants_CRC_32_BE:
+SYM_DATA_START_LOCAL(constants_CRC_32_BE)
	.quad	0x08833794c, 0x0e6228b11	# R1, R2
	.quad	0x0c5b9cd4c, 0x0e8a45605	# R3, R4
	.quad	0x0f200aa66, 1 << 32		# R5, x32
	.quad	0x0490d678d, 1			# R6, 1
	.quad	0x104d101df, 0			# u
	.quad	0x104C11DB7, 0			# P(x)
+SYM_DATA_END(constants_CRC_32_BE)

-.previous
+	.previous

	GEN_BR_THUNK %r14

-.text
+	.text
 /*
 * The CRC-32 function(s) use these calling conventions:
 *
@@ -90,9 +91,9 @@
 *
 *	V9..V14: CRC-32 constants.
 */
-ENTRY(crc32_be_vgfm_16)
+SYM_FUNC_START(crc32_be_vgfm_16)
	/* Load CRC-32 constants */
-	larl	%r5,.Lconstants_CRC_32_BE
+	larl	%r5,constants_CRC_32_BE
	VLM	CONST_R1R2,CONST_CRC_POLY,0,%r5

	/* Load the initial CRC value into the leftmost word of V0. */
@@ -207,6 +208,6 @@
 .Ldone:
	VLGVF	%r2,%v2,3
	BR_EX	%r14
-ENDPROC(crc32_be_vgfm_16)
+SYM_FUNC_END(crc32_be_vgfm_16)

	.previous
--
cgit v1.2.3

From b5f3c99d15725d78a2e720ca48ab47a43bd1b5cb Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:08 +0200
Subject: s390/crc32le: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/crypto/crc32le-vx.S | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index 71caf0f4ec08..5a819ae09a0b 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -25,8 +25,8 @@
 #define CONST_RU_POLY		%v13
 #define CONST_CRC_POLY		%v14

-.data
-.align 8
+	.data
+	.balign	8

 /*
 * The CRC-32 constant block contains reduction constants to fold and
@@ -59,27 +59,29 @@
 * P'(x) = 0x82F63B78
 */

-.Lconstants_CRC_32_LE:
+SYM_DATA_START_LOCAL(constants_CRC_32_LE)
	.octa	0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
	.quad	0x1c6e41596, 0x154442bd4		# R2, R1
	.quad	0x0ccaa009e, 0x1751997d0		# R4, R3
	.octa	0x163cd6124				# R5
	.octa	0x1F7011641				# u'
	.octa	0x1DB710641				# P'(x) << 1
+SYM_DATA_END(constants_CRC_32_LE)

-.Lconstants_CRC_32C_LE:
+SYM_DATA_START_LOCAL(constants_CRC_32C_LE)
	.octa	0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
	.quad	0x09e4addf8, 0x740eef02			# R2, R1
	.quad	0x14cd00bd6, 0xf20c0dfe			# R4, R3
	.octa	0x0dd45aab8				# R5
	.octa	0x0dea713f1				# u'
	.octa	0x105ec76f0				# P'(x) << 1
+SYM_DATA_END(constants_CRC_32C_LE)

-.previous
+	.previous

	GEN_BR_THUNK %r14

-.text
+	.text

 /*
 * The CRC-32 functions use these calling conventions:
@@ -102,17 +104,17 @@
 *
 *	V10..V14: CRC-32 constants.
 */

-ENTRY(crc32_le_vgfm_16)
-	larl	%r5,.Lconstants_CRC_32_LE
+SYM_FUNC_START(crc32_le_vgfm_16)
+	larl	%r5,constants_CRC_32_LE
	j	crc32_le_vgfm_generic
-ENDPROC(crc32_le_vgfm_16)
+SYM_FUNC_END(crc32_le_vgfm_16)

-ENTRY(crc32c_le_vgfm_16)
-	larl	%r5,.Lconstants_CRC_32C_LE
+SYM_FUNC_START(crc32c_le_vgfm_16)
+	larl	%r5,constants_CRC_32C_LE
	j	crc32_le_vgfm_generic
-ENDPROC(crc32c_le_vgfm_16)
+SYM_FUNC_END(crc32c_le_vgfm_16)

-ENTRY(crc32_le_vgfm_generic)
+SYM_FUNC_START(crc32_le_vgfm_generic)
	/* Load CRC-32 constants */
	VLM	CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5

@@ -268,6 +270,6 @@
 .Ldone:
	VLGVF	%r2,%v2,2
	BR_EX	%r14
-ENDPROC(crc32_le_vgfm_generic)
+SYM_FUNC_END(crc32_le_vgfm_generic)

	.previous
--
cgit v1.2.3

From aaaac068f0d2d137b7fcad47d4cebbd24bf060ae Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:09 +0200
Subject: s390/mcount: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/mcount.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4c4ee762f515..a7902fdb7ba2 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -28,9 +28,9 @@

	.section .kprobes.text, "ax"

-ENTRY(ftrace_stub)
+SYM_FUNC_START(ftrace_stub)
	BR_EX	%r14
-ENDPROC(ftrace_stub)
+SYM_FUNC_END(ftrace_stub)

	.macro	ftrace_regs_entry, allregs=0
	stg	%r14,(__SF_GPRS+8*8)(%r15)	# save traced function caller
@@ -153,8 +153,7 @@ SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)

 #ifdef CONFIG_RETHOOK

-SYM_FUNC_START(arch_rethook_trampoline)
-
+SYM_CODE_START(arch_rethook_trampoline)
	stg	%r14,(__SF_GPRS+8*8)(%r15)
	lay	%r15,-STACK_FRAME_SIZE(%r15)
	stmg	%r0,%r14,STACK_PTREGS_GPRS(%r15)
@@ -177,7 +176,6 @@
	mvc	__SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
	lmg	%r0,%r15,STACK_PTREGS_GPRS(%r15)
	lpswe	__SF_EMPTY(%r15)
-
-SYM_FUNC_END(arch_rethook_trampoline)
+SYM_CODE_END(arch_rethook_trampoline)

 #endif /* CONFIG_RETHOOK */
--
cgit v1.2.3

From a89d60fc7a9ad544a30251d238a5f35d023754d1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:10 +0200
Subject: s390/earlypgm: use SYM* macros instead of ENTRY(), etc.

Consistently use the SYM* family of macros instead of the deprecated
ENTRY(), ENDPROC(), etc. family of macros.

Signed-off-by: Heiko Carstens
Signed-off-by: Vasily Gorbik
---
 arch/s390/kernel/earlypgm.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
index f521c6da37b8..c634871f0d90 100644
--- a/arch/s390/kernel/earlypgm.S
+++ b/arch/s390/kernel/earlypgm.S
@@ -7,7 +7,7 @@
 #include
 #include

-ENTRY(early_pgm_check_handler)
+SYM_CODE_START(early_pgm_check_handler)
	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
@@ -20,4 +20,4 @@
	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
	lpswe	__LC_RETURN_PSW
-ENDPROC(early_pgm_check_handler)
+SYM_CODE_END(early_pgm_check_handler)
--
cgit v1.2.3

From 05d0935d12966686eb1d780e3f929727e9fa500d Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 17 Apr 2023 14:45:11 +0200
Subject: s390/head64: use SYM* macros instead of ENTRY(), etc.
Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index f67982f562db..df77ba102096 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ #include __HEAD -ENTRY(startup_continue) +SYM_CODE_START(startup_continue) larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK # @@ -33,8 +33,8 @@ ENTRY(startup_continue) # We returned from start_kernel ?!? PANIK # basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw + lpswe dw_psw-.(%r13) # load disabled wait psw +SYM_CODE_END(startup_continue) .align 16 -.LPG1: -.Ldw: .quad 0x0002000180000000,0x0000000000000000 +SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000) -- cgit v1.2.3 From 26d14299220ab8e1d4fb166d8e66dd4a43097ec1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:12 +0200 Subject: s390/reipl: use SYM* macros instead of ENTRY(), etc. Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/reipl.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 4a22163962eb..88087a32ebc6 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -19,7 +19,7 @@ # r2 = Function to be called after store status # r3 = Parameter for function # -ENTRY(store_status) +SYM_CODE_START(store_status) /* Save register one and load save area base */ stg %r1,__LC_SAVE_AREA_RESTART /* General purpose registers */ @@ -61,7 +61,7 @@ ENTRY(store_status) stpx 0(%r1) /* Clock comparator - seven bytes */ lghi %r1,__LC_CLOCK_COMP_SAVE_AREA - larl %r4,.Lclkcmp + larl %r4,clkcmp stckc 0(%r4) mvc 1(7,%r1),1(%r4) /* Program status word */ @@ -73,9 +73,9 @@ ENTRY(store_status) lgr %r9,%r2 lgr %r2,%r3 BR_EX %r9 -ENDPROC(store_status) +SYM_CODE_END(store_status) .section .bss - .align 8 -.Lclkcmp: .quad 0x0000000000000000 + .balign 8 +SYM_DATA_LOCAL(clkcmp, .quad 0x0000000000000000) .previous -- cgit v1.2.3 From 6cea5f0bc93dffc74f0fa8cad7680b5ad21d1961 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:13 +0200 Subject: s390/kprobes: use SYM* macros instead of ENTRY(), etc. Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/kprobes_insn_page.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S index f6cb022ef8c8..b6335296dcd8 100644 --- a/arch/s390/kernel/kprobes_insn_page.S +++ b/arch/s390/kernel/kprobes_insn_page.S @@ -14,9 +14,9 @@ */ .section .kprobes.text, "ax" .align 4096 -ENTRY(kprobes_insn_page) +SYM_CODE_START(kprobes_insn_page) .rept 2048 .word 0x07fe .endr -ENDPROC(kprobes_insn_page) +SYM_CODE_END(kprobes_insn_page) .previous -- cgit v1.2.3 From 04b6d02dbecdda5d15aa55edbae2f0e1eb81749e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:14 +0200 Subject: s390/purgatory: use SYM* macros instead of ENTRY(), etc. 
Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/purgatory/head.S | 62 +++++++++++------------------------ arch/s390/purgatory/kexec-purgatory.S | 14 ++++---- 2 files changed, 25 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 6f835124ee82..e5bd1a503528 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -76,9 +76,9 @@ diag %r0,%r1,0x308 .endm -.text -.align PAGE_SIZE -ENTRY(purgatory_start) + .text + .balign PAGE_SIZE +SYM_CODE_START(purgatory_start) /* The purgatory might be called after a diag308 so better set * architecture and addressing mode. */ @@ -245,45 +245,21 @@ ENTRY(purgatory_start) /* start crash kernel */ START_NEXT_KERNEL .base_dst 1 - - -load_psw_mask: - .long 0x00080000,0x80000000 - - .align 8 -disabled_wait_psw: - .quad 0x0002000180000000 - .quad 0x0000000000000000 + .do_checksum_verification - -gprregs: - .rept 10 - .quad 0 - .endr - -/* Macro to define a global variable with name and size (in bytes) to be - * shared with C code. - * - * Add the .size and .type attribute to satisfy checks on the Elf_Sym during - * purgatory load. - */ -.macro GLOBAL_VARIABLE name,size -\name: - .global \name - .size \name,\size - .type \name,object - .skip \size,0 -.endm - -GLOBAL_VARIABLE purgatory_sha256_digest,32 -GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE -GLOBAL_VARIABLE kernel_entry,8 -GLOBAL_VARIABLE kernel_type,8 -GLOBAL_VARIABLE crash_start,8 -GLOBAL_VARIABLE crash_size,8 - - .align PAGE_SIZE -stack: +SYM_CODE_END(purgatory_start) + +SYM_DATA_LOCAL(load_psw_mask, .long 0x00080000,0x80000000) + .balign 8 +SYM_DATA_LOCAL(disabled_wait_psw, .quad 0x0002000180000000,.do_checksum_verification) +SYM_DATA_LOCAL(gprregs, .fill 10,8,0) +SYM_DATA(purgatory_sha256_digest, .skip 32) +SYM_DATA(purgatory_sha_regions, .skip 16*__KEXEC_SHA_REGION_SIZE) +SYM_DATA(kernel_entry, .skip 8) +SYM_DATA(kernel_type, .skip 8) +SYM_DATA(crash_start, .skip 8) +SYM_DATA(crash_size, .skip 8) + .balign PAGE_SIZE +SYM_DATA_START_LOCAL(stack) /* The buffer to move this code must be as big as the code. */ .skip stack-purgatory_start - .align PAGE_SIZE -purgatory_end: + .balign PAGE_SIZE +SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, purgatory_end) diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S index 8293753100ae..25f512b1de12 100644 --- a/arch/s390/purgatory/kexec-purgatory.S +++ b/arch/s390/purgatory/kexec-purgatory.S @@ -1,14 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include .section .rodata, "a" - .align 8 -kexec_purgatory: - .globl kexec_purgatory + .balign 8 +SYM_DATA_START(kexec_purgatory) .incbin "arch/s390/purgatory/purgatory.ro" -.Lkexec_purgatroy_end: +SYM_DATA_END_LABEL(kexec_purgatory, SYM_L_LOCAL, kexec_purgatory_end) - .align 8 -kexec_purgatory_size: - .globl kexec_purgatory_size - .quad .Lkexec_purgatroy_end - kexec_purgatory + .balign 8 +SYM_DATA(kexec_purgatory_size, .quad kexec_purgatory_end-kexec_purgatory) -- cgit v1.2.3 From fda1dffa44b0ea657e413f1e548022e3f0269592 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:15 +0200 Subject: s390/entry: use SYM* macros instead of ENTRY(), etc. Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. 
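Two of the annotations used in the entry code below may be less familiar. SYM_CODE_START marks code with a non-standard calling convention, such as entry points reached via PSW swap instead of a normal call, and SYM_INNER_LABEL defines a named label inside such a block. A rough sketch with hypothetical symbols, not code from the patch:

	SYM_CODE_START(example_handler)
		stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
	# a globally visible label inside the routine, e.g. as a branch
	# target for cleanup code
	SYM_INNER_LABEL(example_resume, SYM_L_GLOBAL)
		lmg	%r8,%r15,__LC_SAVE_AREA_SYNC
		lpswe	__LC_RETURN_PSW
	SYM_CODE_END(example_handler)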
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 69 ++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d5f8cd4319a4..e5b6c1369e8e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -177,7 +177,7 @@ _LPP_OFFSET = __LC_LPP * Returns: * gpr2 = prev */ -ENTRY(__switch_to) +SYM_FUNC_START(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lghi %r4,__TASK_stack lghi %r1,__TASK_thread @@ -193,7 +193,7 @@ ENTRY(__switch_to) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 BR_EX %r14 -ENDPROC(__switch_to) +SYM_FUNC_END(__switch_to) #if IS_ENABLED(CONFIG_KVM) /* @@ -202,7 +202,7 @@ ENDPROC(__switch_to) * %r3 pointer to sie control block virt * %r4 guest register save area */ -ENTRY(__sie64a) +SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers lg %r12,__LC_CURRENT stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. @@ -248,8 +248,7 @@ ENTRY(__sie64a) nopr 7 .Lrewind_pad2: nopr 7 - .globl sie_exit -sie_exit: +SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to @@ -269,7 +268,7 @@ sie_exit: EX_TABLE(.Lrewind_pad4,.Lsie_fault) EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) -ENDPROC(__sie64a) +SYM_FUNC_END(__sie64a) EXPORT_SYMBOL(__sie64a) EXPORT_SYMBOL(sie_exit) #endif @@ -279,7 +278,7 @@ EXPORT_SYMBOL(sie_exit) * are entered with interrupts disabled. */ -ENTRY(system_call) +SYM_CODE_START(system_call) stpt __LC_SYS_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF @@ -314,12 +313,12 @@ ENTRY(system_call) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE -ENDPROC(system_call) +SYM_CODE_END(system_call) # # a new process exits the kernel with ret_from_fork # -ENTRY(ret_from_fork) +SYM_CODE_START(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork STACKLEAK_ERASE @@ -330,13 +329,13 @@ ENTRY(ret_from_fork) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE -ENDPROC(ret_from_fork) +SYM_CODE_END(ret_from_fork) /* * Program check handler routine */ -ENTRY(pgm_check_handler) +SYM_CODE_START(pgm_check_handler) stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC @@ -403,13 +402,13 @@ ENTRY(pgm_check_handler) lghi %r14,1 LBEAR __LC_PGM_LAST_BREAK LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per -ENDPROC(pgm_check_handler) +SYM_CODE_END(pgm_check_handler) /* * Interrupt handler macro used for external and IO interrupts. */ .macro INT_HANDLER name,lc_old_psw,handler -ENTRY(\name) +SYM_CODE_START(\name) stckf __LC_INT_CLOCK stpt __LC_SYS_ENTER_TIMER STBEAR __LC_LAST_BREAK @@ -456,7 +455,7 @@ ENTRY(\name) 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE -ENDPROC(\name) +SYM_CODE_END(\name) .endm INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq @@ -465,7 +464,7 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq /* * Load idle PSW. 
*/ -ENTRY(psw_idle) +SYM_FUNC_START(psw_idle) stg %r14,(__SF_GPRS+8*8)(%r15) stg %r3,__SF_EMPTY(%r15) larl %r1,psw_idle_exit @@ -481,15 +480,14 @@ ENTRY(psw_idle) stckf __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) -.globl psw_idle_exit -psw_idle_exit: +SYM_INNER_LABEL(psw_idle_exit, SYM_L_GLOBAL) BR_EX %r14 -ENDPROC(psw_idle) +SYM_FUNC_END(psw_idle) /* * Machine check handler routines */ -ENTRY(mcck_int_handler) +SYM_CODE_START(mcck_int_handler) stckf __LC_MCCK_CLOCK BPOFF la %r1,4095 # validate r1 @@ -573,10 +571,10 @@ ENTRY(mcck_int_handler) */ lhi %r5,0 lhi %r6,1 - larl %r7,.Lstop_lock + larl %r7,stop_lock cs %r5,%r6,0(%r7) # single CPU-stopper only jnz 4f - larl %r7,.Lthis_cpu + larl %r7,this_cpu stap 0(%r7) # this CPU address lh %r4,0(%r7) nilh %r4,0 @@ -592,15 +590,15 @@ ENTRY(mcck_int_handler) 3: sigp %r1,%r4,SIGP_STOP # stop this CPU brc SIGP_CC_BUSY,3b 4: j 4b -ENDPROC(mcck_int_handler) +SYM_CODE_END(mcck_int_handler) -ENTRY(restart_int_handler) +SYM_CODE_START(restart_int_handler) ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f lctlg %c0,%c15,__LC_CREGS_SAVE_AREA -0: larl %r15,.Lstosm_tmp +0: larl %r15,stosm_tmp stosm 0(%r15),0x04 # turn dat on, keep irqs off lg %r15,__LC_RESTART_STACK xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) @@ -621,7 +619,7 @@ ENTRY(restart_int_handler) 2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu brc 2,2b 3: j 3b -ENDPROC(restart_int_handler) +SYM_CODE_END(restart_int_handler) .section .kprobes.text, "ax" @@ -631,7 +629,7 @@ ENDPROC(restart_int_handler) * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -ENTRY(stack_overflow) +SYM_CODE_START(stack_overflow) lg %r15,__LC_NODAT_STACK # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -641,26 +639,27 @@ ENTRY(stack_overflow) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow -ENDPROC(stack_overflow) +SYM_CODE_END(stack_overflow) #endif .section .data, "aw" - .align 4 -.Lstop_lock: .long 0 -.Lthis_cpu: .short 0 -.Lstosm_tmp: .byte 0 + .balign 4 +SYM_DATA_LOCAL(stop_lock, .long 0) +SYM_DATA_LOCAL(this_cpu, .short 0) +SYM_DATA_LOCAL(stosm_tmp, .byte 0) + .section .rodata, "a" #define SYSCALL(esame,emu) .quad __s390x_ ## esame - .globl sys_call_table -sys_call_table: +SYM_DATA_START(sys_call_table) #include "asm/syscall_table.h" +SYM_DATA_END(sys_call_table) #undef SYSCALL #ifdef CONFIG_COMPAT #define SYSCALL(esame,emu) .quad __s390_ ## emu - .globl sys_call_table_emu -sys_call_table_emu: +SYM_DATA_START(sys_call_table_emu) #include "asm/syscall_table.h" +SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif -- cgit v1.2.3 From 680957b3b8b0b2e8697592fcb77369d4f643908a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:16 +0200 Subject: s390/relocate_kernel: use SYM* macros instead of ENTRY(), etc. Consistently use the SYM* family of macros instead of the deprecated ENTRY(), ENDPROC(), etc. family of macros. 
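One pattern from this patch is worth spelling out: SYM_DATA_END_LABEL closes a data object and at the same time defines a second label at its end address, which makes assembly-time size calculations straightforward. A sketch with invented names:

	SYM_DATA_START_LOCAL(blob)
		.skip	4096
	SYM_DATA_END_LABEL(blob, SYM_L_LOCAL, blob_end)

	.balign	8
	SYM_DATA(blob_size, .quad blob_end - blob)

This mirrors how relocate_kernel_len is derived via the relocate_kernel_end label in the diff below.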
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/relocate_kernel.S | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index a9a1a6f45375..89a3efce0500 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -26,7 +26,7 @@ */ .text -ENTRY(relocate_kernel) +SYM_CODE_START(relocate_kernel) basr %r13,0 # base address .base: lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 @@ -66,13 +66,11 @@ ENTRY(relocate_kernel) mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 .diag: diag %r0,%r0,0x308 -ENDPROC(relocate_kernel) +SYM_CODE_END(relocate_kernel) - .align 8 - load_psw: + .balign 8 +SYM_DATA_START_LOCAL(load_psw) .long 0x00080000,0x80000000 - relocate_kernel_end: - .align 8 - .globl relocate_kernel_len - relocate_kernel_len: - .quad relocate_kernel_end - relocate_kernel +SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end) + .balign 8 +SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel) -- cgit v1.2.3 From 0ae241f4d7d0003f091df6f8d1f45767c3d96d62 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Apr 2023 14:45:17 +0200 Subject: s390/relocate_kernel: adjust indentation relocate_kernel.S seems to be the only assembler file which doesn't follow the standard way of indentation. Adjust this for the sake of consistency. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/relocate_kernel.S | 84 +++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index 89a3efce0500..0ae297c82afd 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -27,50 +27,50 @@ .text SYM_CODE_START(relocate_kernel) - basr %r13,0 # base address - .base: - lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 - lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 - lg %r5,0(%r2) # read another word for indirection page - aghi %r2,8 # increment pointer - tml %r5,0x1 # is it a destination page? - je .indir_check # NO, goto "indir_check" - lgr %r6,%r5 # r6 = r5 - nill %r6,0xf000 # mask it out and... - j .base # ...next iteration - .indir_check: - tml %r5,0x2 # is it a indirection page? - je .done_test # NO, goto "done_test" - nill %r5,0xf000 # YES, mask out, - lgr %r2,%r5 # move it into the right register, - j .base # and read next... - .done_test: - tml %r5,0x4 # is it the done indicator? - je .source_test # NO! Well, then it should be the source indicator... - j .done # ok, lets finish it here... - .source_test: - tml %r5,0x8 # it should be a source indicator... - je .base # NO, ignore it... - lgr %r8,%r5 # r8 = r5 - nill %r8,0xf000 # masking - 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 - jo 0b - j .base - .done: - lgr %r0,%r4 # subcode - cghi %r3,0 - je .diag - la %r4,load_psw-.base(%r13) # load psw-address into the register - o %r3,4(%r4) # or load address into psw - st %r3,4(%r4) - mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 - .diag: - diag %r0,%r0,0x308 + basr %r13,0 # base address +.base: + lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 + lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 + lg %r5,0(%r2) # read another word for indirection page + aghi %r2,8 # increment pointer + tml %r5,0x1 # is it a destination page? 
+ je .indir_check # NO, goto "indir_check" + lgr %r6,%r5 # r6 = r5 + nill %r6,0xf000 # mask it out and... + j .base # ...next iteration +.indir_check: + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nill %r5,0xf000 # YES, mask out, + lgr %r2,%r5 # move it into the right register, + j .base # and read next... +.done_test: + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... +.source_test: + tml %r5,0x8 # it should be a source indicator... + je .base # NO, ignore it... + lgr %r8,%r5 # r8 = r5 + nill %r8,0xf000 # masking +0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .base +.done: + lgr %r0,%r4 # subcode + cghi %r3,0 + je .diag + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 +.diag: + diag %r0,%r0,0x308 SYM_CODE_END(relocate_kernel) - .balign 8 + .balign 8 SYM_DATA_START_LOCAL(load_psw) - .long 0x00080000,0x80000000 + .long 0x00080000,0x80000000 SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end) - .balign 8 + .balign 8 SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel) -- cgit v1.2.3 From 17c51b1ba9c2d4b497349ac1622aafe67be16103 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Apr 2023 14:30:43 +0200 Subject: s390/mm: use BIT macro to generate SET_MEMORY bit masks Use BIT macro to generate SET_MEMORY bit masks, which is easier to maintain if bits get added, or removed. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/set_memory.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 25f2464dbb7e..ec3f44c602eb 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -6,11 +6,19 @@ extern struct mutex cpa_mutex; -#define SET_MEMORY_RO 1UL -#define SET_MEMORY_RW 2UL -#define SET_MEMORY_NX 4UL -#define SET_MEMORY_X 8UL -#define SET_MEMORY_4K 16UL +enum { + _SET_MEMORY_RO_BIT, + _SET_MEMORY_RW_BIT, + _SET_MEMORY_NX_BIT, + _SET_MEMORY_X_BIT, + _SET_MEMORY_4K_BIT, +}; + +#define SET_MEMORY_RO BIT(_SET_MEMORY_RO_BIT) +#define SET_MEMORY_RW BIT(_SET_MEMORY_RW_BIT) +#define SET_MEMORY_NX BIT(_SET_MEMORY_NX_BIT) +#define SET_MEMORY_X BIT(_SET_MEMORY_X_BIT) +#define SET_MEMORY_4K BIT(_SET_MEMORY_4K_BIT) int __set_memory(unsigned long addr, int numpages, unsigned long flags); -- cgit v1.2.3 From 0490d6d7ba0a479fdd805da54ae25220ce5b514d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Apr 2023 14:30:44 +0200 Subject: s390/mm: enable ARCH_HAS_SET_DIRECT_MAP Implement the set_direct_map_*() API, which allows to invalidate and set default permissions to pages within the direct mapping. Note that kernel_page_present(), which is also supposed to be part of this API, is intentionally not implemented. The reason for this is that kernel_page_present() is only used (and currently only makes sense) for suspend/resume, which isn't supported on s390. 
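A minimal usage sketch of the two new calls (the caller is invented for illustration; the signatures are the ones added by the patch):

	#include <asm/set_memory.h>

	static int exercise_direct_map(struct page *page)
	{
		int rc;

		/* make the page inaccessible through the direct mapping */
		rc = set_direct_map_invalid_noflush(page);
		if (rc)
			return rc;
		/* restore the default RW, non-executable mapping */
		return set_direct_map_default_noflush(page);
	}

As the "noflush" suffix of the generic API indicates, TLB flushing is left to the caller.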
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/include/asm/set_memory.h | 7 +++++++ arch/s390/mm/pageattr.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a7568449ca59..37986abfe007 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -77,6 +77,7 @@ config S390 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index ec3f44c602eb..7a3eefd7a242 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -12,6 +12,8 @@ enum { _SET_MEMORY_NX_BIT, _SET_MEMORY_X_BIT, _SET_MEMORY_4K_BIT, + _SET_MEMORY_INV_BIT, + _SET_MEMORY_DEF_BIT, }; #define SET_MEMORY_RO BIT(_SET_MEMORY_RO_BIT) @@ -19,6 +21,8 @@ enum { #define SET_MEMORY_NX BIT(_SET_MEMORY_NX_BIT) #define SET_MEMORY_X BIT(_SET_MEMORY_X_BIT) #define SET_MEMORY_4K BIT(_SET_MEMORY_4K_BIT) +#define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT) +#define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT) int __set_memory(unsigned long addr, int numpages, unsigned long flags); @@ -58,4 +62,7 @@ static inline int set_memory_4k(unsigned long addr, int numpages) return __set_memory(addr, numpages, SET_MEMORY_4K); } +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 77f31791044d..0b196dea2d92 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -4,6 +4,7 @@ * Author(s): Jan Glauber */ #include +#include #include #include #include @@ -101,6 +102,14 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); else if (flags & SET_MEMORY_X) new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); + if (flags & SET_MEMORY_INV) { + new = set_pte_bit(new, __pgprot(_PAGE_INVALID)); + } else if (flags & SET_MEMORY_DEF) { + new = __pte(pte_val(new) & PAGE_MASK); + new = set_pte_bit(new, PAGE_KERNEL); + if (!MACHINE_HAS_NX) + new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); + } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; addr += PAGE_SIZE; @@ -151,6 +160,14 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); + if (flags & SET_MEMORY_INV) { + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); + } else if (flags & SET_MEMORY_DEF) { + new = __pmd(pmd_val(new) & PMD_MASK); + new = set_pmd_bit(new, SEGMENT_KERNEL); + if (!MACHINE_HAS_NX) + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); + } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -232,6 +249,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); + if (flags & SET_MEMORY_INV) { + new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID)); + } else if (flags & SET_MEMORY_DEF) { + new = __pud(pud_val(new) & PUD_MASK); + new = set_pud_bit(new, REGION3_KERNEL); 
+ if (!MACHINE_HAS_NX) + new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); + } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } @@ -325,6 +350,16 @@ int __set_memory(unsigned long addr, int numpages, unsigned long flags) return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); } +int set_direct_map_invalid_noflush(struct page *page) +{ + return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV); +} + +int set_direct_map_default_noflush(struct page *page) +{ + return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF); +} + #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static void ipte_range(pte_t *pte, unsigned long address, int nr) -- cgit v1.2.3 From 7608f70adcb1ea6957d7b9e1d5bd53584178fbbc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Apr 2023 14:30:45 +0200 Subject: s390: wire up memfd_secret system call s390 supports ARCH_HAS_SET_DIRECT_MAP, therefore wire up the memfd_secret system call, which depends on it. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/syscalls/syscall.tbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 799147658dee..b68f47541169 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -449,7 +449,7 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -# 447 reserved for memfd_secret +447 common memfd_secret sys_memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node -- cgit v1.2.3 From 34e4c79f3ba9e3d7de56be8ef1a514950915e0ee Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Apr 2023 14:30:46 +0200 Subject: s390/mm: use VM_FLUSH_RESET_PERMS in module_alloc() Make use of the set_direct_map() calls for module allocations. In particular: - All changes to read-only permissions in kernel VA mappings are also applied to the direct mapping. Note that execute permissions are intentionally not applied to the direct mapping in order to make sure that all allocated pages within the direct mapping stay non-executable - module_alloc() passes the VM_FLUSH_RESET_PERMS to __vmalloc_node_range() to make sure that all implicit permission changes made to the direct mapping are reset when the allocated vm area is freed again Side effects: the direct mapping will be fragmented depending on how many vm areas with VM_FLUSH_RESET_PERMS and/or explicit page permission changes are allocated and freed again. For example, just after boot of a system the direct mapping statistics look like: $cat /proc/meminfo ... 
DirectMap4k: 111628 kB DirectMap1M: 16665600 kB DirectMap2G: 0 kB Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/module.c | 7 +++--- arch/s390/mm/pageattr.c | 55 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 07a13546980d..adf3796f4dab 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -62,9 +62,10 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), MODULES_END, - gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, - __builtin_return_address(0)); + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 0b196dea2d92..5ba3bd8a7b12 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -323,9 +323,6 @@ static int change_page_attr(unsigned long addr, unsigned long end, int rc = -EINVAL; pgd_t *pgdp; - if (addr == end) - return 0; - mutex_lock(&cpa_mutex); pgdp = pgd_offset_k(addr); do { if (pgd_none(*pgdp)) @@ -336,18 +333,66 @@ static int change_page_attr(unsigned long addr, unsigned long end, break; cond_resched(); } while (pgdp++, addr = next, addr < end && !rc); - mutex_unlock(&cpa_mutex); + return rc; +} + +static int change_page_attr_alias(unsigned long addr, unsigned long end, + unsigned long flags) +{ + unsigned long alias, offset, va_start, va_end; + struct vm_struct *area; + int rc = 0; + + /* + * Changes to read-only permissions on kernel VA mappings are also + * applied to the kernel direct mapping. Execute permissions are + * intentionally not transferred to keep all allocated pages within + * the direct mapping non-executable. + */ + flags &= SET_MEMORY_RO | SET_MEMORY_RW; + if (!flags) + return 0; + area = NULL; + while (addr < end) { + if (!area) + area = find_vm_area((void *)addr); + if (!area || !(area->flags & VM_ALLOC)) + return 0; + va_start = (unsigned long)area->addr; + va_end = va_start + area->nr_pages * PAGE_SIZE; + offset = (addr - va_start) >> PAGE_SHIFT; + alias = (unsigned long)page_address(area->pages[offset]); + rc = change_page_attr(alias, alias + PAGE_SIZE, flags); + if (rc) + break; + addr += PAGE_SIZE; + if (addr >= va_end) + area = NULL; + } return rc; } int __set_memory(unsigned long addr, int numpages, unsigned long flags) { + unsigned long end; + int rc; + if (!MACHINE_HAS_NX) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; + if (!numpages) + return 0; addr &= PAGE_MASK; - return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); + end = addr + numpages * PAGE_SIZE; + mutex_lock(&cpa_mutex); + rc = change_page_attr(addr, end, flags); + if (rc) + goto out; + rc = change_page_attr_alias(addr, end, flags); +out: + mutex_unlock(&cpa_mutex); + return rc; } int set_direct_map_invalid_noflush(struct page *page) -- cgit v1.2.3 From ccf7c3fb61ed7f3019b5be9fe70ccc0ab782cf2e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Apr 2023 15:17:02 +0200 Subject: s390: select ARCH_USE_SYM_ANNOTATIONS All old style assembly annotations have been converted for s390. 
Select ARCH_USE_SYM_ANNOTATIONS to make sure the old macros like ENTRY() aren't available anymore. This prevents that new code which uses the old macros will be added again. This follows what has been done for x86 with commit 2ce0d7f9766f ("x86/asm: Provide a Kconfig symbol for disabling old assembly annotations") and for arm64 with commit 50479d58eaa3 ("arm64: Disable old style assembly annotations"). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 37986abfe007..574bd48199f2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,6 +120,7 @@ config S390 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_SYM_ANNOTATIONS select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT -- cgit v1.2.3 From 2a405f6bb3a5b2baaa74dfc5aaa0e1b99145bd1b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Apr 2023 15:08:41 +0200 Subject: s390/stackleak: provide fast __stackleak_poison() implementation Provide an s390 specific __stackleak_poison() implementation which is faster than the generic variant. For the original implementation with an enforced 4kb stackframe for the getpid() system call the system call overhead increases by a factor of 3 if the stackleak feature is enabled. Using the s390 mvc based variant this is reduced to an increase of 25% instead. This is within the expected area, since the mvc based implementation is more or less a memset64() variant which comes with similar results. See commit 0b77d6701cf8 ("s390: implement memset16, memset32 & memset64"). Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Link: https://lore.kernel.org/r/20230405130841.1350565-3-hca@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index efffc28cbad8..dc17896a001a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -118,6 +118,41 @@ unsigned long vdso_size(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT +#define __stackleak_poison __stackleak_poison +static __always_inline void __stackleak_poison(unsigned long erase_low, + unsigned long erase_high, + unsigned long poison) +{ + unsigned long tmp, count; + + count = erase_high - erase_low; + if (!count) + return; + asm volatile( + " cghi %[count],8\n" + " je 2f\n" + " aghi %[count],-(8+1)\n" + " srlg %[tmp],%[count],8\n" + " ltgr %[tmp],%[tmp]\n" + " jz 1f\n" + "0: stg %[poison],0(%[addr])\n" + " mvc 8(256-8,%[addr]),0(%[addr])\n" + " la %[addr],256(%[addr])\n" + " brctg %[tmp],0b\n" + "1: stg %[poison],0(%[addr])\n" + " larl %[tmp],3f\n" + " ex %[count],0(%[tmp])\n" + " j 4f\n" + "2: stg %[poison],0(%[addr])\n" + " j 4f\n" + "3: mvc 8(1,%[addr]),0(%[addr])\n" + "4:\n" + : [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp) + : [poison] "d" (poison) + : "memory", "cc" + ); +} + /* * Thread structure */ -- cgit v1.2.3
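Read as a specification, the inline assembly above fills every 8-byte slot of [erase_low, erase_high) with the poison pattern. A plain C model of these semantics (a reference model only, not the actual implementation):

	static inline void stackleak_poison_model(unsigned long erase_low,
						  unsigned long erase_high,
						  unsigned long poison)
	{
		unsigned long *p = (unsigned long *)erase_low;

		/* both bounds are assumed to be 8-byte aligned */
		while ((unsigned long)p < erase_high)
			*p++ = poison;
	}

The speedup of the assembler variant comes from the overlapping mvc: since MVC copies strictly left to right, one byte at a time, copying a 256-byte block onto itself shifted by 8 bytes replicates the initial stg across the whole block, the same idiom the referenced memset64() commit uses.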