From f95b2c45d5f785f069d186b11b798f9c1d37dcda Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 10 Dec 2020 12:32:51 +0100 Subject: s390/tlb: make cleared_pXs flags consistent with generic code On s390 cleared_pXs flags in struct mmu_gather are set by corresponding pXd_free_tlb functions. Such approach is inconsistent with how the generic code interprets these flags, e.g pte_free_tlb() frees a PTE table - or a PMD level entity, and so on. This update does not bring any functional change, since s390 does not use the flags at the moment. Fixes: 9de7d833e3708 ("s390/tlb: Convert to generic mmu_gather") Link: https://lore.kernel.org/lkml/fbb00ac0-9104-8d25-f225-7b3d1b17a01f@huawei.com/ Reported-by: Zhenyu Ye Suggested-by: Gerald Schaefer Reviewed-by: Christian Borntraeger Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/tlb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 954fa8ca6cbd..fe6407f0eb1b 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -66,7 +66,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_ptes = 1; + tlb->cleared_pmds = 1; /* * page_table_free_rcu takes care of the allocation bit masks * of the 2K table fragments in the 4K page table page, @@ -110,7 +110,6 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_p4ds = 1; tlb_remove_table(tlb, p4d); } @@ -128,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, return; tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb->cleared_puds = 1; + tlb->cleared_p4ds = 1; tlb_remove_table(tlb, pud); } -- cgit v1.2.3 From 6110ccecd3c0c025ab7d5224a5feba5a664e084e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Jan 2021 19:39:23 +0100 Subject: s390/atomic: remove small optimization to fix clang build With commit f0cbd3b83ed4 ("s390/atomic: circumvent gcc 10 build regression") there was an attempt to workaroud a gcc build bug, however with the workaround a similar problem with clang appeared. It was recommended to use a workaround which would fail again with gcc. Therefore simply remove the optimization. It is just not worth the effort. Besides that all of this will be changed to use compiler atomic builtins instead anyway. See https://reviews.llvm.org/D90231 and https://reviews.llvm.org/D91786 Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/atomic.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 11c5952e1afa..5860ae790f2d 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -44,16 +44,6 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - /* - * Order of conditions is important to circumvent gcc 10 bug: - * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html - */ - if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { - __atomic_add_const(i, &v->counter); - return; - } -#endif __atomic_add(i, &v->counter); } @@ -115,16 +105,6 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - /* - * Order of conditions is important to circumvent gcc 10 bug: - * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html - */ - if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { - __atomic64_add_const(i, (long *)&v->counter); - return; - } -#endif __atomic64_add(i, (long *)&v->counter); } -- cgit v1.2.3 From efe5e0fea4b24872736c62a0bcfc3f99bebd2005 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Jan 2021 20:01:36 +0100 Subject: s390/bitops: remove small optimization to fix clang build clang does not know about the 'b1' construct used in bitops inline assembly. Since the plan is to use compiler atomic builtins anyway there is no point in requesting clang support for this. Especially if one considers that the kernel seems to be the only user of this. With removing this small optimization it is possible to compile the kernel also with -march=zEC12 and higher using clang. Build error: In file included from ./include/linux/bitops.h:32: ./arch/s390/include/asm/bitops.h:69:4: error: invalid operand in inline asm: 'oi $0,${1:b}' "oi %0,%b1\n" ^ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/bitops.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 431e208a5ea4..31121d32f81d 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -61,18 +61,6 @@ static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned lon unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "oi %0,%b1\n" - : "+Q" (*caddr) - : "i" (1 << (nr & 7)) - : "cc", "memory"); - return; - } -#endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); __atomic64_or(mask, (long *)addr); } @@ -82,18 +70,6 @@ static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned l unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "ni %0,%b1\n" - : "+Q" (*caddr) - : "i" (~(1 << (nr & 7))) - : "cc", "memory"); - return; - } -#endif mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); __atomic64_and(mask, (long *)addr); } @@ -104,18 +80,6 @@ static __always_inline void arch_change_bit(unsigned long nr, unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; -#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES - if (__builtin_constant_p(nr)) { - unsigned char *caddr = __bitops_byte(nr, ptr); - - asm volatile( - "xi %0,%b1\n" - : "+Q" (*caddr) - : "i" (1 << (nr & 7)) - : "cc", "memory"); - return; - } -#endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); __atomic64_xor(mask, (long *)addr); } -- cgit v1.2.3 From ac94a2911e84a7b3d29d725f8f43b07db1c916f2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Jan 2021 19:56:12 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c4f6ff98a612..c1c4f97897cd 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,6 +40,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 51135893cffe..467a06d92be6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_LIVEPATCH=y +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 1ef211dae77a..0200ccf10ace 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_COMPAT_BRK is not set +CONFIG_MARCH_ZEC12=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 -- cgit v1.2.3 From 56e62a73702836017564eaacd5212e4d0fa1c01d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 21 Nov 2020 11:14:56 +0100 Subject: s390: convert to generic entry This patch converts s390 to use the generic entry infrastructure from kernel/entry/*. There are a few special things on s390: - PIF_PER_TRAP is moved to TIF_PER_TRAP as the generic code doesn't know about our PIF flags in exit_to_user_mode_loop(). - The old code had several ways to restart syscalls: a) PIF_SYSCALL_RESTART, which was only set during execve to force a restart after upgrading a process (usually qemu-kvm) to pgste page table extensions. b) PIF_SYSCALL, which is set by do_signal() to indicate that the current syscall should be restarted. This is changed so that do_signal() now also uses PIF_SYSCALL_RESTART. Continuing to use PIF_SYSCALL doesn't work with the generic code, and changing it to PIF_SYSCALL_RESTART makes PIF_SYSCALL and PIF_SYSCALL_RESTART more unique. - On s390 calling sys_sigreturn or sys_rt_sigreturn is implemented by executing a svc instruction on the process stack which causes a fault. While handling that fault the fault code sets PIF_SYSCALL to hand over processing to the syscall code on exit to usermode. The patch introduces PIF_SYSCALL_RET_SET, which is set if ptrace sets a return value for a syscall. The s390x ptrace ABI uses r2 both for the syscall number and return value, so ptrace cannot set the syscall number + return value at the same time. The flag makes handling that a bit easier. do_syscall() will just skip executing the syscall if PIF_SYSCALL_RET_SET is set. CONFIG_DEBUG_ASCE was removd in favour of the generic CONFIG_DEBUG_ENTRY. CR1/7/13 will be checked both on kernel entry and exit to contain the correct asces. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 10 +- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 1 - arch/s390/include/asm/cputime.h | 2 + arch/s390/include/asm/elf.h | 7 +- arch/s390/include/asm/entry-common.h | 60 +++ arch/s390/include/asm/fpu/api.h | 2 + arch/s390/include/asm/idle.h | 4 +- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/nmi.h | 1 + arch/s390/include/asm/processor.h | 52 ++- arch/s390/include/asm/ptrace.h | 9 +- arch/s390/include/asm/syscall.h | 11 +- arch/s390/include/asm/thread_info.h | 3 + arch/s390/include/asm/uaccess.h | 2 +- arch/s390/include/asm/vtime.h | 14 + arch/s390/include/uapi/asm/ptrace.h | 5 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 19 +- arch/s390/kernel/compat_signal.c | 1 + arch/s390/kernel/entry.S | 803 ++++------------------------------- arch/s390/kernel/entry.h | 12 +- arch/s390/kernel/fpu.c | 88 ++++ arch/s390/kernel/idle.c | 24 ++ arch/s390/kernel/irq.c | 89 +++- arch/s390/kernel/nmi.c | 19 +- arch/s390/kernel/process.c | 30 +- arch/s390/kernel/ptrace.c | 117 +---- arch/s390/kernel/setup.c | 3 +- arch/s390/kernel/signal.c | 12 +- arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/sys_s390.c | 102 ----- arch/s390/kernel/syscall.c | 172 ++++++++ arch/s390/kernel/traps.c | 65 +++ arch/s390/kernel/uprobes.c | 6 +- arch/s390/kvm/kvm-s390.c | 3 + arch/s390/kvm/vsie.c | 3 + arch/s390/lib/uaccess.c | 12 +- arch/s390/mm/fault.c | 2 +- 40 files changed, 754 insertions(+), 1022 deletions(-) create mode 100644 arch/s390/include/asm/entry-common.h delete mode 100644 arch/s390/kernel/sys_s390.c create mode 100644 arch/s390/kernel/syscall.c (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c72874f09741..41a2c58c6e7a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES + select GENERIC_ENTRY select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_PTDUMP diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 6bfaceebbbc0..ef96c25fa921 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -6,10 +6,12 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y -config DEBUG_USER_ASCE - bool "Debug User ASCE" +config DEBUG_ENTRY + bool "Debug low-level entry code" + depends on DEBUG_KERNEL help - Check on exit to user space that address space control - elements are setup correctly. + This option enables sanity checks in s390 low-level entry code. + Some of these sanity checks may slow down kernel entries and + exits or otherwise impact performance. If unsure, say N. diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c1c4f97897cd..2d8dcce6e028 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -833,7 +833,6 @@ CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set -CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -857,3 +856,4 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m +CONFIG_DEBUG_ENTRY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 467a06d92be6..3eadcda4aca9 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -781,7 +781,6 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y -CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index cb729d111e20..1d389847b588 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -35,4 +35,6 @@ u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) +void account_idle_time_irq(void); + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 5775fc22f410..66d51ad090ab 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -233,8 +233,7 @@ extern char elf_platform[]; do { \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ - current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + current->thread.sys_call_table = sys_call_table; \ } while (0) #else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ @@ -245,11 +244,11 @@ do { \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table_emu; \ + sys_call_table_emu; \ } else { \ clear_thread_flag(TIF_31BIT); \ current->thread.sys_call_table = \ - (unsigned long) &sys_call_table; \ + sys_call_table; \ } \ } while (0) #endif /* CONFIG_COMPAT */ diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h new file mode 100644 index 000000000000..75cebc80474e --- /dev/null +++ b/arch/s390/include/asm/entry-common.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_S390_ENTRY_COMMON_H +#define ARCH_S390_ENTRY_COMMON_H + +#include +#include +#include +#include +#include +#include + +#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) + +void do_per_trap(struct pt_regs *regs); +void do_syscall(struct pt_regs *regs); + +typedef void (*pgm_check_func)(struct pt_regs *regs); + +extern pgm_check_func pgm_check_table[128]; + +#ifdef CONFIG_DEBUG_ENTRY +static __always_inline void arch_check_user_regs(struct pt_regs *regs) +{ + debug_user_asce(0); +} + +#define arch_check_user_regs arch_check_user_regs +#endif /* CONFIG_DEBUG_ENTRY */ + +static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, + unsigned long ti_work) +{ + if (ti_work & _TIF_PER_TRAP) { + clear_thread_flag(TIF_PER_TRAP); + do_per_trap(regs); + } + + if (ti_work & _TIF_GUARDED_STORAGE) + gs_load_bc_cb(regs); +} + +#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work + +static __always_inline void arch_exit_to_user_mode(void) +{ + if (test_cpu_flag(CIF_FPU)) + __load_fpu_regs(); + + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + debug_user_asce(1); +} + +#define arch_exit_to_user_mode arch_exit_to_user_mode + +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1)); +} + +#endif diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 34a7ae68485c..a959b815a58b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -47,6 +47,8 @@ #include void save_fpu_regs(void); +void load_fpu_regs(void); +void __load_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) { diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 6d4226dcf42a..b04f6a794cdf 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -20,11 +20,13 @@ struct s390_idle_data { unsigned long long clock_idle_exit; unsigned long long timer_idle_enter; unsigned long long timer_idle_exit; + unsigned long mt_cycles_enter[8]; }; extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; -void psw_idle(struct s390_idle_data *, unsigned long); +void psw_idle(struct s390_idle_data *data, unsigned long psw_mask); +void psw_idle_exit(void); #endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 69ce9191eaf1..4d65c8e4e6d0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -81,8 +81,8 @@ struct lowcore { psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. */ - __u64 sync_enter_timer; /* 0x02b0 */ - __u64 async_enter_timer; /* 0x02b8 */ + __u64 sys_enter_timer; /* 0x02b0 */ + __u8 pad_0x02b8[0x02c0-0x02b8]; /* 0x02b8 */ __u64 mcck_enter_timer; /* 0x02c0 */ __u64 exit_timer; /* 0x02c8 */ __u64 user_timer; /* 0x02d0 */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 5afee80cff58..20e51c9ff240 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -99,6 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc); void nmi_free_per_cpu(struct lowcore *lc); void s390_handle_mcck(void); +void __s390_handle_mcck(void); int s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2058a435add4..fa67b66bf144 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -38,6 +38,11 @@ #include #include #include +#include + +typedef long (*sys_call_ptr_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); static inline void set_cpu_flag(int flag) { @@ -101,31 +106,32 @@ extern void __bpon(void); */ struct thread_struct { unsigned int acrs[NUM_ACRS]; - unsigned long ksp; /* kernel stack pointer */ - unsigned long user_timer; /* task cputime in user space */ - unsigned long guest_timer; /* task cputime in kvm guest */ - unsigned long system_timer; /* task cputime in kernel space */ - unsigned long hardirq_timer; /* task cputime in hardirq context */ - unsigned long softirq_timer; /* task cputime in softirq context */ - unsigned long sys_call_table; /* system call table address */ - unsigned long gmap_addr; /* address of last gmap fault. */ - unsigned int gmap_write_flag; /* gmap fault write indication */ - unsigned int gmap_int_code; /* int code of last gmap fault */ - unsigned int gmap_pfault; /* signal of a pending guest pfault */ + unsigned long ksp; /* kernel stack pointer */ + unsigned long user_timer; /* task cputime in user space */ + unsigned long guest_timer; /* task cputime in kvm guest */ + unsigned long system_timer; /* task cputime in kernel space */ + unsigned long hardirq_timer; /* task cputime in hardirq context */ + unsigned long softirq_timer; /* task cputime in softirq context */ + const sys_call_ptr_t *sys_call_table; /* system call table address */ + unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_write_flag; /* gmap fault write indication */ + unsigned int gmap_int_code; /* int code of last gmap fault */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ + /* Per-thread information related to debugging */ - struct per_regs per_user; /* User specified PER registers */ - struct per_event per_event; /* Cause of the last PER trap */ - unsigned long per_flags; /* Flags to control debug behavior */ - unsigned int system_call; /* system call number in signal */ - unsigned long last_break; /* last breaking-event-address. */ - /* pfault_wait is used to block the process on a pfault event */ + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ + unsigned int system_call; /* system call number in signal */ + unsigned long last_break; /* last breaking-event-address. */ + /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; - struct gs_cb *gs_cb; /* Current guarded storage cb */ - struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ - unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at * the end. @@ -184,6 +190,7 @@ static inline void release_thread(struct task_struct *tsk) { } /* Free guarded storage control block */ void guarded_storage_release(struct task_struct *tsk); +void gs_load_bc_cb(struct pt_regs *regs); unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ @@ -324,6 +331,11 @@ extern void memcpy_absolute(void *, void *, size_t); extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) +{ + return arch_irqs_disabled_flags(regs->psw.mask); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 73ca7f7cac33..f828be78937f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -11,13 +11,13 @@ #include #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ -#define PIF_SYSCALL_RESTART 2 /* restart the current system call */ +#define PIF_SYSCALL_RESTART 1 /* restart the current system call */ +#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_PER_TRAP BIT(PIF_PER_TRAP) #define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART) +#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ @@ -68,6 +68,9 @@ enum { &(*(struct psw_bits *)(&(__psw))); \ })) +#define PGM_INT_CODE_MASK 0x7f +#define PGM_INT_CODE_PER 0x80 + /* * The pt_regs struct defines the way the registers are stored on * the stack during a system call. diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index d9d5de0f67ff..9107e3dab68c 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -14,8 +14,8 @@ #include #include -extern const unsigned long sys_call_table[]; -extern const unsigned long sys_call_table_emu[]; +extern const sys_call_ptr_t sys_call_table[]; +extern const sys_call_ptr_t sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) @@ -56,6 +56,7 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { + set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); regs->gprs[2] = error ? error : val; } @@ -97,4 +98,10 @@ static inline int syscall_get_arch(struct task_struct *task) #endif return AUDIT_ARCH_S390X; } + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} + #endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3c5b1f909b6d..28696ca7680d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -36,6 +36,7 @@ */ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ }; /* @@ -68,6 +69,7 @@ void arch_setup_new_exec(void); #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ +#define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ @@ -91,6 +93,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) #define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) +#define _TIF_PER_TRAP BIT(TIF_PER_TRAP) #define _TIF_31BIT BIT(TIF_31BIT) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c6707885e7c2..4756d2937e54 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,7 +18,7 @@ #include #include -void debug_user_asce(void); +void debug_user_asce(int exit); static inline int __range_ok(unsigned long addr, unsigned long size) { diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index fac6a67988eb..fe17e448c0c5 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -4,4 +4,18 @@ #define __ARCH_HAS_VTIME_TASK_SWITCH +static inline void update_timer_sys(void) +{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; +} + +static inline void update_timer_mcck(void) +{ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; + S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer; + S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer; +} + #endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dd73b7f07423..c97818a382f3 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,7 +34,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o -obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 79724d861dc9..d22bb28ef50c 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -26,26 +26,14 @@ int main(void) BLANK(); /* thread struct offsets */ OFFSET(__THREAD_ksp, thread_struct, ksp); - OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table); - OFFSET(__THREAD_last_break, thread_struct, last_break); - OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc); - OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs); - OFFSET(__THREAD_per_cause, thread_struct, per_event.cause); - OFFSET(__THREAD_per_address, thread_struct, per_event.address); - OFFSET(__THREAD_per_paid, thread_struct, per_event.paid); - OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb); BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); BLANK(); /* pt_regs offsets */ - OFFSET(__PT_ARGS, pt_regs, args); OFFSET(__PT_PSW, pt_regs, psw); OFFSET(__PT_GPRS, pt_regs, gprs); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); - OFFSET(__PT_INT_CODE, pt_regs, int_code); - OFFSET(__PT_INT_PARM, pt_regs, int_parm); - OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); @@ -64,6 +52,7 @@ int main(void) OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); + OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); @@ -115,13 +104,9 @@ int main(void) OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); - OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer); - OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer); + OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); - OFFSET(__LC_USER_TIMER, lowcore, user_timer); - OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer); - OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer); OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 38d4bdbc34b9..1d0e17ec93eb 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -118,6 +118,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f1ba197b10c0..785425b59ac1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -51,38 +51,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ - _TIF_NOTIFY_SIGNAL) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_FPU) -_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) - _LPP_OFFSET = __LC_LPP - .macro TRACE_IRQS_ON -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_on_caller -#endif - .endm - - .macro TRACE_IRQS_OFF -#ifdef CONFIG_TRACE_IRQFLAGS - basr %r2,%r0 - brasl %r14,trace_hardirqs_off_caller -#endif - .endm - - .macro LOCKDEP_SYS_EXIT -#ifdef CONFIG_LOCKDEP - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jz .+10 - brasl %r14,lockdep_sys_exit -#endif - .endm - .macro CHECK_STACK savearea #ifdef CONFIG_CHECK_STACK tml %r15,STACK_SIZE - CONFIG_STACK_GUARD @@ -91,12 +61,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro DEBUG_USER_ASCE -#ifdef CONFIG_DEBUG_USER_ASCE - brasl %r14,debug_user_asce -#endif - .endm - .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -117,9 +81,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer,clock + .macro SWITCH_KERNEL savearea tmhh %r8,0x0001 # interrupting from user ? - jnz 4f + jnz 1f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -130,92 +94,16 @@ _LPP_OFFSET = __LC_LPP lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,.Lcleanup_sie #endif -0: larl %r13,.Lpsw_idle_exit - cgr %r13,%r9 - jne 3f - - larl %r1,smp_cpu_mtid - llgf %r1,0(%r1) - ltgr %r1,%r1 - jz 2f # no SMT, skip mt_cycles calculation - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) - larl %r3,mt_cycles - ag %r3,__LC_PERCPU_OFFSET - la %r4,__SF_EMPTY+16(%r15) -1: lg %r0,0(%r3) - slg %r0,0(%r4) - alg %r0,64(%r4) - stg %r0,0(%r3) - la %r3,8(%r3) - la %r4,8(%r4) - brct %r1,1b - -2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock - mvc __TIMER_IDLE_EXIT(8,%r2), \timer - # account system time going idle - ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT - - lg %r13,__LC_STEAL_TIMER - alg %r13,__CLOCK_IDLE_ENTER(%r2) - slg %r13,__LC_LAST_UPDATE_CLOCK - stg %r13,__LC_STEAL_TIMER - - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - - lg %r13,__LC_SYSTEM_TIMER - alg %r13,__LC_LAST_UPDATE_TIMER - slg %r13,__TIMER_IDLE_ENTER(%r2) - stg %r13,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - - nihh %r8,0xfcfd # clear wait state and irq bits -3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? - slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT - jnz 5f - CHECK_STACK \savearea +0: CHECK_STACK \savearea + lgr %r11,%r15 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 6f -4: UPDATE_VTIME %r14,%r15,\timer - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -5: lg %r15,__LC_ASYNC_STACK # load async stack -6: la %r11,STACK_FRAME_OVERHEAD(%r15) - .endm - - .macro UPDATE_VTIME w1,w2,enter_timer - lg \w1,__LC_EXIT_TIMER - lg \w2,__LC_LAST_UPDATE_TIMER - slg \w1,\enter_timer - slg \w2,__LC_EXIT_TIMER - alg \w1,__LC_USER_TIMER - alg \w2,__LC_SYSTEM_TIMER - stg \w1,__LC_USER_TIMER - stg \w2,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer - .endm - - .macro RESTORE_SM_CLEAR_PER - stg %r8,__LC_RETURN_PSW - ni __LC_RETURN_PSW,0xbf - ssm __LC_RETURN_PSW - .endm - - .macro ENABLE_INTS - stosm __SF_EMPTY(%r15),3 - .endm - - .macro ENABLE_INTS_TRACE - TRACE_IRQS_ON - ENABLE_INTS - .endm - - .macro DISABLE_INTS - stnsm __SF_EMPTY(%r15),0xfc - .endm - - .macro DISABLE_INTS_TRACE - DISABLE_INTS - TRACE_IRQS_OFF + stg %r11,__SF_BACKCHAIN(%r15) + j 2f +1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro STCK savearea @@ -267,18 +155,17 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm - GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: /* - * This nop exists only in order to avoid that __switch_to starts at + * This nop exists only in order to avoid that __bpon starts at * the beginning of the kprobes text section. In that case we would * have several symbols at the same address. E.g. objdump would take * an arbitrary symbol name when disassembling this code. - * With the added nop in between the __switch_to symbol is unique + * With the added nop in between the __bpon symbol is unique * again. */ nop 0 @@ -327,10 +214,6 @@ ENTRY(sie64a) stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? - jno .Lsie_load_guest_gprs - brasl %r14,load_fpu_regs # load guest fp/vx regs -.Lsie_load_guest_gprs: lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 @@ -370,7 +253,6 @@ sie_exit: stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to xgr %r1,%r1 # prevent speculative use - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -397,249 +279,68 @@ EXPORT_SYMBOL(sie_exit) */ ENTRY(system_call) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF - lg %r12,__LC_CURRENT - lghi %r14,_PIF_SYSCALL + lghi %r14,0 .Lsysc_per: lctlg %c1,%c1,__LC_KERNEL_ASCE - lghi %r13,__TASK_thread + lg %r12,__LC_CURRENT lg %r15,__LC_KERNEL_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - ENABLE_INTS -.Lsysc_do_svc: + stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # clear user controlled register to prevent speculative use xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - slag %r8,%r8,3 # shift and test for svc 0 - jnz .Lsysc_nr_ok - # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 - sth %r1,__PT_INT_CODE+2(%r11) - cghi %r1,NR_syscalls - jnl .Lsysc_nr_ok - slag %r8,%r1,3 -.Lsysc_nr_ok: - stg %r2,__PT_ORIG_GPR2(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r9,0(%r8,%r10) # get system call add. - TSTMSK __TI_flags(%r12),_TIF_TRACE - jnz .Lsysc_tracesys - BASR_EX %r14,%r9 # call sys_xxxx - stg %r2,__PT_R2(%r11) # store return value - -.Lsysc_return: -#ifdef CONFIG_DEBUG_RSEQ - lgr %r2,%r11 - brasl %r14,rseq_syscall -#endif - LOCKDEP_SYS_EXIT -.Lsysc_tif: - DISABLE_INTS - TSTMSK __PT_FLAGS(%r11),_PIF_WORK - jnz .Lsysc_work - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lsysc_work # check for work - DEBUG_USER_ASCE + xgr %r1,%r1 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r8,%r8 + xgr %r9,%r9 + xgr %r10,%r10 + xgr %r11,%r11 + la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + lgr %r3,%r14 + brasl %r14,__do_syscall lctlg %c1,%c1,__LC_USER_ASCE - BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lsysc_skip_fpu - brasl %r14,load_fpu_regs -.Lsysc_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE - -# -# One of the work bits is on. Find out which one. -# -.Lsysc_work: - ENABLE_INTS - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lsysc_reschedule - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart -#ifdef CONFIG_UPROBES - TSTMSK __TI_flags(%r12),_TIF_UPROBE - jo .Lsysc_uprobe_notify -#endif - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lsysc_guarded_storage - TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP - jo .Lsysc_singlestep -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lsysc_patch_pending # handle live patching just before - # signals and possible syscall restart -#endif - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART - jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lsysc_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lsysc_notify_resume - j .Lsysc_return - -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lsysc_reschedule: - larl %r14,.Lsysc_return - jg schedule - -# -# _TIF_SIGPENDING is set, call do_signal -# -.Lsysc_sigpending: - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jno .Lsysc_return -.Lsysc_do_syscall: - lghi %r13,__TASK_thread - lmg %r2,%r7,__PT_R2(%r11) # load svc arguments - lghi %r1,0 # svc 0 returns -ENOSYS - j .Lsysc_do_svc - -# -# _TIF_NOTIFY_RESUME is set, call do_notify_resume -# -.Lsysc_notify_resume: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_notify_resume - -# -# _TIF_UPROBE is set, call uprobe_notify_resume -# -#ifdef CONFIG_UPROBES -.Lsysc_uprobe_notify: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg uprobe_notify_resume -#endif - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lsysc_guarded_storage: - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg gs_load_bc_cb -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lsysc_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to task struct - larl %r14,.Lsysc_return - jg klp_update_patch_state -#endif - -# -# _PIF_PER_TRAP is set, call do_per_trap -# -.Lsysc_singlestep: - ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_per_trap - -# -# _PIF_SYSCALL_RESTART is set, repeat the current system call -# -.Lsysc_syscall_restart: - ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART - lmg %r1,%r7,__PT_R1(%r11) # load svc arguments - lg %r2,__PT_ORIG_GPR2(%r11) - j .Lsysc_do_svc - -# -# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before -# and after the system call -# -.Lsysc_tracesys: - lgr %r2,%r11 # pass pointer to pt_regs - la %r3,0 - llgh %r0,__PT_INT_CODE+2(%r11) - stg %r0,__PT_R2(%r11) - brasl %r14,do_syscall_trace_enter - lghi %r0,NR_syscalls - clgr %r0,%r2 - jnh .Lsysc_tracenogo - sllg %r8,%r2,3 - lg %r9,0(%r8,%r10) - lmg %r3,%r7,__PT_R3(%r11) - stg %r7,STACK_FRAME_OVERHEAD(%r15) - lg %r2,__PT_ORIG_GPR2(%r11) - BASR_EX %r14,%r9 # call sys_xxx - stg %r2,__PT_R2(%r11) # store return value -.Lsysc_tracenogo: - TSTMSK __TI_flags(%r12),_TIF_TRACE - jz .Lsysc_return - lgr %r2,%r11 # pass pointer to pt_regs - larl %r14,.Lsysc_return - jg do_syscall_trace_exit ENDPROC(system_call) # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) - la %r11,STACK_FRAME_OVERHEAD(%r15) - lg %r12,__LC_CURRENT - brasl %r14,schedule_tail - tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? - jne .Lsysc_tracenogo - # it's a kernel thread - lmg %r9,%r10,__PT_R9(%r11) # load gprs - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo + lgr %r3,%r11 + brasl %r14,__ret_from_fork + lctlg %c1,%c1,__LC_USER_ASCE + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + stpt __LC_EXIT_TIMER + b __LC_RETURN_LPSWE ENDPROC(ret_from_fork) -ENTRY(kernel_thread_starter) - la %r2,0(%r10) - BASR_EX %r14,%r9 - j .Lsysc_tracenogo -ENDPROC(kernel_thread_starter) - /* * Program check handler routine */ ENTRY(pgm_check_handler) - stpt __LC_SYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC - lg %r10,__LC_LAST_BREAK - srag %r11,%r10,12 - jnz 0f - /* if __LC_LAST_BREAK is < 4096, it contains one of - * the lpswe addresses in lowcore. Set it to 1 (initial state) - * to prevent leaking that address to userspace. - */ - lghi %r10,1 -0: lg %r12,__LC_CURRENT - lghi %r11,0 + lg %r12,__LC_CURRENT + lghi %r10,0 lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce lctlg %c1,%c1,__LC_KERNEL_ASCE - j 3f + j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a @@ -653,7 +354,7 @@ ENTRY(pgm_check_handler) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - lghi %r11,_PIF_GUEST_FAULT + lghi %r10,_PIF_GUEST_FAULT #endif 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault @@ -661,82 +362,37 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 5f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f -3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + # CHECK_VMAP_STACK branches to stack_overflow or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f +3: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK - lgr %r14,%r12 - aghi %r14,__TASK_thread # pointer to thread_struct - lghi %r13,__LC_PGM_TDB - tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 4f - mvc __THREAD_trap_tdb(256,%r14),0(%r13) -4: stg %r10,__THREAD_last_break(%r14) -5: lgr %r13,%r11 - la %r11,STACK_FRAME_OVERHEAD(%r15) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) + stg %r10,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 xgr %r6,%r6 xgr %r7,%r7 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - stmg %r8,%r9,__PT_PSW(%r11) - mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC - mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - stg %r13,__PT_FLAGS(%r11) - stg %r10,__PT_ARGS(%r11) - tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 6f - tmhh %r8,0x0001 # kernel per event ? - jz .Lpgm_kprobe - oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP - mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS - mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE - mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - larl %r1,pgm_check_table - llgh %r10,__PT_INT_CODE+2(%r11) - nill %r10,0x007f - sll %r10,3 - je .Lpgm_return - lg %r9,0(%r10,%r1) # load address of handler routine - lgr %r2,%r11 # pass pointer to pt_regs - BASR_EX %r14,%r9 # branch to interrupt-handler -.Lpgm_return: - LOCKDEP_SYS_EXIT - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lpgm_restore - TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL - jo .Lsysc_do_syscall - j .Lsysc_tif -.Lpgm_restore: - DISABLE_INTS - TSTMSK __LC_CPU_FLAGS, _CIF_FPU - jz .Lpgm_skip_fpu - brasl %r14,load_fpu_regs -.Lpgm_skip_fpu: - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + lgr %r2,%r11 + brasl %r14,__do_pgm_check + tmhh %r8,0x0001 # returning to user space? + jno .Lpgm_exit_kernel + lctlg %c1,%c1,__LC_USER_ASCE + BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - lmg %r0,%r15,__PT_R0(%r11) +.Lpgm_exit_kernel: + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) b __LC_RETURN_LPSWE -# -# PER event in supervisor state, must be kprobes -# -.Lpgm_kprobe: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - RESTORE_SM_CLEAR_PER - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_per_trap - j .Lpgm_return - # # single stepped system call # @@ -744,26 +400,26 @@ ENTRY(pgm_check_handler) mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 - lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lghi %r14,1 lpswe __LC_RETURN_PSW # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* - * IO interrupt handler routine + * Interrupt handler macro used for external and IO interrupts. */ -ENTRY(io_int_handler) +.macro INT_HANDLER name,lc_old_psw,handler +ENTRY(\name) STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER + stpt __LC_SYS_ENTER_TIMER BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK + lmg %r8,%r9,\lc_old_psw + SWITCH_KERNEL __LC_SAVE_AREA_ASYNC stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -772,322 +428,48 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lio_skip_asce + tm %r8,0x0001 # coming from user space? + jno 1f lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lio_skip_asce: - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF -.Lio_loop: - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,IO_INTERRUPT - tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? - jz .Lio_call - lghi %r3,THIN_INTERRUPT -.Lio_call: - brasl %r14,do_IRQ - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR - jz .Lio_return - tpi 0 - jz .Lio_return - mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID - j .Lio_loop -.Lio_return: - LOCKDEP_SYS_EXIT - TSTMSK __TI_flags(%r12),_TIF_WORK - jnz .Lio_work # there is work to do (signals etc.) - TSTMSK __LC_CPU_FLAGS,_CIF_WORK - jnz .Lio_work -.Lio_restore: - TRACE_IRQS_ON +1: lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,\handler mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lio_exit_kernel - DEBUG_USER_ASCE + tmhh %r8,0x0001 # returning to user ? + jno 2f lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER -.Lio_exit_kernel: - lmg %r0,%r15,__PT_R0(%r11) +2: lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE -.Lio_done: - -# -# There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK work -# 2) if we return to kernel code and kvm is enabled check if we need to -# modify the psw to leave SIE -# 3) if we return to kernel code and preemptive scheduling is enabled check -# the preemption counter and if it is zero call preempt_schedule_irq -# Before any work can be done, a switch to the kernel stack is required. -# -.Lio_work: - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPTION - # check for preemptive scheduling - icm %r0,15,__LC_PREEMPT_COUNT - jnz .Lio_restore # preemption is disabled - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jno .Lio_restore - # switch to kernel stack - lg %r1,__PT_R15(%r11) - aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - brasl %r14,preempt_schedule_irq - j .Lio_return -#else - j .Lio_restore -#endif - -# -# Need to do work before returning to userspace, switch to kernel stack -# -.Lio_work_user: - lg %r1,__LC_KERNEL_STACK - mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) - la %r11,STACK_FRAME_OVERHEAD(%r1) - lgr %r15,%r1 - -# -# One of the work bits is on. Find out which one. -# - TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED - jo .Lio_reschedule -#ifdef CONFIG_LIVEPATCH - TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING - jo .Lio_patch_pending -#endif - TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) - jnz .Lio_sigpending - TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME - jo .Lio_notify_resume - TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE - jo .Lio_guarded_storage - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lio_vxrs - j .Lio_return - -# -# CIF_FPU is set, restore floating-point controls and floating-point registers. -# -.Lio_vxrs: - larl %r14,.Lio_return - jg load_fpu_regs - -# -# _TIF_GUARDED_STORAGE is set, call guarded_storage_load -# -.Lio_guarded_storage: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,gs_load_bc_cb - DISABLE_INTS_TRACE - j .Lio_return +ENDPROC(\name) +.endm -# -# _TIF_NEED_RESCHED is set, call schedule -# -.Lio_reschedule: - ENABLE_INTS_TRACE - brasl %r14,schedule # call scheduler - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_PATCH_PENDING is set, call klp_update_patch_state -# -#ifdef CONFIG_LIVEPATCH -.Lio_patch_pending: - lg %r2,__LC_CURRENT # pass pointer to task struct - larl %r14,.Lio_return - jg klp_update_patch_state -#endif - -# -# _TIF_SIGPENDING or is set, call do_signal -# -.Lio_sigpending: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_signal - DISABLE_INTS_TRACE - j .Lio_return - -# -# _TIF_NOTIFY_RESUME or is set, call do_notify_resume -# -.Lio_notify_resume: - ENABLE_INTS_TRACE - lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_notify_resume - DISABLE_INTS_TRACE - j .Lio_return -ENDPROC(io_int_handler) - -/* - * External interrupt handler routine - */ -ENTRY(ext_int_handler) - STCK __LC_INT_CLOCK - stpt __LC_ASYNC_ENTER_TIMER - BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lg %r12,__LC_CURRENT - lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK - stmg %r0,%r7,__PT_R0(%r11) - # clear user controlled registers to prevent speculative use - xgr %r0,%r0 - xgr %r1,%r1 - xgr %r2,%r2 - xgr %r3,%r3 - xgr %r4,%r4 - xgr %r5,%r5 - xgr %r6,%r6 - xgr %r7,%r7 - xgr %r10,%r10 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - stmg %r8,%r9,__PT_PSW(%r11) - tm __PT_PSW+1(%r11),0x01 # coming from user space? - jno .Lext_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE -.Lext_skip_asce: - lghi %r1,__LC_EXT_PARAMS2 - mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR - mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS - mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_OFF - lgr %r2,%r11 # pass pointer to pt_regs - lghi %r3,EXT_INTERRUPT - brasl %r14,do_IRQ - j .Lio_return -ENDPROC(ext_int_handler) +INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq +INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq /* * Load idle PSW. */ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) - larl %r1,.Lpsw_idle_exit + larl %r1,psw_idle_exit stg %r1,__SF_EMPTY+8(%r15) larl %r1,smp_cpu_mtid llgf %r1,0(%r1) ltgr %r1,%r1 jz .Lpsw_idle_stcctm - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) + .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) -.Lpsw_idle_exit: +.globl psw_idle_exit +psw_idle_exit: BR_EX %r14 ENDPROC(psw_idle) -/* - * Store floating-point controls and floating-point or vector register - * depending whether the vector facility is available. A critical section - * cleanup assures that the registers are stored even if interrupted for - * some other work. The CIF_FPU flag is set to trigger a lazy restore - * of the register contents at return from io or a system call. - */ -ENTRY(save_fpu_regs) - stnsm __SF_EMPTY(%r15),0xfc - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lsave_fpu_regs_exit - stfpc __THREAD_FPU_fpc(%r2) - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz .Lsave_fpu_regs_fp # no -> store FP regs - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j .Lsave_fpu_regs_done # -> set CIF_FPU flag -.Lsave_fpu_regs_fp: - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -.Lsave_fpu_regs_done: - oi __LC_CPU_FLAGS+7,_CIF_FPU -.Lsave_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lsave_fpu_regs_end: -ENDPROC(save_fpu_regs) -EXPORT_SYMBOL(save_fpu_regs) - -/* - * Load floating-point controls and floating-point or vector registers. - * A critical section cleanup assures that the register contents are - * loaded even if interrupted for some other work. - * - * There are special calling conventions to fit into sysc and io return work: - * %r15: - * The function requires: - * %r4 - */ -load_fpu_regs: - stnsm __SF_EMPTY(%r15),0xfc - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jno .Lload_fpu_regs_exit - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz .Lload_fpu_regs_fp # -> no VX, load FP regs - VLM %v0,%v15,0,%r4 - VLM %v16,%v31,256,%r4 - j .Lload_fpu_regs_done -.Lload_fpu_regs_fp: - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -.Lload_fpu_regs_done: - ni __LC_CPU_FLAGS+7,255-_CIF_FPU -.Lload_fpu_regs_exit: - ssm __SF_EMPTY(%r15) - BR_EX %r14 -.Lload_fpu_regs_end: -ENDPROC(load_fpu_regs) - /* * Machine check handler routines */ @@ -1146,11 +528,8 @@ ENTRY(mcck_int_handler) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYNC_ENTER_TIMER - clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - jl 0f - la %r14,__LC_ASYNC_ENTER_TIMER -0: clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER + clc 0(8,%r14),__LC_EXIT_TIMER jl 1f la %r14,__LC_EXIT_TIMER 1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER @@ -1165,14 +544,13 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK + SWITCH_KERNEL __LC_GPREGS_SAVE_AREA+64 .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 - xgr %r2,%r2 xgr %r3,%r3 xgr %r4,%r4 xgr %r5,%r5 @@ -1183,7 +561,6 @@ ENTRY(mcck_int_handler) stmg %r8,%r9,__PT_PSW(%r11) la %r14,4095 mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) - lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1195,9 +572,7 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - TRACE_IRQS_OFF brasl %r14,s390_handle_mcck - TRACE_IRQS_ON .Lmcck_return: lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a16c33b32ab0..3d0c0ac5c20e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -17,8 +17,9 @@ void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs); +void __do_pgm_check(struct pt_regs *regs); +void __do_syscall(struct pt_regs *regs, int per_trap); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -48,9 +49,7 @@ void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); void monitor_event_exception(struct pt_regs *regs); -void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, @@ -58,7 +57,8 @@ void handle_signal32(struct ksignal *ksig, sigset_t *oldset, void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); -void do_IRQ(struct pt_regs *regs, int irq); +void do_io_irq(struct pt_regs *regs); +void do_ext_irq(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); @@ -82,8 +82,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void gs_load_bc_cb(struct pt_regs *regs); - unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 0da378e2eb25..d864c9a325e2 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -175,3 +175,91 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) : "1", "cc"); } EXPORT_SYMBOL(__kernel_fpu_end); + +void __load_fpu_regs(void) +{ + struct fpu *state = ¤t->thread.fpu; + unsigned long *regs = current->thread.fpu.regs; + + asm volatile("lfpc %0" : : "Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VLM 0,15,0,1\n" + "VLM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("ld 0,%0" : : "Q" (regs[0])); + asm volatile("ld 1,%0" : : "Q" (regs[1])); + asm volatile("ld 2,%0" : : "Q" (regs[2])); + asm volatile("ld 3,%0" : : "Q" (regs[3])); + asm volatile("ld 4,%0" : : "Q" (regs[4])); + asm volatile("ld 5,%0" : : "Q" (regs[5])); + asm volatile("ld 6,%0" : : "Q" (regs[6])); + asm volatile("ld 7,%0" : : "Q" (regs[7])); + asm volatile("ld 8,%0" : : "Q" (regs[8])); + asm volatile("ld 9,%0" : : "Q" (regs[9])); + asm volatile("ld 10,%0" : : "Q" (regs[10])); + asm volatile("ld 11,%0" : : "Q" (regs[11])); + asm volatile("ld 12,%0" : : "Q" (regs[12])); + asm volatile("ld 13,%0" : : "Q" (regs[13])); + asm volatile("ld 14,%0" : : "Q" (regs[14])); + asm volatile("ld 15,%0" : : "Q" (regs[15])); + } + clear_cpu_flag(CIF_FPU); +} +EXPORT_SYMBOL(__load_fpu_regs); + +void load_fpu_regs(void) +{ + raw_local_irq_disable(); + __load_fpu_regs(); + raw_local_irq_enable(); +} +EXPORT_SYMBOL(load_fpu_regs); + +void save_fpu_regs(void) +{ + unsigned long flags, *regs; + struct fpu *state; + + local_irq_save(flags); + + if (test_cpu_flag(CIF_FPU)) + goto out; + + state = ¤t->thread.fpu; + regs = current->thread.fpu.regs; + + asm volatile("stfpc %0" : "=Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VSTM 0,15,0,1\n" + "VSTM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("std 0,%0" : "=Q" (regs[0])); + asm volatile("std 1,%0" : "=Q" (regs[1])); + asm volatile("std 2,%0" : "=Q" (regs[2])); + asm volatile("std 3,%0" : "=Q" (regs[3])); + asm volatile("std 4,%0" : "=Q" (regs[4])); + asm volatile("std 5,%0" : "=Q" (regs[5])); + asm volatile("std 6,%0" : "=Q" (regs[6])); + asm volatile("std 7,%0" : "=Q" (regs[7])); + asm volatile("std 8,%0" : "=Q" (regs[8])); + asm volatile("std 9,%0" : "=Q" (regs[9])); + asm volatile("std 10,%0" : "=Q" (regs[10])); + asm volatile("std 11,%0" : "=Q" (regs[11])); + asm volatile("std 12,%0" : "=Q" (regs[12])); + asm volatile("std 13,%0" : "=Q" (regs[13])); + asm volatile("std 14,%0" : "=Q" (regs[14])); + asm volatile("std 15,%0" : "=Q" (regs[15])); + } + set_cpu_flag(CIF_FPU); +out: + local_irq_restore(flags); +} +EXPORT_SYMBOL(save_fpu_regs); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index a5d4d80d6ede..812073ea073e 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,12 +14,36 @@ #include #include #include +#include #include #include #include "entry.h" static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + u64 cycles_new[8]; + int i; + + clear_cpu_flag(CIF_ENABLED_WAIT); + if (smp_cpu_mtid) { + stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); + for (i = 0; i < smp_cpu_mtid; i++) + this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + + idle->clock_idle_exit = S390_lowcore.int_clock; + idle->timer_idle_exit = S390_lowcore.sys_enter_timer; + + S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; + S390_lowcore.last_update_clock = idle->clock_idle_exit; + + S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; + S390_lowcore.last_update_timer = idle->timer_idle_exit; +} + void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f8a8b9428ae2..c6d40bcf4a68 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -95,19 +96,97 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; -void do_IRQ(struct pt_regs *regs, int irq) +static void do_IRQ(struct pt_regs *regs, int irq) { - struct pt_regs *old_regs; - - old_regs = set_irq_regs(regs); - irq_enter(); if (tod_after_eq(S390_lowcore.int_clock, S390_lowcore.clock_comparator)) /* Serve timer interrupts first. */ clock_comparator_work(); generic_handle_irq(irq); +} + +static int on_async_stack(void) +{ + unsigned long frame = current_frame_address(); + + return !!!((S390_lowcore.async_stack - frame) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)); +} + +static void do_irq_async(struct pt_regs *regs, int irq) +{ + if (on_async_stack()) + do_IRQ(regs, irq); + else + CALL_ON_STACK(do_IRQ, S390_lowcore.async_stack, 2, regs, irq); +} + +static int irq_pending(struct pt_regs *regs) +{ + int cc; + + asm volatile("tpi 0\n" + "ipm %0" : "=d" (cc) : : "cc"); + return cc >> 28; +} + +void noinstr do_io_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do { + memcpy(®s->int_code, &S390_lowcore.subchannel_id, 12); + if (S390_lowcore.io_int_word & BIT(31)) + do_irq_async(regs, THIN_INTERRUPT); + else + do_irq_async(regs, IO_INTERRUPT); + } while (MACHINE_IS_LPAR && irq_pending(regs)); + + irq_exit(); + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +void noinstr do_ext_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + int from_idle; + + irq_enter(); + + if (user_mode(regs)) + update_timer_sys(); + + memcpy(®s->int_code, &S390_lowcore.ext_cpu_addr, 4); + regs->int_parm = S390_lowcore.ext_params; + regs->int_parm_long = *(unsigned long *)S390_lowcore.ext_params2; + + from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + if (from_idle) + account_idle_time_irq(); + + do_irq_async(regs, EXT_INTERRUPT); + irq_exit(); set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); } static void show_msi_interrupt(struct seq_file *p, int irq) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 86c8d5370e7f..11f8c296f60d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -131,12 +131,11 @@ static notrace void s390_handle_damage(void) NOKPROBE_SYMBOL(s390_handle_damage); /* - * Main machine check handler function. Will be called with interrupts enabled - * or disabled and machine checks enabled or disabled. + * Main machine check handler function. Will be called with interrupts disabled + * and machine checks enabled. */ -void s390_handle_mcck(void) +void __s390_handle_mcck(void) { - unsigned long flags; struct mcck_struct mcck; /* @@ -144,12 +143,10 @@ void s390_handle_mcck(void) * machine checks. Afterwards delete the old state and enable machine * checks again. */ - local_irq_save(flags); local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); local_mcck_enable(); - local_irq_restore(flags); if (mcck.channel_report) crw_handle_channel_report(); @@ -181,8 +178,13 @@ void s390_handle_mcck(void) do_exit(SIGSEGV); } } -EXPORT_SYMBOL_GPL(s390_handle_mcck); +void noinstr s390_handle_mcck(void) +{ + trace_hardirqs_off(); + __s390_handle_mcck(); + trace_hardirqs_on(); +} /* * returns 0 if all required registers are available * returns 1 otherwise @@ -344,6 +346,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs) int mcck_pending = 0; nmi_enter(); + + if (user_mode(regs)) + update_timer_mcck(); inc_irq_stat(NMI_NMI); mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index bc3ca54edfb4..367bd000f6d1 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -43,9 +44,22 @@ #include #include "entry.h" -asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); +void ret_from_fork(void) asm("ret_from_fork"); -extern void kernel_thread_starter(void); +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs) +{ + void (*func)(void *arg); + + schedule_tail(prev); + + if (!user_mode(regs)) { + /* Kernel thread */ + func = (void *)regs->gprs[9]; + func((void *)regs->gprs[10]); + } + clear_pt_regs_flag(regs, PIF_SYSCALL); + syscall_exit_to_user_mode(regs); +} void flush_thread(void) { @@ -108,10 +122,12 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, p->thread.last_break = 1; frame->sf.back_chain = 0; + frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame); + frame->sf.gprs[6] = (unsigned long)p; /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + frame->sf.gprs[8] = (unsigned long)ret_from_fork; /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + frame->sf.gprs[9] = (unsigned long)frame; /* Store access registers to kernel stack of new process. */ if (unlikely(p->flags & PF_KTHREAD)) { @@ -120,10 +136,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = - (unsigned long) kernel_thread_starter; + (unsigned long)__ret_from_fork; frame->childregs.gprs[9] = new_stackp; /* function */ frame->childregs.gprs[10] = arg; - frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.gprs[11] = (unsigned long)do_exit; frame->childregs.orig_gpr2 = -1; return 0; @@ -153,7 +169,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, return 0; } -asmlinkage void execve_tail(void) +void execve_tail(void) { current->thread.fpu.fpc = 0; asm volatile("sfpc %0" : : "d" (0)); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a76dd27fb2e8..18b3416fd663 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,6 +7,7 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include "asm/ptrace.h" #include #include #include @@ -37,9 +38,6 @@ #include "compat_ptrace.h" #endif -#define CREATE_TRACE_POINTS -#include - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -140,7 +138,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); task->thread.per_flags = 0; } @@ -322,25 +320,6 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } -static void fixup_int_code(struct task_struct *child, addr_t data) -{ - struct pt_regs *regs = task_pt_regs(child); - int ilc = regs->int_code >> 16; - u16 insn; - - if (ilc > 6) - return; - - if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), - &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) - return; - - /* double check that tracee stopped on svc instruction */ - if ((insn >> 8) != 0xa) - return; - - regs->int_code = 0x20000 | (data & 0xffff); -} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -374,10 +353,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) } if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct user, regs.gprs[2])) - fixup_int_code(child, data); - *(addr_t *)((addr_t) ®s->psw + addr) = data; + addr == offsetof(struct user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + regs->int_code = 0x20000 | (data & 0xffff); + } + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* * access registers are stored in the thread structure @@ -742,10 +723,12 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { - if (test_pt_regs_flag(regs, PIF_SYSCALL) && - addr == offsetof(struct compat_user, regs.gprs[2])) - fixup_int_code(child, data); + addr == offsetof(struct compat_user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@ -862,82 +845,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) -{ - unsigned long mask = -1UL; - long ret = -1; - - if (is_compat_task()) - mask = 0xffffffff; - - /* - * The sysc_tracesys code in entry.S stored the system - * call number to gprs[2]. - */ - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { - /* - * Tracing decided this syscall should not happen. Skip - * the system call and the system call restart handling. - */ - goto skip; - } - -#ifdef CONFIG_SECCOMP - /* Do the secure computing check after ptrace. */ - if (unlikely(test_thread_flag(TIF_SECCOMP))) { - struct seccomp_data sd; - - if (is_compat_task()) { - sd.instruction_pointer = regs->psw.addr & 0x7fffffff; - sd.arch = AUDIT_ARCH_S390; - } else { - sd.instruction_pointer = regs->psw.addr; - sd.arch = AUDIT_ARCH_S390X; - } - - sd.nr = regs->int_code & 0xffff; - sd.args[0] = regs->orig_gpr2 & mask; - sd.args[1] = regs->gprs[3] & mask; - sd.args[2] = regs->gprs[4] & mask; - sd.args[3] = regs->gprs[5] & mask; - sd.args[4] = regs->gprs[6] & mask; - sd.args[5] = regs->gprs[7] & mask; - - if (__secure_computing(&sd) == -1) - goto skip; - } -#endif /* CONFIG_SECCOMP */ - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->int_code & 0xffff); - - - audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, - regs->gprs[3] &mask, regs->gprs[4] &mask, - regs->gprs[5] &mask); - - if ((signed long)regs->gprs[2] >= NR_syscalls) { - regs->gprs[2] = -ENOSYS; - ret = -ENOSYS; - } - return regs->gprs[2]; -skip: - clear_pt_regs_flag(regs, PIF_SYSCALL); - return ret; -} - -asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->gprs[2]); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); -} - /* * user_regset definitions. */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1fbed91c73bc..c7feda84edbb 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -411,8 +411,7 @@ static void __init setup_lowcore_dat_off(void) memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); - lc->sync_enter_timer = S390_lowcore.sync_enter_timer; - lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->sys_enter_timer = S390_lowcore.sys_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; lc->user_timer = S390_lowcore.user_timer; lc->system_timer = S390_lowcore.system_timer; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index b27b6c1f058d..fce1b2a28a40 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -170,6 +170,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) fpregs_load(&user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); return 0; } @@ -459,7 +460,8 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -void do_signal(struct pt_regs *regs) + +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); @@ -472,7 +474,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { + if (has_signal && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; @@ -498,6 +500,7 @@ void do_signal(struct pt_regs *regs) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); @@ -508,6 +511,7 @@ void do_signal(struct pt_regs *regs) /* No handlers present - check for system call restart */ clear_pt_regs_flag(regs, PIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (current->thread.system_call) { regs->int_code = current->thread.system_call; switch (regs->gprs[2]) { @@ -520,9 +524,9 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_pt_regs_flag(regs, PIF_SYSCALL); + set_pt_regs_flag(regs, PIF_SYSCALL_RESTART); if (test_thread_flag(TIF_SINGLE_STEP)) - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); break; } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27c763014114..c5abbb94ac6e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -499,7 +499,7 @@ static void smp_handle_ext_call(void) if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); if (test_bit(ec_mcck_pending, &bits)) - s390_handle_mcck(); + __s390_handle_mcck(); } static void do_ext_call_interrupt(struct ext_code ext_code, diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c deleted file mode 100644 index 202fa73ac167..000000000000 --- a/arch/s390/kernel/sys_s390.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * S390 version - * Copyright IBM Corp. 1999, 2000 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) - * - * Derived from "arch/i386/kernel/sys_i386.c" - * - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/s390 - * platform. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "entry.h" - -/* - * Perform the mmap() system call. Linux for S/390 isn't able to handle more - * than 5 system call parameters, so this system call uses a memory block - * for parameter passing. - */ - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) -{ - struct s390_mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - -#ifdef CONFIG_SYSVIPC -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls. - */ -SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, - unsigned long, third, void __user *, ptr) -{ - if (call >> 16) - return -EINVAL; - /* The s390 sys_ipc variant has only five parameters instead of six - * like the generic variant. The only difference is the handling of - * the SEMTIMEDOP subcall where on s390 the third parameter is used - * as a pointer to a struct timespec where the generic variant uses - * the fifth parameter. - * Therefore we can call the generic variant by simply passing the - * third parameter also as fifth parameter. - */ - return ksys_ipc(call, first, second, third, ptr, third); -} -#endif /* CONFIG_SYSVIPC */ - -SYSCALL_DEFINE1(s390_personality, unsigned int, personality) -{ - unsigned int ret = current->personality; - - if (personality(current->personality) == PER_LINUX32 && - personality(personality) == PER_LINUX) - personality |= PER_LINUX32; - - if (personality != 0xffffffff) - set_personality(personality); - - if (personality(ret) == PER_LINUX32) - ret &= ~PER_LINUX32; - - return ret; -} - -SYSCALL_DEFINE0(ni_syscall) -{ - return -ENOSYS; -} diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c new file mode 100644 index 000000000000..25c0fb19b0a5 --- /dev/null +++ b/arch/s390/kernel/syscall.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Thomas Spatzier (tspat@de.ibm.com) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/s390 + * platform. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "entry.h" + +/* + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. + */ + +struct s390_mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) +{ + struct s390_mmap_arg_struct a; + int error = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +out: + return error; +} + +#ifdef CONFIG_SYSVIPC +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls. + */ +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) +{ + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return ksys_ipc(call, first, second, third, ptr, third); +} +#endif /* CONFIG_SYSVIPC */ + +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) +{ + unsigned int ret = current->personality; + + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; + + if (personality != 0xffffffff) + set_personality(personality); + + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; + + return ret; +} + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +void do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + + nr = regs->int_code & 0xffff; + if (!nr) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + + regs->gprs[2] = nr; + + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. + */ + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { + regs->gprs[2] = -ENOSYS; + if (likely(nr < NR_syscalls)) { + regs->gprs[2] = current->thread.sys_call_table[nr]( + regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5], + regs->gprs[6], regs->gprs[7]); + } + } else { + clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); + } + syscall_exit_to_user_mode_work(regs); +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + enter_from_user_mode(regs); + + memcpy(®s->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long)); + memcpy(®s->int_code, &S390_lowcore.svc_ilc, sizeof(regs->int_code)); + regs->psw = S390_lowcore.svc_old_psw; + + update_timer_sys(); + + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + + if (per_trap) + set_thread_flag(TIF_PER_TRAP); + + for (;;) { + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); + if (!test_pt_regs_flag(regs, PIF_SYSCALL_RESTART)) + break; + local_irq_enable(); + } + exit_to_user_mode(); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8d1e8a1a97df..db7dd59b570c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -13,6 +13,8 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ +#include "asm/irqflags.h" +#include "asm/ptrace.h" #include #include #include @@ -23,7 +25,9 @@ #include #include #include +#include #include +#include #include "entry.h" static inline void __user *get_trap_ip(struct pt_regs *regs) @@ -288,3 +292,64 @@ void __init trap_init(void) local_mcck_enable(); test_monitor_call(); } + +void noinstr __do_pgm_check(struct pt_regs *regs) +{ + unsigned long last_break = S390_lowcore.breaking_event_addr; + unsigned int trapnr, syscall_redirect = 0; + irqentry_state_t state; + + regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_parm_long = S390_lowcore.trans_exc_code; + + state = irqentry_enter(regs); + + if (user_mode(regs)) { + update_timer_sys(); + if (last_break < 4096) + last_break = 1; + current->thread.last_break = last_break; + regs->args[0] = last_break; + } + + if (S390_lowcore.pgm_code & 0x0200) { + /* transaction abort */ + memcpy(¤t->thread.trap_tdb, &S390_lowcore.pgm_tdb, 256); + } + + if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { + if (user_mode(regs)) { + struct per_event *ev = ¤t->thread.per_event; + + set_thread_flag(TIF_PER_TRAP); + ev->address = S390_lowcore.per_address; + ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->paid = S390_lowcore.per_access_id; + } else { + /* PER event in kernel is kprobes */ + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + do_per_trap(regs); + goto out; + } + } + + if (!irqs_disabled_flags(regs->psw.mask)) + trace_hardirqs_on(); + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + + trapnr = regs->int_code & PGM_INT_CODE_MASK; + if (trapnr) + pgm_check_table[trapnr](regs); + syscall_redirect = user_mode(regs) && test_pt_regs_flag(regs, PIF_SYSCALL); +out: + local_irq_disable(); + irqentry_exit(regs, state); + + if (syscall_redirect) { + enter_from_user_mode(regs); + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + do_syscall(regs); + exit_to_user_mode(); + } +} diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 5007fac01bb5..bbf8622bbf5d 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -32,7 +32,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) return -EINVAL; if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) return -EINVAL; - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); auprobe->saved_per = psw_bits(regs->psw).per; auprobe->saved_int_code = regs->int_code; regs->int_code = UPROBE_TRAP_NR; @@ -103,7 +103,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* fix per address */ current->thread.per_event.address = utask->vaddr; /* trigger per event */ - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } return 0; } @@ -259,7 +259,7 @@ static void sim_stor_event(struct pt_regs *regs, void *addr, int len) return; current->thread.per_event.address = regs->psw.addr; current->thread.per_event.cause = PER_EVENT_STORE >> 16; - set_pt_regs_flag(regs, PIF_PER_TRAP); + set_thread_flag(TIF_PER_TRAP); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dbafd057ca6a..759bbc012b6c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -4147,6 +4148,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); if (kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c5d0a58b2c29..bd803e091918 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -1028,6 +1029,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) */ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; barrier(); + if (test_cpu_flag(CIF_FPU)) + load_fpu_regs(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) rc = sie64a(scb_s, vcpu->run->s.regs.gprs); barrier(); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index e8f642446fed..2fece1fd210a 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,8 +16,8 @@ #include #include -#ifdef CONFIG_DEBUG_USER_ASCE -void debug_user_asce(void) +#ifdef CONFIG_DEBUG_ENTRY +void debug_user_asce(int exit) { unsigned long cr1, cr7; @@ -25,12 +25,14 @@ void debug_user_asce(void) __ctl_store(cr7, 7, 7); if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) return; - panic("incorrect ASCE on kernel exit\n" + panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016llx user: %016llx\n", - cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); + exit ? "exit" : "entry", cr1, cr7, + S390_lowcore.kernel_asce, S390_lowcore.user_asce); + } -#endif /*CONFIG_DEBUG_USER_ASCE */ +#endif /*CONFIG_DEBUG_ENTRY */ #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b8210103de14..e30c7c781172 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -385,7 +385,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. */ - clear_pt_regs_flag(regs, PIF_PER_TRAP); + clear_thread_flag(TIF_PER_TRAP); if (kprobe_page_fault(regs, 14)) return 0; -- cgit v1.2.3 From 39589ada1cab9548325767e6aea43a31132467de Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 18 Jan 2021 09:00:58 +0100 Subject: s390: remove asmlinkage On s390 asmlinkage is a nop, so remove it. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/preempt.h | 4 ++-- arch/s390/include/asm/syscall_wrapper.h | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 6ede29907fbf..b49e0492842c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -131,9 +131,9 @@ static inline bool should_resched(int preempt_offset) #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #ifdef CONFIG_PREEMPTION -extern asmlinkage void preempt_schedule(void); +extern void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() -extern asmlinkage void preempt_schedule_notrace(void); +extern void preempt_schedule_notrace(void); #define __preempt_schedule_notrace() preempt_schedule_notrace() #endif /* CONFIG_PREEMPTION */ diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 1320f4213d80..5364bfc866e0 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -30,9 +30,9 @@ }) #define __S390_SYS_STUBx(x, name, ...) \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ + long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __s390x_sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ @@ -45,17 +45,17 @@ */ #define COMPAT_SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390_compat_sys_##sname(void); \ + long __s390_compat_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ - asmlinkage long __s390_compat_sys_##sname(void) + long __s390_compat_sys_##sname(void) #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390x_sys_##sname(void); \ + long __s390x_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ - asmlinkage long __s390_sys_##sname(void) \ + long __s390_sys_##sname(void) \ __attribute__((alias(__stringify(__s390x_sys_##sname)))); \ - asmlinkage long __s390x_sys_##sname(void) + long __s390x_sys_##sname(void) #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name); \ @@ -69,13 +69,13 @@ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ + long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ @@ -101,9 +101,9 @@ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __s390x_sys_##sname(void); \ + long __s390x_sys_##sname(void); \ ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ - asmlinkage long __s390x_sys_##sname(void) + long __s390x_sys_##sname(void) #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) @@ -117,13 +117,13 @@ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ + long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ __S390_SYS_STUBx(x, name, __VA_ARGS__) \ - asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ -- cgit v1.2.3 From 3a790cc1c9ef1b7b613cf648e6fb756a842caa16 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 18 Jan 2021 09:35:38 +0100 Subject: s390: pass struct pt_regs instead of registers to syscalls Instead of fetching all registers from struct pt_regs and passing them to the syscall wrappers, let the system call wrappers only fetch the values really required. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 4 +- arch/s390/include/asm/syscall_wrapper.h | 114 ++++++++++++++++++++------------ arch/s390/kernel/syscall.c | 8 +-- 3 files changed, 75 insertions(+), 51 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index fa67b66bf144..023a15dc25a3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -40,9 +40,7 @@ #include #include -typedef long (*sys_call_ptr_t)(unsigned long, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); +typedef long (*sys_call_ptr_t)(struct pt_regs *regs); static inline void set_cpu_flag(int flag) { diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 5364bfc866e0..ad2c996e7e93 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -7,6 +7,33 @@ #ifndef _ASM_S390_SYSCALL_WRAPPER_H #define _ASM_S390_SYSCALL_WRAPPER_H +#define __SC_TYPE(t, a) t + +#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\ + SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5), \ + m(t6, (regs->gprs[7])) + +#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4), \ + m(t5, (regs->gprs[6])) + +#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(regs, m, t1, t2, t3), \ + m(t4, (regs->gprs[5])) + +#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3) \ + SYSCALL_PT_ARG2(regs, m, t1, t2), \ + m(t3, (regs->gprs[4])) + +#define SYSCALL_PT_ARG2(regs, m, t1, t2) \ + SYSCALL_PT_ARG1(regs, m, t1), \ + m(t2, (regs->gprs[3])) + +#define SYSCALL_PT_ARG1(regs, m, t1) \ + m(t1, (regs->orig_gpr2)) + +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + #ifdef CONFIG_COMPAT #define __SC_COMPAT_TYPE(t, a) \ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a @@ -29,14 +56,15 @@ (t)__ReS; \ }) -#define __S390_SYS_STUBx(x, name, ...) \ - long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ - long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __s390x_sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ +#define __S390_SYS_STUBx(x, name, ...) \ + long __s390_sys##name(struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ + long __s390_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } /* @@ -65,23 +93,24 @@ SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ - static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390_compat_sys##name(struct pt_regs *regs); \ + long __s390_compat_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_compat_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_compat_sys##name(struct pt_regs *regs); \ + long __se_compat_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE, \ + __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* @@ -113,23 +142,24 @@ #endif /* CONFIG_COMPAT */ -#define __SYSCALL_DEFINEx(x, name, ...) \ - __diag_push(); \ - __diag_ignore(GCC, 8, "-Wattribute-alias", \ - "Type aliasing is used to sanitize syscall arguments");\ - long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(__se_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ - long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - __S390_SYS_STUBx(x, name, __VA_ARGS__) \ - long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ - __MAP(x,__SC_TEST,__VA_ARGS__); \ - return ret; \ - } \ - __diag_pop(); \ +#define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ + long __s390x_sys##name(struct pt_regs *regs) \ + __attribute__((alias(__stringify(__se_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + long __se_sys##name(struct pt_regs *regs); \ + __S390_SYS_STUBx(x, name, __VA_ARGS__) \ + long __se_sys##name(struct pt_regs *regs) \ + { \ + long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \ + __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 25c0fb19b0a5..bc8e650e377d 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -132,12 +132,8 @@ void do_syscall(struct pt_regs *regs) */ if (!test_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)) { regs->gprs[2] = -ENOSYS; - if (likely(nr < NR_syscalls)) { - regs->gprs[2] = current->thread.sys_call_table[nr]( - regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5], - regs->gprs[6], regs->gprs[7]); - } + if (likely(nr < NR_syscalls)) + regs->gprs[2] = current->thread.sys_call_table[nr](regs); } else { clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET); } -- cgit v1.2.3 From c1971eae30cfc0c239ffb9bb3152d750854e05f2 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 21 Jan 2021 10:36:13 +0100 Subject: s390: add missing include to arch/s390/kernel/signal.c This fixes the following warning: CHECK linux/arch/s390/kernel/signal.c linux/arch/s390/kernel/signal.c:465:6: warning: symbol 'arch_do_signal_or_restart' was not declared. Should it be static? Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/signal.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index fce1b2a28a40..90163e6184f5 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From e1bff843cde62a45a287b7f9b4cd5e824e8e49e2 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 19 Jan 2021 08:49:37 +0100 Subject: s390/pci: remove superfluous zdev->zbus check Checking zdev->zbus for NULL in __zpci_event_availability() is superfluous as it can never be NULL at this point. While harmless this check causes smatch warnings because we later access zdev->zbus with only having checked zdev != NULL which is sufficient. The reason zdev->zbus can never be NULL is since with zdev != NULL given we know the zdev came from get_zdev_by_fid() and thus the zpci_list. Now on first glance at zpci_create_device() one may assume that there is a window where the zdev is in the list without a zdev, however this window can't overlap with __zpci_event_availability() as zpci_create_device() either runs on the same kthread as part of availability events, or during the initial CLP List PCI at which point the __zpci_event_availability() is not yet called as zPCI is not yet initialized. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a6bae503fe6..5c6407f5df20 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -80,7 +80,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) enum zpci_state state; int ret; - if (zdev && zdev->zbus && zdev->zbus->bus) + if (zdev && zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); zpci_err("avail CCDF:\n"); -- cgit v1.2.3 From 1daafea411f36cfa52eb58c2e7f9e2254fd42b28 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 15 Jan 2021 08:56:19 +0100 Subject: s390/crypto: improve retry logic in case of master key change A master key change on a CCA card may cause an immediately following request to derive an protected key from a secure key to fail with error condition 8/2290. The recommendation from firmware is to retry with 1 second sleep. So now the low level cca functions return -EAGAIN when this error condition is seen and the paes retry function will evaluate the return value. Seeing EAGAIN and running in process context results in trying to sleep for 1 s now. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Vasily Gorbik --- arch/s390/crypto/paes_s390.c | 28 ++++++++++++++++++++-------- drivers/s390/crypto/zcrypt_ccamisc.c | 15 ++++++++++++--- 2 files changed, 32 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index f3caeb17c85b..a279b7d23a5e 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,9 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, /* try three times in case of failure */ for (i = 0; i < 3; i++) { + if (i > 0 && ret == -EAGAIN && in_task()) + if (msleep_interruptible(1000)) + return -EINTR; ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); if (ret == 0) break; @@ -138,10 +142,12 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, static inline int __paes_convert_key(struct s390_paes_ctx *ctx) { + int ret; struct pkey_protkey pkey; - if (__paes_keyblob2pkey(&ctx->kb, &pkey)) - return -EINVAL; + ret = __paes_keyblob2pkey(&ctx->kb, &pkey); + if (ret) + return ret; spin_lock_bh(&ctx->pk_lock); memcpy(&ctx->pk, &pkey, sizeof(pkey)); @@ -169,10 +175,12 @@ static void ecb_paes_exit(struct crypto_skcipher *tfm) static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : @@ -282,10 +290,12 @@ static void cbc_paes_exit(struct crypto_skcipher *tfm) static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 : @@ -577,10 +587,12 @@ static void ctr_paes_exit(struct crypto_skcipher *tfm) static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) { + int rc; unsigned long fc; - if (__paes_convert_key(ctx)) - return -EINVAL; + rc = __paes_convert_key(ctx); + if (rc) + return rc; /* Pick the correct function code based on the protected key type */ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 : diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index b1046811450f..d68c0ed5e0dd 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -662,7 +662,10 @@ int cca_sec2protkey(u16 cardnr, u16 domain, __func__, (int) prepcblk->ccp_rtcode, (int) prepcblk->ccp_rscode); - rc = -EIO; + if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) + rc = -EAGAIN; + else + rc = -EIO; goto out; } if (prepcblk->ccp_rscode != 0) { @@ -1275,7 +1278,10 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, __func__, (int) prepcblk->ccp_rtcode, (int) prepcblk->ccp_rscode); - rc = -EIO; + if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) + rc = -EAGAIN; + else + rc = -EIO; goto out; } if (prepcblk->ccp_rscode != 0) { @@ -1441,7 +1447,10 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, __func__, (int) prepcblk->ccp_rtcode, (int) prepcblk->ccp_rscode); - rc = -EIO; + if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) + rc = -EAGAIN; + else + rc = -EIO; goto out; } if (prepcblk->ccp_rscode != 0) { -- cgit v1.2.3 From 1ecbcfd57ed6ee11ec39eac9b6516883c925c558 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 2 Jun 2020 14:09:10 +0200 Subject: s390/qdio: remove Input tasklet code Both qeth and zfcp have fully moved to the polling-driven flow for Input Queues with commit 0a6e634535f1 ("s390/qdio: extend polling support to multiple queues") and commit 0b524abc2dd1 ("scsi: zfcp: Lift Input Queue tasklet from qdio"). So remove the tasklet code for Input Queues, streamline the IRQ handlers and push the tasklet struct into struct qdio_output_q. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 2 +- drivers/s390/cio/qdio.h | 16 +++++---- drivers/s390/cio/qdio_debug.c | 3 -- drivers/s390/cio/qdio_main.c | 72 ++++------------------------------------- drivers/s390/cio/qdio_setup.c | 18 ++--------- drivers/s390/cio/qdio_thinint.c | 36 ++++----------------- 6 files changed, 27 insertions(+), 120 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 19e84c95d1e7..f96454f5d4cd 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues - * @irq_poll: Data IRQ polling handler (NULL when not supported) + * @irq_poll: Data IRQ polling handler * @scan_threshold: # of in-use buffers that triggers scan on output queue * @int_parm: interruption parameter * @input_sbal_addr_array: per-queue array, each element points to 128 SBALs diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index cd2df4ff8e0e..b4e7152fd586 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -139,9 +139,6 @@ struct qdio_dev_perf_stat { unsigned int qdio_int; unsigned int pci_request_int; - unsigned int tasklet_inbound; - unsigned int tasklet_inbound_resched; - unsigned int tasklet_inbound_resched2; unsigned int tasklet_outbound; unsigned int siga_read; @@ -193,6 +190,8 @@ struct qdio_output_q { struct qdio_outbuf_state *sbal_state; /* timer to check for more outbound work */ struct timer_list timer; + /* tasklet to check for completions */ + struct tasklet_struct tasklet; }; /* @@ -222,7 +221,6 @@ struct qdio_q { /* last scan of the queue */ u64 timestamp; - struct tasklet_struct tasklet; struct qdio_queue_perf_stat q_stats; struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q] ____cacheline_aligned; @@ -324,6 +322,14 @@ static inline int multicast_outbound(struct qdio_q *q) (q->nr == q->irq_ptr->nr_output_qs - 1); } +static inline void qdio_deliver_irq(struct qdio_irq *irq) +{ + if (!test_and_set_bit(QDIO_IRQ_DISABLED, &irq->poll_state)) + irq->irq_poll(irq->cdev, irq->int_parm); + else + QDIO_PERF_STAT_INC(irq, int_discarded); +} + #define pci_out_supported(irq) ((irq)->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) #define is_qebsm(q) (q->irq_ptr->sch_token != 0) @@ -359,13 +365,11 @@ int qdio_establish_thinint(struct qdio_irq *irq_ptr); void qdio_shutdown_thinint(struct qdio_irq *irq_ptr); void tiqdio_add_device(struct qdio_irq *irq_ptr); void tiqdio_remove_device(struct qdio_irq *irq_ptr); -void tiqdio_inbound_processing(unsigned long q); int qdio_thinint_init(void); void qdio_thinint_exit(void); int test_nonshared_ind(struct qdio_irq *); /* prototypes for setup */ -void qdio_inbound_processing(unsigned long data); void qdio_outbound_processing(unsigned long data); void qdio_outbound_timer(struct timer_list *t); void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 863d17c802ca..728abf7e9ccb 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -197,9 +197,6 @@ static char *qperf_names[] = { "Assumed adapter interrupts", "QDIO interrupts", "Requested PCIs", - "Inbound tasklet runs", - "Inbound tasklet resched", - "Inbound tasklet resched2", "Outbound tasklet runs", "SIGA read", "SIGA write", diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index f9a31c7819ae..a4dc5e283750 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -575,51 +575,12 @@ static void qdio_kick_handler(struct qdio_q *q, unsigned int start, static inline int qdio_tasklet_schedule(struct qdio_q *q) { if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) { - tasklet_schedule(&q->tasklet); + tasklet_schedule(&q->u.out.tasklet); return 0; } return -EPERM; } -static void __qdio_inbound_processing(struct qdio_q *q) -{ - unsigned int start = q->first_to_check; - int count; - - qperf_inc(q, tasklet_inbound); - - count = qdio_inbound_q_moved(q, start); - if (count == 0) - return; - - qdio_kick_handler(q, start, count); - start = add_buf(start, count); - q->first_to_check = start; - - if (!qdio_inbound_q_done(q, start)) { - /* means poll time is not yet over */ - qperf_inc(q, tasklet_inbound_resched); - if (!qdio_tasklet_schedule(q)) - return; - } - - qdio_stop_polling(q); - /* - * We need to check again to not lose initiative after - * resetting the ACK state. - */ - if (!qdio_inbound_q_done(q, start)) { - qperf_inc(q, tasklet_inbound_resched2); - qdio_tasklet_schedule(q); - } -} - -void qdio_inbound_processing(unsigned long data) -{ - struct qdio_q *q = (struct qdio_q *)data; - __qdio_inbound_processing(q); -} - static void qdio_check_pending(struct qdio_q *q, unsigned int index) { unsigned char state; @@ -825,19 +786,6 @@ static inline void qdio_check_outbound_pci_queues(struct qdio_irq *irq) qdio_tasklet_schedule(out); } -void tiqdio_inbound_processing(unsigned long data) -{ - struct qdio_q *q = (struct qdio_q *)data; - - if (need_siga_sync(q) && need_siga_sync_after_ai(q)) - qdio_sync_queues(q); - - /* The interrupt could be caused by a PCI request: */ - qdio_check_outbound_pci_queues(q->irq_ptr); - - __qdio_inbound_processing(q); -} - static inline void qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state) { @@ -865,15 +813,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return; - if (irq_ptr->irq_poll) { - if (!test_and_set_bit(QDIO_IRQ_DISABLED, &irq_ptr->poll_state)) - irq_ptr->irq_poll(irq_ptr->cdev, irq_ptr->int_parm); - else - QDIO_PERF_STAT_INC(irq_ptr, int_discarded); - } else { - for_each_input_queue(irq_ptr, q, i) - tasklet_schedule(&q->tasklet); - } + qdio_deliver_irq(irq_ptr); if (!pci_out_supported(irq_ptr) || !irq_ptr->scan_threshold) return; @@ -1016,12 +956,9 @@ static void qdio_shutdown_queues(struct qdio_irq *irq_ptr) struct qdio_q *q; int i; - for_each_input_queue(irq_ptr, q, i) - tasklet_kill(&q->tasklet); - for_each_output_queue(irq_ptr, q, i) { del_timer_sync(&q->u.out.timer); - tasklet_kill(&q->tasklet); + tasklet_kill(&q->u.out.tasklet); } } @@ -1263,6 +1200,9 @@ int qdio_establish(struct ccw_device *cdev, !init_data->output_sbal_addr_array) return -EINVAL; + if (!init_data->irq_poll) + return -EINVAL; + mutex_lock(&irq_ptr->setup_mutex); qdio_trace_init_data(irq_ptr, init_data); qdio_setup_irq(irq_ptr, init_data); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index a5b2e16b7aa8..3571ca62a74c 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -259,14 +259,6 @@ static void setup_queues(struct qdio_irq *irq_ptr, setup_storage_lists(q, irq_ptr, qdio_init->input_sbal_addr_array[i], i); - - if (is_thinint_irq(irq_ptr)) { - tasklet_init(&q->tasklet, tiqdio_inbound_processing, - (unsigned long) q); - } else { - tasklet_init(&q->tasklet, qdio_inbound_processing, - (unsigned long) q); - } } for_each_output_queue(irq_ptr, q, i) { @@ -280,7 +272,7 @@ static void setup_queues(struct qdio_irq *irq_ptr, setup_storage_lists(q, irq_ptr, qdio_init->output_sbal_addr_array[i], i); - tasklet_init(&q->tasklet, qdio_outbound_processing, + tasklet_init(&q->u.out.tasklet, qdio_outbound_processing, (unsigned long) q); timer_setup(&q->u.out.timer, qdio_outbound_timer, 0); } @@ -483,12 +475,8 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) ccw_device_get_schid(cdev, &irq_ptr->schid); setup_queues(irq_ptr, init_data); - if (init_data->irq_poll) { - irq_ptr->irq_poll = init_data->irq_poll; - set_bit(QDIO_IRQ_DISABLED, &irq_ptr->poll_state); - } else { - irq_ptr->irq_poll = NULL; - } + irq_ptr->irq_poll = init_data->irq_poll; + set_bit(QDIO_IRQ_DISABLED, &irq_ptr->poll_state); setup_qib(irq_ptr, init_data); set_impl_params(irq_ptr, init_data->qib_param_field_format, diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 7a440e4328cd..d495bf0200aa 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -106,32 +106,6 @@ static inline u32 clear_shared_ind(void) return xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 0); } -static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) -{ - struct qdio_q *q; - int i; - - if (!references_shared_dsci(irq)) - xchg(irq->dsci, 0); - - if (irq->irq_poll) { - if (!test_and_set_bit(QDIO_IRQ_DISABLED, &irq->poll_state)) - irq->irq_poll(irq->cdev, irq->int_parm); - else - QDIO_PERF_STAT_INC(irq, int_discarded); - - return; - } - - for_each_input_queue(irq, q, i) { - /* - * Call inbound processing but not directly - * since that could starve other thinint queues. - */ - tasklet_schedule(&q->tasklet); - } -} - /** * tiqdio_thinint_handler - thin interrupt handler for qdio * @airq: pointer to adapter interrupt descriptor @@ -153,10 +127,14 @@ static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating) if (unlikely(references_shared_dsci(irq))) { if (!si_used) continue; - } else if (!*irq->dsci) - continue; + } else { + if (!*irq->dsci) + continue; + + xchg(irq->dsci, 0); + } - tiqdio_call_inq_handlers(irq); + qdio_deliver_irq(irq); QDIO_PERF_STAT_INC(irq, adapter_int); } -- cgit v1.2.3 From ba764dd703feacb5a9c410d191af1b6cfbe96845 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 22 Jul 2020 16:53:54 +0200 Subject: s390/pci: refactor zpci_create_device() Currently zpci_create_device() is only called in clp_add_pci_device() which allocates the memory for the struct zpci_dev being created. There is little separation of concerns as only both functions together can create a zpci_dev and the only CLP specific code in clp_add_pci_device() is a call to clp_query_pci_fn(). Improve this by removing clp_add_pci_device() and refactor zpci_create_device() such that it alone creates and initializes the zpci_dev given the FID and Function Handle. For this we need to make clp_query_pci_fn() non-static. While at it remove the function handle parameter since we can just take that from the zpci_dev. Also move adding to the zpci_list to after the zdev has been fully created which eliminates a window where a partially initialized zdev can be found by get_zdev_by_fid(). Acked-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 4 ++-- arch/s390/pci/pci.c | 57 ++++++++++++++++++++++++++++++++------------- arch/s390/pci/pci_clp.c | 40 +++---------------------------- arch/s390/pci/pci_event.c | 4 ++-- 4 files changed, 48 insertions(+), 57 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 212628932ddc..053fe8b8dec7 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -201,7 +201,7 @@ extern unsigned int s390_pci_no_rid; Prototypes ----------------------------------------------------------------------------- */ /* Base stuff */ -int zpci_create_device(struct zpci_dev *); +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); void zpci_remove_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); @@ -212,7 +212,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_add_pci_device(u32, u32, int); +int clp_query_pci_fn(struct zpci_dev *zdev); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 41df8fcfddde..600881d894dd 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -695,43 +695,68 @@ void zpci_remove_device(struct zpci_dev *zdev) } } -int zpci_create_device(struct zpci_dev *zdev) +/** + * zpci_create_device() - Create a new zpci_dev and add it to the zbus + * @fid: Function ID of the device to be created + * @fh: Current Function Handle of the device to be created + * @state: Initial state after creation either Standby or Configured + * + * Creates a new zpci device and adds it to its, possibly newly created, zbus + * as well as zpci_list. + * + * Returns: 0 on success, an error value otherwise + */ +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state) { + struct zpci_dev *zdev; int rc; - kref_init(&zdev->kref); + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state); + zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); + /* FID and Function Handle are the static/dynamic identifiers */ + zdev->fid = fid; + zdev->fh = fh; - rc = zpci_init_iommu(zdev); + /* Query function properties and update zdev */ + rc = clp_query_pci_fn(zdev); if (rc) - goto out; + goto error; + zdev->state = state; + kref_init(&zdev->kref); mutex_init(&zdev->lock); + + rc = zpci_init_iommu(zdev); + if (rc) + goto error; + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { rc = zpci_enable_device(zdev); if (rc) - goto out_destroy_iommu; + goto error_destroy_iommu; } rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) - goto out_disable; + goto error_disable; + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); return 0; -out_disable: +error_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); - -out_destroy_iommu: +error_destroy_iommu: zpci_destroy_iommu(zdev); -out: - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); +error: + zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); + kfree(zdev); return rc; } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 153720d21ae7..d3331596ddbe 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -181,7 +181,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int clp_query_pci_fn(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; int rc; @@ -194,7 +194,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; + rrb->request.fh = zdev->fh; rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { @@ -212,40 +212,6 @@ out: return rc; } -int clp_add_pci_device(u32 fid, u32 fh, int configured) -{ - struct zpci_dev *zdev; - int rc = -ENOMEM; - - zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); - zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - goto error; - - zdev->fh = fh; - zdev->fid = fid; - - /* Query function properties and update zdev */ - rc = clp_query_pci_fn(zdev, fh); - if (rc) - goto error; - - if (configured) - zdev->state = ZPCI_FN_STATE_CONFIGURED; - else - zdev->state = ZPCI_FN_STATE_STANDBY; - - rc = zpci_create_device(zdev); - if (rc) - goto error; - return 0; - -error: - zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); - kfree(zdev); - return rc; -} - static int clp_refresh_fh(u32 fid); /* * Enable/Disable a given PCI function and update its function handle if @@ -408,7 +374,7 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) zdev = get_zdev_by_fid(entry->fid); if (!zdev) - clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 5c6407f5df20..b4162da4e8a2 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -89,7 +89,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); break; } /* the configuration request may be stale */ @@ -116,7 +116,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0302: /* Reserved -> Standby */ if (!zdev) { - clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); break; } zdev->fh = ccdf->fh; -- cgit v1.2.3 From d09cb482ed3985881a3f5acecc965d06d65a976c Mon Sep 17 00:00:00 2001 From: Chengyang Fan Date: Mon, 25 Jan 2021 17:58:39 +0800 Subject: s390/ap: remove unneeded semicolon Remove a superfluous semicolon after function definition. Signed-off-by: Chengyang Fan Message-Id: <20210125095839.1720265-1-cy.fan@huawei.com> Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/ap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index aea32dda3d14..837d1699b109 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -368,7 +368,7 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, #if IS_ENABLED(CONFIG_ZCRYPT) void ap_bus_cfg_chg(void); #else -static inline void ap_bus_cfg_chg(void){}; +static inline void ap_bus_cfg_chg(void){} #endif #endif /* _ASM_S390_AP_H_ */ -- cgit v1.2.3 From 85eda128e133bccaa2c7f7cf25d8de7991561be4 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Tue, 26 Jan 2021 17:09:12 +0800 Subject: s390: Simplify the calculation of variables Fix the following coccicheck warnings: ./arch/s390/include/asm/scsw.h:528:48-50: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Reviewed-by: Vineeth Vijayan Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/scsw.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index c00f7b031628..a7c3ccf681da 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -525,8 +525,7 @@ static inline int scsw_cmd_is_valid_pno(union scsw *scsw) return (scsw->cmd.fctl != 0) && (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || - ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && - (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)); } /** -- cgit v1.2.3 From d010b378736898d7a65a9f9105088f1d335da48d Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 28 Jan 2021 16:33:54 +0100 Subject: s390: update defconfigs Disable CONFIG_TMPFS_INODE64 which is currently broken on s390x because size of ino_t on s390x is 4 bytes. This fixes the following error with kdump: [ 9.415082] [608]: Remounting '/' read-only in with options 'size=238372k,nr_inodes=59593,inode64'. [ 9.415093] rootfs: Cannot use inode64 with <64bit inums in kernel [ 9.415093] [ 9.415100] [608]: Failed to remount '/' read-only: Invalid argument Fixes: 5c60ed283e1d ("s390: update defconfigs") Signed-off-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2d8dcce6e028..5233da9e3ea6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -631,7 +631,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 3eadcda4aca9..b32e0e68891e 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -619,7 +619,6 @@ CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m -- cgit v1.2.3 From 5b96e6c10b632cb8ff40f3d107c4e9dece18beeb Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Wed, 3 Feb 2021 19:28:35 +0100 Subject: s390/thread_info.h: fix task_struct declaration warning Add missing forward declaration for task_struct. The warning appears when the -Werror C compiler flag is being used. Signed-off-by: Alexander Egorenkov Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 28696ca7680d..e6674796aa6f 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -47,6 +47,8 @@ struct thread_info { .flags = 0, \ } +struct task_struct; + void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -- cgit v1.2.3 From 9f9b312db40b3860dfd874e9e4e8aa2c3efcbe2b Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Fri, 5 Feb 2021 15:22:03 +0000 Subject: s390/debug_config: enable kmemleak detector ...but set it to off by default. Use the kernel command line option `kmemleak=on` to enable it. Signed-off-by: Marc Hartmayer Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 5233da9e3ea6..75f0c977c73c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -792,6 +792,8 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_VMACACHE=y -- cgit v1.2.3 From 42d7ccca37108991dc9cffa729f27328995532a3 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Thu, 4 Feb 2021 16:22:05 +0100 Subject: s390/defconfig: add some NFT modules Since Fedora 33 the virtualization stack of Fedora requires a couple of netfilter modules to function properly. Let's add these to defconfig and debug_defconfig. Signed-off-by: Halil Pasic Reported-by: Marc Hartmayer Tested-by: Bjoern Walk Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 6 ++++++ arch/s390/configs/defconfig | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 75f0c977c73c..2ae38ef35d52 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -178,13 +178,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -276,6 +280,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -296,6 +301,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b32e0e68891e..057ad15bdc63 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -169,13 +169,17 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m +CONFIG_NFT_FIB_INET=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -267,6 +271,7 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -287,6 +292,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m -- cgit v1.2.3 From 74b6844803e47754323137b2283bd58620b49302 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 19:37:55 +0100 Subject: s390/vdso: remove VDSO32_LBASE compat leftover Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index f65590889054..f46c2596c21f 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -4,8 +4,7 @@ #include -/* Default link addresses for the vDSOs */ -#define VDSO32_LBASE 0 +/* Default link address for the vDSO */ #define VDSO64_LBASE 0 #define VDSO_VERSION_STRING LINUX_2.6.29 -- cgit v1.2.3 From 1432cfe69e25819d96f653a4a44dad41e1163a83 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 19:44:18 +0100 Subject: s390/vdso: fix vdso data page definition The vdso data page actually contains an array. Fix that. This doesn't fix a real bug, just reflects reality. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 8bc269c55fd3..c6aeddcd687d 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -87,10 +87,10 @@ __setup("vdso=", vdso_setup); * The vdso data page */ static union { - struct vdso_data data; + struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; void vdso_getcpu_init(void) { -- cgit v1.2.3 From 96c0c7ae5266ec347041312ae22d947b5371e5b3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 19:51:34 +0100 Subject: s390/vdso: convert vdso_init() to arch_initcall Convert vdso_init() to arch_initcall like it is on all other architectures. This requires to remove the vdso_getcpu_init() call from vdso_init() since it must be called before smp is enabled. vdso_getcpu_init() is now an early_initcall like on powerpc. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 2 +- arch/s390/kernel/vdso.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index f46c2596c21f..e4ea142a082c 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -13,7 +13,7 @@ extern struct vdso_data *vdso_data; -void vdso_getcpu_init(void); +int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index c6aeddcd687d..0bb287ae0f04 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -92,10 +92,12 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; -void vdso_getcpu_init(void) +int vdso_getcpu_init(void) { set_tod_programmable_field(smp_processor_id()); + return 0; } +early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ /* * This is called from binfmt_elf, we create the special vma for the @@ -167,7 +169,6 @@ static int __init vdso_init(void) { int i; - vdso_getcpu_init(); /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; @@ -188,4 +189,4 @@ static int __init vdso_init(void) return 0; } -early_initcall(vdso_init); +arch_initcall(vdso_init); -- cgit v1.2.3 From ea44de691ebad701c849b174dabd376ed6d7d1ae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 20:04:08 +0100 Subject: s390/vdso: simplify vdso size calculation The vdso is (and must) be page aligned and its size must also be a multiple of PAGE_SIZE. Therefore no need to round upwards. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 0bb287ae0f04..7075459aed51 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -170,8 +170,7 @@ static int __init vdso_init(void) int i; /* Calculate the size of the 64 bit vDSO */ - vdso64_pages = ((&vdso64_end - &vdso64_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + vdso64_pages = ((&vdso64_end - &vdso64_start) >> PAGE_SHIFT) + 1; /* Make sure pages are in the correct state */ vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), -- cgit v1.2.3 From e1eac1947bae72eff74925b2fb82b93ded11ae6a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 20:08:40 +0100 Subject: s390/vdso: remove BUG_ON() Handle allocation error gracefully and simply disable vdso instead of leaving the system in an undefined state. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 7075459aed51..f06791c085e7 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -175,7 +175,10 @@ static int __init vdso_init(void) /* Make sure pages are in the correct state */ vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); + if (!vdso64_pagelist) { + vdso_enabled = 0; + return -ENOMEM; + } for (i = 0; i < vdso64_pages - 1; i++) { struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); get_page(pg); -- cgit v1.2.3 From 5ffd9af0fb611069f0e390b568a6460ff2c4122c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 20:10:27 +0100 Subject: s390/vdso: remove superfluous check vdso_pages (aka vdso64_pages) is never 0, therefore remove the check. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f06791c085e7..da18ba855099 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -118,13 +118,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; vdso_pages = vdso64_pages; - /* - * vDSO has a problem and was disabled, just don't "enable" it for - * the process - */ - if (vdso_pages == 0) - return 0; - /* * pick a base address for the vDSO in process space. We try to put * it at vdso_base which is the "natural" base for it, but we might -- cgit v1.2.3 From 6755270b5ee28c7699f80216f7781557c1c2eb40 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 20:22:29 +0100 Subject: s390/vdso: remove superfluous variables A few local variables exist only so the contents of a global variable can be copied to them, and use that value only for reading. Just remove them and rename some global variables. Also change vdso64_[start|end] to be character arrays to be consistent with other architectures, and get rid of the global variable vdso64_kbase. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index da18ba855099..f4e1e4580b77 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -31,10 +31,9 @@ #include #include -extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; +extern char vdso64_start[], vdso64_end[]; +static unsigned int vdso_pages; +static struct page **vdso_pagelist; /* * Should the kernel map a VDSO page into processes and pass its @@ -45,12 +44,6 @@ unsigned int __read_mostly vdso_enabled = 1; static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { - struct page **vdso_pagelist; - unsigned long vdso_pages; - - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; - if (vmf->pgoff >= vdso_pages) return VM_FAULT_SIGBUS; @@ -107,7 +100,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_pages; unsigned long vdso_base; int rc; @@ -117,7 +109,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (is_compat_task()) return 0; - vdso_pages = vdso64_pages; /* * pick a base address for the vDSO in process space. We try to put * it at vdso_base which is the "natural" base for it, but we might @@ -162,23 +153,23 @@ static int __init vdso_init(void) { int i; - /* Calculate the size of the 64 bit vDSO */ - vdso64_pages = ((&vdso64_end - &vdso64_start) >> PAGE_SHIFT) + 1; + /* Calculate the size of the vDSO */ + vdso_pages = ((vdso64_end - vdso64_start) >> PAGE_SHIFT) + 1; /* Make sure pages are in the correct state */ - vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), - GFP_KERNEL); - if (!vdso64_pagelist) { + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), + GFP_KERNEL); + if (!vdso_pagelist) { vdso_enabled = 0; return -ENOMEM; } - for (i = 0; i < vdso64_pages - 1; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + for (i = 0; i < vdso_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_start + i * PAGE_SIZE); get_page(pg); - vdso64_pagelist[i] = pg; + vdso_pagelist[i] = pg; } - vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); - vdso64_pagelist[vdso64_pages] = NULL; + vdso_pagelist[vdso_pages - 1] = virt_to_page(vdso_data); + vdso_pagelist[vdso_pages] = NULL; get_page(virt_to_page(vdso_data)); -- cgit v1.2.3 From 8d4be7f318bc69cb63b712a4fd0dfd8eebe64d0b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 20:57:08 +0100 Subject: s390/vdso: misc simple code changes - remove unneeded includes - move functions around - remove obvious and/or incorrect comments - shorten some if conditions No functional change. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 102 ++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 72 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f4e1e4580b77..a7eed8bf3959 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -6,41 +6,41 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include +#include +#include +#include #include -#include +#include #include #include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include #include -#include -#include extern char vdso64_start[], vdso64_end[]; static unsigned int vdso_pages; static struct page **vdso_pagelist; -/* - * Should the kernel map a VDSO page into processes and pass its - * address down to glibc upon exec()? - */ +static union { + struct vdso_data data[CS_BASES]; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; + +struct vdso_data *vdso_data = vdso_data_store.data; + unsigned int __read_mostly vdso_enabled = 1; +static int __init vdso_setup(char *str) +{ + bool enabled; + + if (!kstrtobool(str, &enabled)) + vdso_enabled = enabled; + return 1; +} +__setup("vdso=", vdso_setup); + static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -56,7 +56,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { current->mm->context.vdso_base = vma->vm_start; - return 0; } @@ -66,25 +65,6 @@ static const struct vm_special_mapping vdso_mapping = { .mremap = vdso_mremap, }; -static int __init vdso_setup(char *str) -{ - bool enabled; - - if (!kstrtobool(str, &enabled)) - vdso_enabled = enabled; - return 1; -} -__setup("vdso=", vdso_setup); - -/* - * The vdso data page - */ -static union { - struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; - int vdso_getcpu_init(void) { set_tod_programmable_field(smp_processor_id()); @@ -92,10 +72,6 @@ int vdso_getcpu_init(void) } early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ -/* - * This is called from binfmt_elf, we create the special vma for the - * vDSO and insert it into the mm struct tree - */ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; @@ -103,25 +79,14 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) unsigned long vdso_base; int rc; - if (!vdso_enabled) - return 0; - - if (is_compat_task()) + if (!vdso_enabled || is_compat_task()) return 0; - - /* - * pick a base address for the vDSO in process space. We try to put - * it at vdso_base which is the "natural" base for it, but we might - * fail and end up putting it elsewhere. - */ if (mmap_write_lock_killable(mm)) return -EINTR; vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto out_up; - } - + rc = vdso_base; + if (IS_ERR_VALUE(vdso_base)) + goto out; /* * our vma flags don't have VM_WRITE so by default, the process * isn't allowed to write those pages. @@ -136,15 +101,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_mapping); - if (IS_ERR(vma)) { - rc = PTR_ERR(vma); - goto out_up; - } - + rc = PTR_ERR(vma); + if (IS_ERR(vma)) + goto out; current->mm->context.vdso_base = vdso_base; rc = 0; - -out_up: +out: mmap_write_unlock(mm); return rc; } @@ -153,9 +115,7 @@ static int __init vdso_init(void) { int i; - /* Calculate the size of the vDSO */ vdso_pages = ((vdso64_end - vdso64_start) >> PAGE_SHIFT) + 1; - /* Make sure pages are in the correct state */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); @@ -170,9 +130,7 @@ static int __init vdso_init(void) } vdso_pagelist[vdso_pages - 1] = virt_to_page(vdso_data); vdso_pagelist[vdso_pages] = NULL; - get_page(virt_to_page(vdso_data)); - return 0; } arch_initcall(vdso_init); -- cgit v1.2.3 From dfc11c98763aed6b2fa17d5d23f28a429ab9877b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 21:36:14 +0100 Subject: s390/vdso: get rid of vdso_fault Implement vdso mapping similar to arm64 and powerpc. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index a7eed8bf3959..86e7a3921348 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -20,7 +20,6 @@ extern char vdso64_start[], vdso64_end[]; static unsigned int vdso_pages; -static struct page **vdso_pagelist; static union { struct vdso_data data[CS_BASES]; @@ -41,17 +40,6 @@ static int __init vdso_setup(char *str) } __setup("vdso=", vdso_setup); -static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - if (vmf->pgoff >= vdso_pages) - return VM_FAULT_SIGBUS; - - vmf->page = vdso_pagelist[vmf->pgoff]; - get_page(vmf->page); - return 0; -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -59,9 +47,8 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static const struct vm_special_mapping vdso_mapping = { +static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", - .fault = vdso_fault, .mremap = vdso_mremap, }; @@ -113,24 +100,20 @@ out: static int __init vdso_init(void) { + struct page **pages; int i; vdso_pages = ((vdso64_end - vdso64_start) >> PAGE_SHIFT) + 1; - /* Make sure pages are in the correct state */ - vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), - GFP_KERNEL); - if (!vdso_pagelist) { + pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pages) { vdso_enabled = 0; return -ENOMEM; } - for (i = 0; i < vdso_pages - 1; i++) { - struct page *pg = virt_to_page(vdso64_start + i * PAGE_SIZE); - get_page(pg); - vdso_pagelist[i] = pg; - } - vdso_pagelist[vdso_pages - 1] = virt_to_page(vdso_data); - vdso_pagelist[vdso_pages] = NULL; - get_page(virt_to_page(vdso_data)); + for (i = 0; i < vdso_pages - 1; i++) + pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE); + pages[vdso_pages - 1] = virt_to_page(vdso_data); + pages[vdso_pages] = NULL; + vdso_mapping.pages = pages; return 0; } arch_initcall(vdso_init); -- cgit v1.2.3 From 5056c2c53a22a61facb1a551bf736df9b06e513a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 24 Jan 2021 22:01:16 +0100 Subject: s390/vdso: put vdso datapage in a separate vma Add a separate "[vvar]" mapping for the vdso datapage, since it doesn't need to be executable or COW-able. This is actually the s390 implementation of commit 871549385278 ("arm64: vdso: put vdso datapage in a separate vma") Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 55 +++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 20 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 86e7a3921348..968b263f64b4 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -40,6 +40,14 @@ static int __init vdso_setup(char *str) } __setup("vdso=", vdso_setup); +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + if (vmf->pgoff == 0) + return vmf_insert_pfn(vma, vmf->address, virt_to_pfn(vdso_data)); + return VM_FAULT_SIGBUS; +} + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -47,6 +55,11 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } +static struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + static struct vm_special_mapping vdso_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -61,38 +74,41 @@ early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { + unsigned long addr, vdso_text_start, vdso_text_len, vdso_mapping_len; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_base; int rc; if (!vdso_enabled || is_compat_task()) return 0; if (mmap_write_lock_killable(mm)) return -EINTR; - vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); - rc = vdso_base; - if (IS_ERR_VALUE(vdso_base)) + vdso_text_len = vdso_pages << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + addr = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + rc = addr; + if (IS_ERR_VALUE(addr)) goto out; - /* - * our vma flags don't have VM_WRITE so by default, the process - * isn't allowed to write those pages. - * gdb can break that with ptrace interface, and thus trigger COW - * on those pages but it's then your responsibility to never do that - * on the "data" page of the vDSO or you'll stop getting kernel - * updates and your nice userland gettimeofday will be totally dead. - * It's fine to use that for setting breakpoints in the vDSO code - * pages though. - */ - vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vdso_text_start = addr; + vma = _install_special_mapping(mm, addr, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_mapping); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - current->mm->context.vdso_base = vdso_base; - rc = 0; + addr += vdso_text_len; + vma = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_PFNMAP, + &vvar_mapping); + if (IS_ERR(vma)) { + do_munmap(mm, vdso_text_start, vdso_text_len, NULL); + rc = PTR_ERR(vma); + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; + } out: mmap_write_unlock(mm); return rc; @@ -103,15 +119,14 @@ static int __init vdso_init(void) struct page **pages; int i; - vdso_pages = ((vdso64_end - vdso64_start) >> PAGE_SHIFT) + 1; + vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT; pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); if (!pages) { vdso_enabled = 0; return -ENOMEM; } - for (i = 0; i < vdso_pages - 1; i++) + for (i = 0; i < vdso_pages; i++) pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE); - pages[vdso_pages - 1] = virt_to_page(vdso_data); pages[vdso_pages] = NULL; vdso_mapping.pages = pages; return 0; -- cgit v1.2.3 From 214b3564869cd93258616411962a6fceef2c5ec7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 5 Feb 2021 16:09:14 +0100 Subject: s390/vdso: move data page before code pages For consistency with x86 and arm64 move the data page before code pages. Similar to commit 601255ae3c98 ("arm64: vdso: move data page before code pages"). Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 28 ++++++++++++++-------------- arch/s390/kernel/vdso64/vdso64.lds.S | 4 +--- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 968b263f64b4..31920b76ae6d 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -74,7 +74,8 @@ early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - unsigned long addr, vdso_text_start, vdso_text_len, vdso_mapping_len; + unsigned long vdso_text_len, vdso_mapping_len; + unsigned long vvar_start, vdso_text_start; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc; @@ -85,25 +86,24 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return -EINTR; vdso_text_len = vdso_pages << PAGE_SHIFT; vdso_mapping_len = vdso_text_len + PAGE_SIZE; - addr = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); - rc = addr; - if (IS_ERR_VALUE(addr)) + vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + rc = vvar_start; + if (IS_ERR_VALUE(vvar_start)) goto out; + vma = _install_special_mapping(mm, vvar_start, PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_PFNMAP, + &vvar_mapping); + rc = PTR_ERR(vma); + if (IS_ERR(vma)) + goto out; + vdso_text_start = vvar_start + PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ - vdso_text_start = addr; - vma = _install_special_mapping(mm, addr, vdso_text_len, + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_mapping); - rc = PTR_ERR(vma); - if (IS_ERR(vma)) - goto out; - addr += vdso_text_len; - vma = _install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_PFNMAP, - &vvar_mapping); if (IS_ERR(vma)) { - do_munmap(mm, vdso_text_start, vdso_text_len, NULL); + do_munmap(mm, vvar_start, PAGE_SIZE, NULL); rc = PTR_ERR(vma); } else { current->mm->context.vdso_base = vdso_text_start; diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 7bde3909290f..99063b4c6e27 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -13,6 +13,7 @@ ENTRY(_start) SECTIONS { + PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO64_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -94,9 +95,6 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) -- cgit v1.2.3 From 1ba2d6c0fd4e4ff5cc2372a4a5c41d65cb18f09a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 31 Jan 2021 22:27:36 +0100 Subject: s390/vdso: simplify __arch_get_hw_counter() Use the passed in vdso_data pointer instead of calculating it again. This is also required as a prerequisite for vdso time namespaces: if a process is part of a time namespace __arch_get_vdso_data() will return a pointer to the time namespace data page instead of the vdso data page, which is not what __arch_get_hw_counter() expects. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso/gettimeofday.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index bf123065ad3b..c92b0dec0d79 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -24,13 +24,12 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { - const struct vdso_data *vdso = __arch_get_vdso_data(); u64 adj, now; now = get_tod_clock(); - adj = vdso->arch_data.tod_steering_end - now; + adj = vd->arch_data.tod_steering_end - now; if (unlikely((s64) adj > 0)) - now += (vdso->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); + now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); return now; } -- cgit v1.2.3 From eeab78b05d202f15e58ab10675a4f736a1c9bd29 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 5 Feb 2021 16:19:32 +0100 Subject: s390/vdso: implement generic vdso time namespace support Implement generic vdso time namespace support which also enables time namespaces for s390. This is quite similar to what arm64 has. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/configs/zfcpdump_defconfig | 1 + arch/s390/include/asm/vdso.h | 2 + arch/s390/include/asm/vdso/gettimeofday.h | 7 ++ arch/s390/kernel/vdso.c | 102 ++++++++++++++++++++++++++++-- arch/s390/kernel/vdso64/vdso64.lds.S | 5 +- 6 files changed, 110 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 41a2c58c6e7a..5de9f409e4d0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -129,6 +129,7 @@ config S390 select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_TIME_NS select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 0200ccf10ace..acf982a2ae4c 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -3,6 +3,7 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set +# CONFIG_TIME_NS is not set # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index e4ea142a082c..b45e3dddd2c2 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -7,6 +7,8 @@ /* Default link address for the vDSO */ #define VDSO64_LBASE 0 +#define __VVAR_PAGES 2 + #define VDSO_VERSION_STRING LINUX_2.6.29 #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index c92b0dec0d79..ed89ef742530 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -67,4 +67,11 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return r2; } +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return _timens_data; +} +#endif + #endif diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 31920b76ae6d..dd967af29d2b 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -15,12 +15,15 @@ #include #include #include +#include #include #include extern char vdso64_start[], vdso64_end[]; static unsigned int vdso_pages; +static struct vm_special_mapping vvar_mapping; + static union { struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; @@ -28,6 +31,12 @@ static union { struct vdso_data *vdso_data = vdso_data_store.data; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + unsigned int __read_mostly vdso_enabled = 1; static int __init vdso_setup(char *str) @@ -40,12 +49,89 @@ static int __init vdso_setup(char *str) } __setup("vdso=", vdso_setup); +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops(). + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + return NULL; +} + +/* + * The VVAR page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (!vma_is_special_mapping(vma, &vvar_mapping)) + continue; + zap_page_range(vma, vma->vm_start, size); + break; + } + mmap_read_unlock(mm); + return 0; +} +#else +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { - if (vmf->pgoff == 0) - return vmf_insert_pfn(vma, vmf->address, virt_to_pfn(vdso_data)); - return VM_FAULT_SIGBUS; + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = virt_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + return vmf_insert_pfn(vma, vmf->address, pfn); } static int vdso_mremap(const struct vm_special_mapping *sm, @@ -80,23 +166,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) struct vm_area_struct *vma; int rc; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); if (!vdso_enabled || is_compat_task()) return 0; if (mmap_write_lock_killable(mm)) return -EINTR; vdso_text_len = vdso_pages << PAGE_SHIFT; - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_PFNMAP, + vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, &vvar_mapping); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + PAGE_SIZE; + vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 99063b4c6e27..518f1ea405f4 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -13,7 +13,10 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_data = . - PAGE_SIZE); + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif . = VDSO64_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text -- cgit v1.2.3 From fe8344a09272f3a8b71c2ad72fdf8ef3eaef71e5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 31 Jan 2021 23:07:42 +0100 Subject: s390/vdso: on timens page fault prefault also VVAR page This is the s390 variant of commit e6b28ec65b6d ("x86/vdso: On timens page fault prefault also VVAR page"). Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index dd967af29d2b..8c4e07d533c8 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -105,14 +105,23 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; + unsigned long addr, pfn; + vm_fault_t err; switch (vmf->pgoff) { case VVAR_DATA_PAGE_OFFSET: - if (timens_page) + pfn = virt_to_pfn(vdso_data); + if (timens_page) { + /* + * Fault in VVAR page too, since it will be accessed + * to get clock data anyway. + */ + addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; pfn = page_to_pfn(timens_page); - else - pfn = virt_to_pfn(vdso_data); + } break; #ifdef CONFIG_TIME_NS case VVAR_TIMENS_PAGE_OFFSET: -- cgit v1.2.3 From b29c5093820d333eef22f58cd04ec0d089059c39 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Feb 2021 16:45:37 +0100 Subject: s390/vtime: fix inline assembly clobber list The stck/stckf instruction used within the inline assembly within do_account_vtime() changes the condition code. This is not reflected with the clobber list, and therefore might result in incorrect code generation. It seems unlikely that the compiler could generate incorrect code considering the surrounding C code, but it must still be fixed. Cc: Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vtime.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 5aaa2ca6a928..978a35ea6081 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -136,7 +136,8 @@ static int do_account_vtime(struct task_struct *tsk) " stck %1" /* Store current tod clock value */ #endif : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)); + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; -- cgit v1.2.3 From 683071b02c440eb84d9133dc33bd3d3d37522a5f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Feb 2021 21:40:22 +0100 Subject: s390/cpum_cf_diag: use get_tod_clock_fast() Use get_tod_clock_fast() instead of store_tod_clock(), since store_tod_clock() can be very slow. Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index e949ab832ed7..6f6b3382edb7 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -119,7 +119,7 @@ static void cf_diag_trailer(struct cf_trailer_entry *te) te->speed = 1; te->clock_base = 1; /* Save clock base */ memcpy(&te->tod_base, &tod_clock_base[1], 8); - store_tod_clock((__u64 *)&te->timestamp); + te->timestamp = get_tod_clock_fast(); } /* -- cgit v1.2.3 From b22446d00af972ef624958a09dcbe85974b701fd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Feb 2021 21:53:08 +0100 Subject: s390/time: use stcke instead of stck Use STORE CLOCK EXTENDED instead of STORE CLOCK in early tod clock setup. This is just to remove another usage of stck, trying to remove all usages of STORE CLOCK. This doesn't fix anything. Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 10 ++++++---- arch/s390/kernel/early.c | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index c8e244ecdfde..63bf3bd6e83f 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -17,6 +17,8 @@ /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL +#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ + extern u64 clock_comparator_max; /* Inline functions for clock register access. */ @@ -32,15 +34,16 @@ static inline int set_tod_clock(__u64 time) return cc; } -static inline int store_tod_clock(__u64 *time) +static inline int store_tod_clock_ext(char *time) { + typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; int cc; asm volatile( - " stck %1\n" + " stcke %1\n" " ipm %0\n" " srl %0,28\n" - : "=d" (cc), "=Q" (*time) : : "cc"); + : "=d" (cc), "=Q" (*(addrtype *)time) : : "cc"); return cc; } @@ -144,7 +147,6 @@ static inline void local_tick_enable(unsigned long long comp) } #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ typedef unsigned long long cycles_t; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cc89763a4d3c..ee063b56b5d1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -35,12 +35,12 @@ static void __init reset_tod_clock(void) { - u64 time; + char time[STORE_CLOCK_EXT_SIZE]; - if (store_tod_clock(&time) == 0) + if (store_tod_clock_ext(time) == 0) return; /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock_ext(time) != 0) disabled_wait(); memset(tod_clock_base, 0, 16); -- cgit v1.2.3 From 78f6570946228d0e1dac5f42f398e3e07924b945 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Feb 2021 13:46:47 +0100 Subject: s390/entry: use cpu alternative for stck/stckf Use a cpu alternative to switch between stck and stckf instead of making it compile time dependent. This will also make kernels compiled for old machines, but running on newer machines, use stckf. Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 785425b59ac1..9b3aea98f886 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -106,12 +106,10 @@ _LPP_OFFSET = __LC_LPP 2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm + # Use STORE CLOCK by default, switch to STORE CLOCK FAST if available. .macro STCK savearea -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - .insn s,0xb27c0000,\savearea # store clock fast -#else - .insn s,0xb2050000,\savearea # store clock -#endif + ALTERNATIVE ".insn s,0xb2050000,\savearea", \ + ".insn s,0xb27c0000,\savearea", 25 .endm /* -- cgit v1.2.3 From 80841ad8d4ca3e91d90e18b4fa0cc1c925e9fad1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Feb 2021 17:10:49 +0100 Subject: s390/alternatives: add alternative_input() / alternative_io() Add support for alternative inline assemblies with input and output arguments. This is consistent to x86. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 1c8a38f762a3..d3880ca764ee 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -145,6 +145,22 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ altinstr2, facility2) ::: "memory") +/* Alternative inline assembly with input. */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, altinstr, facility, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ + : output : input) + +/* Use this macro if more than one output parameter is needed. */ +#define ASM_OUTPUT2(a...) a + +/* Use this macro if clobbers are needed without inputs. */ +#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ -- cgit v1.2.3 From 1c7673476b82983768c6a4dd78775f817f0e0f88 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Feb 2021 16:59:50 +0100 Subject: s390/vtime: use cpu alternative for stck/stckf Use a cpu alternative to switch between stck and stckf instead of making it compile time dependent. This will also make kernels compiled for old machines, but running on newer machines, use stckf. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vtime.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 978a35ea6081..73c7afcc0527 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -13,7 +13,7 @@ #include #include #include - +#include #include #include #include @@ -128,16 +128,13 @@ static int do_account_vtime(struct task_struct *tsk) timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - asm volatile( - " stpt %0\n" /* Store current cpu timer value */ -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - " stckf %1" /* Store current tod clock value */ -#else - " stck %1" /* Store current tod clock value */ -#endif - : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock) - : : "cc"); + /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */ + alternative_io("stpt %0\n .insn s,0xb2050000,%1\n", + "stpt %0\n .insn s,0xb27c0000,%1\n", + 25, + ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock)), + ASM_NO_INPUT_CLOBBER("cc")); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; -- cgit v1.2.3 From b0d31159a46787380353426faaad8febc9bef009 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 28 Jan 2021 13:06:05 +0100 Subject: s390: open code SWITCH_KERNEL macro This is a preparation patch for two later bugfixes. In the past both int_handler and machine check handler used SWITCH_KERNEL to switch to the kernel stack. However, SWITCH_KERNEL doesn't work properly in machine check context. So instead of adding more complexity to this macro, just remove it. Signed-off-by: Sven Schnelle Cc: # v5.8+ Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 74 ++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 28 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 9b3aea98f886..ed5acf95235f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -81,32 +81,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_KERNEL savearea - tmhh %r8,0x0001 # interrupting from user ? - jnz 1f -#if IS_ENABLED(CONFIG_KVM) - lgr %r14,%r9 - larl %r13,.Lsie_gmap - slgr %r14,%r13 - lghi %r13,.Lsie_done - .Lsie_gmap - clgr %r14,%r13 - jhe 0f - lghi %r11,\savearea # inside critical section, do cleanup - brasl %r14,.Lcleanup_sie -#endif -0: CHECK_STACK \savearea - lgr %r11,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - stg %r11,__SF_BACKCHAIN(%r15) - j 2f -1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -2: la %r11,STACK_FRAME_OVERHEAD(%r15) - .endm - - # Use STORE CLOCK by default, switch to STORE CLOCK FAST if available. .macro STCK savearea ALTERNATIVE ".insn s,0xb2050000,\savearea", \ ".insn s,0xb27c0000,\savearea", 25 @@ -413,7 +387,28 @@ ENTRY(\name) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,\lc_old_psw - SWITCH_KERNEL __LC_SAVE_AREA_ASYNC + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f +#if IS_ENABLED(CONFIG_KVM) + lgr %r14,%r9 + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe 0f + lghi %r11,__LC_SAVE_AREA_ASYNC # inside critical section, do cleanup + brasl %r14,.Lcleanup_sie +#endif +0: CHECK_STACK __LC_SAVE_AREA_ASYNC + lgr %r11,%r15 + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + stg %r11,__SF_BACKCHAIN(%r15) + j 2f +1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -542,7 +537,30 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_KERNEL __LC_GPREGS_SAVE_AREA+64 + tmhh %r8,0x0001 # interrupting from user ? + jnz .Lmcck_user +#if IS_ENABLED(CONFIG_KVM) + lgr %r14,%r9 + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe .Lmcck_stack + lghi %r11,__LC_GPREGS_SAVE_AREA+64 # inside critical section, do cleanup + brasl %r14,.Lcleanup_sie +.Lmcck_stack: +#endif + CHECK_STACK __LC_GPREGS_SAVE_AREA+64 + lgr %r11,%r15 + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + stg %r11,__SF_BACKCHAIN(%r15) + j 5f +.Lmcck_user: + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +5: la %r11,STACK_FRAME_OVERHEAD(%r15) .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) -- cgit v1.2.3 From 64985c3a223d15f151204b3aa37e587b9466378d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 10 Feb 2021 13:39:19 +0100 Subject: s390: use WRITE_ONCE when re-allocating async stack The code does: S390_lowcore.async_stack = new + STACK_INIT_OFFSET; But the compiler is free to first assign one value and add the other value later. If a IRQ would be coming in between these two operations, it would run with an invalid stack. Prevent this by using WRITE_ONCE. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c7feda84edbb..6b004940c4dc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -346,7 +346,7 @@ static int __init async_stack_realloc(void) new = stack_alloc(); if (!new) panic("Couldn't allocate async stack"); - S390_lowcore.async_stack = new + STACK_INIT_OFFSET; + WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET); free_pages(old, THREAD_SIZE_ORDER); return 0; } -- cgit v1.2.3 From b61b1595124a1694501105e5dd488de0c0c6bc2a Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 3 Feb 2021 09:02:51 +0100 Subject: s390: add stack for machine check handler The previous code used the normal kernel stack for machine checks. This is problematic when a machine check interrupts a system call or interrupt handler right at the beginning where registers are set up. Assume system_call is interrupted at the first instruction and a machine check is triggered. The machine check handler is called, checks the PSW to see whether it is coming from user space, notices that it is already in kernel mode but %r15 still contains the user space stack. This would lead to a kernel crash. There are basically two ways of fixing that: Either using the 'critical cleanup' approach which compares the address in the PSW to see whether it is already at a point where the stack has been set up, or use an extra stack for the machine check handler. For simplicity, we will go with the second approach and allocate an extra stack. This adds some memory overhead for large systems, but usually large system have plenty of memory so this isn't really a concern. But it keeps the mchk stack setup simple and less error prone. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Signed-off-by: Sven Schnelle Cc: # v5.8+ Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 13 ++++++------- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 17 +++++++---------- arch/s390/kernel/setup.c | 18 ++++++++++++++++-- arch/s390/kernel/smp.c | 17 +++++++++++------ 5 files changed, 41 insertions(+), 25 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4d65c8e4e6d0..22bceeeba4bc 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -107,16 +107,15 @@ struct lowcore { __u64 async_stack; /* 0x0350 */ __u64 nodat_stack; /* 0x0358 */ __u64 restart_stack; /* 0x0360 */ - + __u64 mcck_stack; /* 0x0368 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0368 */ - __u64 restart_data; /* 0x0370 */ - __u64 restart_source; /* 0x0378 */ + __u64 restart_fn; /* 0x0370 */ + __u64 restart_data; /* 0x0378 */ + __u64 restart_source; /* 0x0380 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0380 */ - __u64 user_asce; /* 0x0388 */ - __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ + __u64 kernel_asce; /* 0x0388 */ + __u64 user_asce; /* 0x0390 */ /* * The lpp and current_pid fields form a diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index d22bb28ef50c..15e637728a4b 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -118,6 +118,7 @@ int main(void) OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack); OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ed5acf95235f..f7953bb17558 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -70,6 +70,8 @@ _LPP_OFFSET = __LC_LPP je \oklabel clg %r14,__LC_ASYNC_STACK je \oklabel + clg %r14,__LC_MCCK_STACK + je \oklabel clg %r14,__LC_NODAT_STACK je \oklabel clg %r14,__LC_RESTART_STACK @@ -548,20 +550,16 @@ ENTRY(mcck_int_handler) jhe .Lmcck_stack lghi %r11,__LC_GPREGS_SAVE_AREA+64 # inside critical section, do cleanup brasl %r14,.Lcleanup_sie -.Lmcck_stack: #endif - CHECK_STACK __LC_GPREGS_SAVE_AREA+64 - lgr %r11,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - stg %r11,__SF_BACKCHAIN(%r15) - j 5f + j .Lmcck_stack .Lmcck_user: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP +.Lmcck_stack: + lg %r15,__LC_MCCK_STACK +.Lmcck_skip: + la %r11,STACK_FRAME_OVERHEAD(%r15) lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -5: la %r11,STACK_FRAME_OVERHEAD(%r15) -.Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use @@ -602,7 +600,6 @@ ENTRY(mcck_int_handler) .Lmcck_panic: lg %r15,__LC_NODAT_STACK - la %r11,STACK_FRAME_OVERHEAD(%r15) j .Lmcck_skip ENDPROC(mcck_int_handler) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6b004940c4dc..60da976eee6f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -338,7 +338,7 @@ int __init arch_early_irq_init(void) return 0; } -static int __init async_stack_realloc(void) +static int __init stack_realloc(void) { unsigned long old, new; @@ -348,9 +348,16 @@ static int __init async_stack_realloc(void) panic("Couldn't allocate async stack"); WRITE_ONCE(S390_lowcore.async_stack, new + STACK_INIT_OFFSET); free_pages(old, THREAD_SIZE_ORDER); + + old = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; + new = stack_alloc(); + if (!new) + panic("Couldn't allocate machine check stack"); + WRITE_ONCE(S390_lowcore.mcck_stack, new + STACK_INIT_OFFSET); + memblock_free(old, THREAD_SIZE); return 0; } -early_initcall(async_stack_realloc); +early_initcall(stack_realloc); void __init arch_call_rest_init(void) { @@ -372,6 +379,7 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { unsigned long int_psw_mask = PSW_KERNEL_BITS; + unsigned long mcck_stack; struct lowcore *lc; if (IS_ENABLED(CONFIG_KASAN)) @@ -439,6 +447,12 @@ static void __init setup_lowcore_dat_off(void) lc->restart_data = 0; lc->restart_source = -1UL; + mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); + if (!mcck_stack) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, THREAD_SIZE, THREAD_SIZE); + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; + /* Setup absolute zero lowcore */ mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c5abbb94ac6e..e299892440b6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -189,7 +189,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { - unsigned long async_stack, nodat_stack; + unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; if (pcpu != &pcpu_devices[0]) { @@ -202,13 +202,15 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; } async_stack = stack_alloc(); - if (!async_stack) - goto out; + mcck_stack = stack_alloc(); + if (!async_stack || !mcck_stack) + goto out_stack; lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + STACK_INIT_OFFSET; lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -216,12 +218,13 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) - goto out_async; + goto out_stack; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_async: +out_stack: + stack_free(mcck_stack); stack_free(async_stack); out: if (pcpu != &pcpu_devices[0]) { @@ -233,16 +236,18 @@ out: static void pcpu_free_lowcore(struct pcpu *pcpu) { - unsigned long async_stack, nodat_stack, lowcore; + unsigned long async_stack, nodat_stack, mcck_stack, lowcore; nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET; async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET; + mcck_stack = pcpu->lowcore->mcck_stack - STACK_INIT_OFFSET; lowcore = (unsigned long) pcpu->lowcore; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; nmi_free_per_cpu(pcpu->lowcore); stack_free(async_stack); + stack_free(mcck_stack); if (pcpu == &pcpu_devices[0]) return; free_pages(nodat_stack, THREAD_SIZE_ORDER); -- cgit v1.2.3 From 26521412ae22d06caab98721757b2721c6d7c46c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 3 Feb 2021 09:16:45 +0100 Subject: s390: fix kernel asce loading when sie is interrupted If a machine check is coming in during sie, the PU saves the control registers to the machine check save area. Afterwards mcck_int_handler is called, which loads __LC_KERNEL_ASCE into %cr1. Later the code restores %cr1 from the machine check area, but that is wrong when SIE was interrupted because the machine check area still contains the gmap asce. Instead it should return with either __KERNEL_ASCE in %cr1 when interrupted in SIE or the previous %cr1 content saved in the machine check save area. Fixes: 87d598634521 ("s390/mm: remove set_fs / rework address space handling") Signed-off-by: Sven Schnelle Cc: # v5.8+ Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f7953bb17558..377294969954 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -558,6 +558,7 @@ ENTRY(mcck_int_handler) lg %r15,__LC_MCCK_STACK .Lmcck_skip: la %r11,STACK_FRAME_OVERHEAD(%r15) + stctg %c1,%c1,__PT_CR1(%r11) lctlg %c1,%c1,__LC_KERNEL_ASCE xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lghi %r14,__LC_GPREGS_SAVE_AREA+64 @@ -573,8 +574,6 @@ ENTRY(mcck_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) - la %r14,4095 - mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs -- cgit v1.2.3 From 33ea04872da15ea8236f92da6009af5a1b0af641 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 3 Feb 2021 17:46:12 +0100 Subject: s390: use r13 in cleanup_sie as temp register Instead of thrashing r11 which is normally our pointer to struct pt_regs on the stack, use r13 as temporary register in the BR_EX macro. r13 is already used in cleanup_sie, so no need to thrash another register. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 377294969954..d1236a9f73b2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -130,7 +130,7 @@ _LPP_OFFSET = __LC_LPP .endm GEN_BR_THUNK %r14 - GEN_BR_THUNK %r14,%r11 + GEN_BR_THUNK %r14,%r13 .section .kprobes.text, "ax" .Ldummy: @@ -665,7 +665,7 @@ ENDPROC(stack_overflow) ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14,%r11 + BR_EX %r14,%r13 #endif .section .rodata, "a" -- cgit v1.2.3 From efa54735905c03bf876b4451cfaef6b45046bc53 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 3 Feb 2021 17:50:00 +0100 Subject: s390: split cleanup_sie The current code uses the address in %r11 to figure out whether it was called from the machine check handler or from a normal interrupt handler. Instead of doing this implicit logic (which is mostly a leftover from the old critical cleanup approach) just add a second label and use that. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d1236a9f73b2..c10b9f31eef7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -214,7 +214,7 @@ ENTRY(sie64a) # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. # Other instructions between sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. -# See also .Lcleanup_sie +# See also .Lcleanup_sie_mcck/.Lcleanup_sie_int .Lrewind_pad6: nopr 7 .Lrewind_pad4: @@ -398,8 +398,7 @@ ENTRY(\name) lghi %r13,.Lsie_done - .Lsie_gmap clgr %r14,%r13 jhe 0f - lghi %r11,__LC_SAVE_AREA_ASYNC # inside critical section, do cleanup - brasl %r14,.Lcleanup_sie + brasl %r14,.Lcleanup_sie_int #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC lgr %r11,%r15 @@ -548,8 +547,7 @@ ENTRY(mcck_int_handler) lghi %r13,.Lsie_done - .Lsie_gmap clgr %r14,%r13 jhe .Lmcck_stack - lghi %r11,__LC_GPREGS_SAVE_AREA+64 # inside critical section, do cleanup - brasl %r14,.Lcleanup_sie + brasl %r14,.Lcleanup_sie_mcck #endif j .Lmcck_stack .Lmcck_user: @@ -651,16 +649,15 @@ ENDPROC(stack_overflow) #endif #if IS_ENABLED(CONFIG_KVM) -.Lcleanup_sie: - cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? - je 1f +.Lcleanup_sie_mcck: larl %r13,.Lsie_entry slgr %r9,%r13 larl %r13,.Lsie_skip clgr %r9,%r13 - jh 1f + jh .Lcleanup_sie_int oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) +.Lcleanup_sie_int: + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE -- cgit v1.2.3 From 96c0a6a72d181a330db6dc9848ff2e6584b1aa5b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2021 21:51:02 +0100 Subject: s390,alpha: switch to 64-bit ino_t s390 and alpha are the only 64 bit architectures with a 32-bit ino_t. Since this is quite unusual this causes bugs from time to time. See e.g. commit ebce3eb2f7ef ("ceph: fix inode number handling on arches with 32-bit ino_t") for an example. This (obviously) also prevents s390 and alpha to use 64-bit ino_t for tmpfs. See commit b85a7a8bb573 ("tmpfs: disallow CONFIG_TMPFS_INODE64 on s390"). Therefore switch both s390 and alpha to 64-bit ino_t. This should only have an effect on the ustat system call. To prevent ABI breakage define struct ustat compatible to the old layout and change sys_ustat() accordingly. Acked-by: Linus Torvalds Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/Kconfig | 4 ++++ arch/alpha/Kconfig | 1 + arch/s390/Kconfig | 1 + fs/statfs.c | 5 ++++- include/linux/types.h | 8 ++++++-- 5 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/Kconfig b/arch/Kconfig index 24862d15f3a3..383c98e86a70 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -327,6 +327,10 @@ config ARCH_32BIT_OFF_T still support 32-bit off_t. This option is enabled for all such architectures explicitly. +# Selected by 64 bit architectures which have a 32 bit f_tinode in struct ustat +config ARCH_32BIT_USTAT_F_TINODE + bool + config HAVE_ASM_MODVERSIONS bool help diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 1f51437d5765..96ce6565890e 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -2,6 +2,7 @@ config ALPHA bool default y + select ARCH_32BIT_USTAT_F_TINODE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5de9f409e4d0..b68e6952866d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,7 @@ config S390 # Note: keep this list sorted alphabetically # imply IMA_SECURE_AND_OR_TRUSTED_BOOT + select ARCH_32BIT_USTAT_F_TINODE select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX diff --git a/fs/statfs.c b/fs/statfs.c index 68cb07788750..0ba34c135593 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -255,7 +255,10 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) memset(&tmp,0,sizeof(struct ustat)); tmp.f_tfree = sbuf.f_bfree; - tmp.f_tinode = sbuf.f_ffree; + if (IS_ENABLED(CONFIG_ARCH_32BIT_USTAT_F_TINODE)) + tmp.f_tinode = min_t(u64, sbuf.f_ffree, UINT_MAX); + else + tmp.f_tinode = sbuf.f_ffree; return copy_to_user(ubuf, &tmp, sizeof(struct ustat)) ? -EFAULT : 0; } diff --git a/include/linux/types.h b/include/linux/types.h index a147977602b5..ac825ad90e44 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -14,7 +14,7 @@ typedef u32 __kernel_dev_t; typedef __kernel_fd_set fd_set; typedef __kernel_dev_t dev_t; -typedef __kernel_ino_t ino_t; +typedef __kernel_ulong_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; typedef u32 nlink_t; @@ -189,7 +189,11 @@ struct hlist_node { struct ustat { __kernel_daddr_t f_tfree; - __kernel_ino_t f_tinode; +#ifdef CONFIG_ARCH_32BIT_USTAT_F_TINODE + unsigned int f_tinode; +#else + unsigned long f_tinode; +#endif char f_fname[6]; char f_fpack[6]; }; -- cgit v1.2.3 From e4101be56c85effa4509b35a208482f888e79cfc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 7 Feb 2021 22:00:22 +0100 Subject: s390/time: introduce union tod_clock Introduce union tod_clock which is supposed to be used to decode and access various fields of the result of STORE CLOCK EXTENDED. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 63bf3bd6e83f..271a1e12cc73 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -21,6 +21,25 @@ extern u64 clock_comparator_max; +union tod_clock { + __uint128_t val; + struct { + __uint128_t ei : 8; /* epoch index */ + __uint128_t tod : 64; /* bits 0-63 of tod clock */ + __uint128_t : 40; + __uint128_t pf : 16; /* programmable field */ + }; + struct { + __uint128_t eitod : 72; /* epoch index + bits 0-63 tod clock */ + __uint128_t : 56; + }; + struct { + __uint128_t us : 60; /* micro-seconds */ + __uint128_t sus : 12; /* sub-microseconds */ + __uint128_t : 56; + }; +} __packed; + /* Inline functions for clock register access. */ static inline int set_tod_clock(__u64 time) { -- cgit v1.2.3 From 530f639f1efe076df8d56719ab45eb7203175ecf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 13:56:49 +0100 Subject: s390/time: rename store_tod_clock_ext() and use union tod_clock Rename store_tod_clock_ext() to store_tod_clock_ext_cc() to reflect that it returns a condition code and also use union tod_clock as parameter. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 5 ++--- arch/s390/kernel/early.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 271a1e12cc73..24964579684b 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -53,16 +53,15 @@ static inline int set_tod_clock(__u64 time) return cc; } -static inline int store_tod_clock_ext(char *time) +static inline int store_tod_clock_ext_cc(union tod_clock *clk) { - typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; int cc; asm volatile( " stcke %1\n" " ipm %0\n" " srl %0,28\n" - : "=d" (cc), "=Q" (*(addrtype *)time) : : "cc"); + : "=d" (cc), "=Q" (*clk) : : "cc"); return cc; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ee063b56b5d1..8f046f985d8e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -35,12 +35,12 @@ static void __init reset_tod_clock(void) { - char time[STORE_CLOCK_EXT_SIZE]; + union tod_clock clk; - if (store_tod_clock_ext(time) == 0) + if (store_tod_clock_ext_cc(&clk) == 0) return; /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock_ext(time) != 0) + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) disabled_wait(); memset(tod_clock_base, 0, 16); -- cgit v1.2.3 From cc2c7db28f7924e9133adc06293a74838ddee59a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 13:58:47 +0100 Subject: s390/time: introduce new store_tod_clock_ext() Introduce new store_tod_clock_ext() function, which is the same like store_tod_clock_ext_cc() except that it doesn't return a condition code. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 24964579684b..7bfdcae34515 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -65,6 +65,11 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk) return cc; } +static inline void store_tod_clock_ext(union tod_clock *tod) +{ + asm volatile("stcke %0" : "=Q" (*tod) : : "cc"); +} + static inline void set_clock_comparator(__u64 time) { asm volatile("sckc %0" : : "Q" (time)); -- cgit v1.2.3 From f8d8977a3d971011ab04e4569a664628bd03935e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:06:10 +0100 Subject: s390/time: convert tod_clock_base to union Convert tod_clock_base to union tod_clock. This simplifies quite a bit of code and also fixes a bug in read_persistent_clock64(); void read_persistent_clock64(struct timespec64 *ts) { __u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; get_tod_clock_ext(clk); *(__u64 *) &clk[1] -= delta; if (*(__u64 *) &clk[1] > delta) clk[0]--; ext_to_timespec64(clk, ts); } Assume &clk[1] == 3 and delta == 2; then after the substraction the if condition becomes true and the epoch part of the clock is decremented by one because of an assumed overflow, even though there is none. Fix this by using 128 bit arithmetics and let the compiler do the right thing: void read_persistent_clock64(struct timespec64 *ts) { union tod_clock clk; u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; store_tod_clock_ext(&clk); clk.eitod -= delta; ext_to_timespec64(&clk, ts); } Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 4 +-- arch/s390/kernel/early.c | 6 ++--- arch/s390/kernel/perf_cpum_cf_diag.c | 2 +- arch/s390/kernel/perf_cpum_sf.c | 2 +- arch/s390/kernel/time.c | 50 ++++++++++++------------------------ 5 files changed, 24 insertions(+), 40 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 7bfdcae34515..dbd7b44bc9da 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -208,7 +208,7 @@ static inline cycles_t get_cycles(void) int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); -extern unsigned char tod_clock_base[16] __aligned(8); +extern union tod_clock tod_clock_base; /** * get_clock_monotonic - returns current time in clock rate units @@ -222,7 +222,7 @@ static inline unsigned long long get_tod_clock_monotonic(void) unsigned long long tod; preempt_disable_notrace(); - tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + tod = get_tod_clock() - tod_clock_base.tod; preempt_enable_notrace(); return tod; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8f046f985d8e..a361d2e70025 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -43,8 +43,8 @@ static void __init reset_tod_clock(void) if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) disabled_wait(); - memset(tod_clock_base, 0, 16); - *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; } @@ -230,7 +230,7 @@ static __init void detect_machine_facilities(void) } if (test_facility(133)) S390_lowcore.machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base[1] & 0x80)) { + if (test_facility(139) && (tod_clock_base.tod >> 63)) { /* Enabled signed clock comparator comparisons */ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; clock_comparator_max = -1ULL >> 1; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 6f6b3382edb7..b5c86fb70d63 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -118,7 +118,7 @@ static void cf_diag_trailer(struct cf_trailer_entry *te) if (te->cpu_speed) te->speed = 1; te->clock_base = 1; /* Save clock base */ - memcpy(&te->tod_base, &tod_clock_base[1], 8); + te->tod_base = tod_clock_base.tod; te->timestamp = get_tod_clock_fast(); } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 19cd7b961c45..db62def4ef28 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1682,7 +1682,7 @@ static void aux_sdb_init(unsigned long sdb) /* Save clock base */ te->clock_base = 1; - memcpy(&te->progusage2, &tod_clock_base[1], 8); + te->progusage2 = tod_clock_base.tod; } /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c59cb44fbb7d..06bcfa636638 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -55,11 +55,7 @@ #include #include "entry.h" -unsigned char tod_clock_base[16] __aligned(8) = { - /* Force to data section. */ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; +union tod_clock tod_clock_base __section(".data"); EXPORT_SYMBOL_GPL(tod_clock_base); u64 clock_comparator_max = -1ULL; @@ -86,7 +82,7 @@ void __init time_early_init(void) struct ptff_qui qui; /* Initialize TOD steering parameters */ - tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; + tod_steering_end = tod_clock_base.tod; vdso_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) @@ -113,18 +109,13 @@ unsigned long long notrace sched_clock(void) } NOKPROBE_SYMBOL(sched_clock); -static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) +static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt) { - unsigned long long high, low, rem, sec, nsec; + unsigned long rem, sec, nsec; - /* Split extendnd TOD clock to micro-seconds and sub-micro-seconds */ - high = (*(unsigned long long *) clk) >> 4; - low = (*(unsigned long long *)&clk[7]) << 4; - /* Calculate seconds and nano-seconds */ - sec = high; + sec = clk->us; rem = do_div(sec, 1000000); - nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32; - + nsec = ((clk->sus + (rem << 12)) * 125) >> 9; xt->tv_sec = sec; xt->tv_nsec = nsec; } @@ -204,30 +195,26 @@ static void stp_reset(void); void read_persistent_clock64(struct timespec64 *ts) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; - __u64 delta; + union tod_clock clk; + u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; - get_tod_clock_ext(clk); - *(__u64 *) &clk[1] -= delta; - if (*(__u64 *) &clk[1] > delta) - clk[0]--; - ext_to_timespec64(clk, ts); + store_tod_clock_ext(&clk); + clk.eitod -= delta; + ext_to_timespec64(&clk, ts); } void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, struct timespec64 *boot_offset) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; struct timespec64 boot_time; - __u64 delta; + union tod_clock clk; + u64 delta; delta = initial_leap_seconds + TOD_UNIX_EPOCH; - memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE); - *(__u64 *)&clk[1] -= delta; - if (*(__u64 *)&clk[1] > delta) - clk[0]--; - ext_to_timespec64(clk, &boot_time); + clk = tod_clock_base; + clk.eitod -= delta; + ext_to_timespec64(&clk, &boot_time); read_persistent_clock64(wall_time); *boot_offset = timespec64_sub(*wall_time, boot_time); @@ -381,10 +368,7 @@ static void clock_sync_global(unsigned long long delta) struct ptff_qto qto; /* Fixup the monotonic sched clock. */ - *(unsigned long long *) &tod_clock_base[1] += delta; - if (*(unsigned long long *) &tod_clock_base[1] < delta) - /* Epoch overflow */ - tod_clock_base[0]++; + tod_clock_base.eitod += delta; /* Adjust TOD steering parameters. */ now = get_tod_clock(); adj = tod_steering_end - now; -- cgit v1.2.3 From 169ceac42926155870e7ad8165f01ab15caac17a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:16:28 +0100 Subject: s390/vdso: use union tod_clock Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/getcpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c index 5b2bc7494d5b..5c5d4a848b76 100644 --- a/arch/s390/kernel/vdso64/getcpu.c +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -8,12 +8,12 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { - __u16 todval[8]; + union tod_clock clk; /* CPU number is stored in the programmable field of the TOD clock */ - get_tod_clock_ext((char *)todval); + store_tod_clock_ext(&clk); if (cpu) - *cpu = todval[7]; + *cpu = clk.pf; /* NUMA node is always zero */ if (node) *node = 0; -- cgit v1.2.3 From 2cfd7b73f593ebf53e90a3464aa66c9ca996936e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:27:33 +0100 Subject: s390/kvm: use union tod_clock Use union tod_clock and get rid of the kvm specific struct kvm_s390_tod_clock_ext which apparently was introduced for the same purpose. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kvm/kvm-s390.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 759bbc012b6c..2f09e9d7dc95 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -165,12 +165,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -struct kvm_s390_tod_clock_ext { - __u8 epoch_idx; - __u64 tod; - __u8 reserved[7]; -} __packed; - /* allow nested virtualization in KVM (if enabled by user space) */ static int nested; module_param(nested, int, S_IRUGO); @@ -1166,17 +1160,17 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) static void kvm_s390_get_tod_clock(struct kvm *kvm, struct kvm_s390_vm_tod_clock *gtod) { - struct kvm_s390_tod_clock_ext htod; + union tod_clock clk; preempt_disable(); - get_tod_clock_ext((char *)&htod); + store_tod_clock_ext(&clk); - gtod->tod = htod.tod + kvm->arch.epoch; + gtod->tod = clk.tod + kvm->arch.epoch; gtod->epoch_idx = 0; if (test_kvm_facility(kvm, 139)) { - gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; - if (gtod->tod < htod.tod) + gtod->epoch_idx = clk.ei + kvm->arch.epdx; + if (gtod->tod < clk.tod) gtod->epoch_idx += 1; } @@ -3867,18 +3861,18 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; - struct kvm_s390_tod_clock_ext htod; + union tod_clock clk; int i; mutex_lock(&kvm->lock); preempt_disable(); - get_tod_clock_ext((char *)&htod); + store_tod_clock_ext(&clk); - kvm->arch.epoch = gtod->tod - htod.tod; + kvm->arch.epoch = gtod->tod - clk.tod; kvm->arch.epdx = 0; if (test_kvm_facility(kvm, 139)) { - kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + kvm->arch.epdx = gtod->epoch_idx - clk.ei; if (kvm->arch.epoch > gtod->tod) kvm->arch.epdx -= 1; } -- cgit v1.2.3 From d1deda6f2b238bfcd3a4521b3221974443416342 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:32:27 +0100 Subject: s390/debug: use union tod_clock Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b6619ae9a3e0..bb958d32bd81 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -829,11 +829,11 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; unsigned long timestamp; + union tod_clock clk; - get_tod_clock_ext(clk); - timestamp = *(unsigned long *) &clk[0] >> 4; + store_tod_clock_ext(&clk); + timestamp = clk.us; timestamp -= TOD_UNIX_EPOCH >> 12; active->clock = timestamp; active->cpu = smp_processor_id(); -- cgit v1.2.3 From 01f224b9d7227208a2dba8ef93b8fe1a29d0b9f1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:36:52 +0100 Subject: s390/hypfs: use store_tod_clock_ext() Use store_tod_clock_ext() instead of get_tod_clock_ext(). Unfortunately one usage has to be converted to a cast, since otherwise a uapi header file would have to be changed. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/hypfs/hypfs_diag0c.c | 2 +- arch/s390/hypfs/hypfs_vm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 3235e4d82f2d..6c43d2ba2079 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -84,7 +84,7 @@ static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size) if (IS_ERR(diag0c_data)) return PTR_ERR(diag0c_data); memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr)); - get_tod_clock_ext(diag0c_data->hdr.tod_ext); + store_tod_clock_ext((union tod_clock *)diag0c_data->hdr.tod_ext); diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry); diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION; diag0c_data->hdr.count = count; diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e1fcc03159ef..33f973ff9744 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -234,7 +234,7 @@ failed: struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ - char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */ + union tod_clock tod_ext; /* TOD clock for d2fc */ u64 count; /* Number of VM guests in d2fc buffer */ char reserved[30]; } __attribute__ ((packed)); @@ -252,7 +252,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); if (IS_ERR(d2fc)) return PTR_ERR(d2fc); - get_tod_clock_ext(d2fc->hdr.tod_ext); + store_tod_clock_ext(&d2fc->hdr.tod_ext); d2fc->hdr.len = count * sizeof(struct diag2fc_data); d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; d2fc->hdr.count = count; -- cgit v1.2.3 From fc4a925f7774fe14f8f6318d0d7ed7d2596f073f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:44:45 +0100 Subject: s390/crypto: use store_tod_clock_ext() Use store_tod_clock_ext() in order to be able to get rid get_tod_clock_ext(). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/prng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index b2f219ec379c..234d791ca59d 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -414,7 +414,7 @@ static int __init prng_sha512_instantiate(void) } /* append the seed by 16 bytes of unique nonce */ - get_tod_clock_ext(seed + seedlen); + store_tod_clock_ext((union tod_clock *)(seed + seedlen)); seedlen += 16; /* now initial seed of the prno drng */ -- cgit v1.2.3 From 7ef37dd7bb00b94e027f63ef626a3a1c58474da9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Feb 2021 16:19:27 +0100 Subject: s390/time: remove get_tod_clock_ext() Remove get_tod_clock_ext() and the STORE_CLOCK_EXT_SIZE define. This enforces all users of the existing low level functions to use a union tod_clock. This way there is now a compile time check for the correct time and therefore also if the size of the argument matches what will be written to by the STORE CLOCK EXTENDED instruction. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index dbd7b44bc9da..c4e23e925665 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -17,8 +17,6 @@ /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL -#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ - extern u64 clock_comparator_max; union tod_clock { @@ -173,19 +171,12 @@ static inline void local_tick_enable(unsigned long long comp) typedef unsigned long long cycles_t; -static inline void get_tod_clock_ext(char *clk) -{ - typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; - - asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); -} - static inline unsigned long long get_tod_clock(void) { - char clk[STORE_CLOCK_EXT_SIZE]; + union tod_clock clk; - get_tod_clock_ext(clk); - return *((unsigned long long *)&clk[1]); + store_tod_clock_ext(&clk); + return clk.tod; } static inline unsigned long long get_tod_clock_fast(void) -- cgit v1.2.3 From 7940eaf2e956ce3d67ac9efb5b621adbb823e049 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Sat, 30 Jan 2021 13:04:53 +0100 Subject: s390/qdio: improve handling of PENDING buffers for QEBSM devices For QEBSM devices the 'merge_pending' mechanism in get_buf_states() doesn't apply, and we can actually get SLSB_P_OUTPUT_PENDING returned. So for this case propagating the PENDING state to the driver via the queue's sbal_state doesn't make sense and creates unnecessary overhead. Instead introduce a new QDIO_ERROR_* flag that gets passed to the driver, and triggers the same processing as if the buffers were flagged as QDIO_OUTBUF_STATE_FLAG_PENDING. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 1 + drivers/s390/cio/qdio_main.c | 11 +++++++++-- drivers/s390/net/qeth_core_main.c | 9 ++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index f96454f5d4cd..c85f75a3d452 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -315,6 +315,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, #define QDIO_ERROR_GET_BUF_STATE 0x0002 #define QDIO_ERROR_SET_BUF_STATE 0x0004 #define QDIO_ERROR_SLSB_STATE 0x0100 +#define QDIO_ERROR_SLSB_PENDING 0x0200 #define QDIO_ERROR_FATAL 0x00ff #define QDIO_ERROR_TEMPORARY 0xff00 diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4252f43ef214..0ad5a4c1bb08 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -571,6 +571,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start, unsigned int *error) { unsigned char state = 0; + unsigned int i; int count; q->timestamp = get_tod_clock_fast(); @@ -591,8 +592,14 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start, return 0; switch (state) { - case SLSB_P_OUTPUT_EMPTY: case SLSB_P_OUTPUT_PENDING: + /* detach the utilized QAOBs: */ + for (i = 0; i < count; i++) + q->u.out.aobs[QDIO_BUFNR(start + i)] = NULL; + + *error = QDIO_ERROR_SLSB_PENDING; + fallthrough; + case SLSB_P_OUTPUT_EMPTY: /* the adapter got it */ DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out empty:%1d %02x", q->nr, count); @@ -643,7 +650,7 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start, if (count) { DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); - if (q->u.out.use_cq) { + if (q->u.out.use_cq && *error != QDIO_ERROR_SLSB_PENDING) { unsigned int i; for (i = 0; i < count; i++) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cf18d87da41e..13056cc2377b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6068,14 +6068,17 @@ int qeth_poll(struct napi_struct *napi, int budget) EXPORT_SYMBOL_GPL(qeth_poll); static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, - unsigned int bidx, bool error, int budget) + unsigned int bidx, unsigned int qdio_error, + int budget) { struct qeth_qdio_out_buffer *buffer = queue->bufs[bidx]; u8 sflags = buffer->buffer->element[15].sflags; struct qeth_card *card = queue->card; + bool error = !!qdio_error; - if (queue->bufstates && (queue->bufstates[bidx].flags & - QDIO_OUTBUF_STATE_FLAG_PENDING)) { + if ((qdio_error == QDIO_ERROR_SLSB_PENDING) || + (queue->bufstates && (queue->bufstates[bidx].flags & + QDIO_OUTBUF_STATE_FLAG_PENDING))) { WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); QETH_CARD_TEXT_(card, 5, "pel%u", bidx); -- cgit v1.2.3 From 2223318c2862edc7f5b282939b850b19fc934ec4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Sat, 30 Jan 2021 13:22:56 +0100 Subject: s390/qdio: remove 'merge_pending' mechanism For non-QEBSM devices, get_buf_states() merges PENDING and EMPTY buffers into a single group of finished buffers. To allow the upper-layer driver to differentiate between the two states, qdio_check_pending() looks at each buffer's state again and sets the sbal_state flag to QDIO_OUTBUF_STATE_FLAG_PENDING accordingly. So effectively we're spending overhead on _every_ Output Queue inspection, just to avoid some additional TX completion calls in case a group of buffers has completed with mixed EMPTY / PENDING state. Given that PENDING buffers should rarely occur, this is a bad trade-off. In particular so as the additional checks in get_buf_states() affect _all_ device types (even those that don't use the PENDING state). Rip it all out, and just report the PENDING completions separately as we already do for QEBSM devices. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 4 --- drivers/s390/cio/qdio_main.c | 59 ++++++--------------------------------- drivers/s390/net/qeth_core_main.c | 4 +-- 3 files changed, 10 insertions(+), 57 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index c85f75a3d452..d9215c7106f0 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -250,17 +250,13 @@ struct slsb { * struct qdio_outbuf_state - SBAL related asynchronous operation information * (for communication with upper layer programs) * (only required for use with completion queues) - * @flags: flags indicating state of buffer * @user: pointer to upper layer program's state information related to SBAL * (stored in user1 data of QAOB) */ struct qdio_outbuf_state { - u8 flags; void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 - #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 0ad5a4c1bb08..03a011619908 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -202,7 +202,7 @@ again: */ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, unsigned char *state, unsigned int count, - int auto_ack, int merge_pending) + int auto_ack) { unsigned char __state = 0; int i = 1; @@ -217,18 +217,9 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, if (__state & SLSB_OWNER_CU) goto out; - if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) - __state = SLSB_P_OUTPUT_EMPTY; - for (; i < count; i++) { bufnr = next_buf(bufnr); - /* merge PENDING into EMPTY: */ - if (merge_pending && - q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING && - __state == SLSB_P_OUTPUT_EMPTY) - continue; - /* stop if next state differs from initial state: */ if (q->slsb.val[bufnr] != __state) break; @@ -242,7 +233,7 @@ out: static inline int get_buf_state(struct qdio_q *q, unsigned int bufnr, unsigned char *state, int auto_ack) { - return get_buf_states(q, bufnr, state, 1, auto_ack, 0); + return get_buf_states(q, bufnr, state, 1, auto_ack); } /* wrap-around safe setting of slsb states, returns number of changed buffers */ @@ -464,7 +455,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start, * No siga sync here, as a PCI or we after a thin interrupt * already sync'ed the queues. */ - count = get_buf_states(q, start, &state, count, 1, 0); + count = get_buf_states(q, start, &state, count, 1); if (!count) return 0; @@ -541,7 +532,6 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, WARN_ON_ONCE(phys_aob & 0xFF); } - q->sbal_state[bufnr].flags = 0; return phys_aob; } @@ -554,19 +544,6 @@ static inline int qdio_tasklet_schedule(struct qdio_q *q) return -EPERM; } -static void qdio_check_pending(struct qdio_q *q, unsigned int index) -{ - unsigned char state; - - if (get_buf_state(q, index, &state, 0) > 0 && - state == SLSB_P_OUTPUT_PENDING && - q->u.out.aobs[index]) { - q->u.out.sbal_state[index].flags |= - QDIO_OUTBUF_STATE_FLAG_PENDING; - q->u.out.aobs[index] = NULL; - } -} - static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start, unsigned int *error) { @@ -587,7 +564,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start, if (!count) return 0; - count = get_buf_states(q, start, &state, count, 0, q->u.out.use_cq); + count = get_buf_states(q, start, &state, count, 0); if (!count) return 0; @@ -609,6 +586,9 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start, account_sbals(q, count); return count; case SLSB_P_OUTPUT_ERROR: + DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out error:%1d %02x", + q->nr, count); + *error = QDIO_ERROR_SLSB_STATE; process_buffer_error(q, start, count); atomic_sub(count, &q->nr_buf_used); @@ -640,27 +620,6 @@ static inline int qdio_outbound_q_done(struct qdio_q *q) return atomic_read(&q->nr_buf_used) == 0; } -static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start, - unsigned int *error) -{ - int count; - - count = get_outbound_buffer_frontier(q, start, error); - - if (count) { - DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); - - if (q->u.out.use_cq && *error != QDIO_ERROR_SLSB_PENDING) { - unsigned int i; - - for (i = 0; i < count; i++) - qdio_check_pending(q, QDIO_BUFNR(start + i)); - } - } - - return count; -} - static int qdio_kick_outbound_q(struct qdio_q *q, unsigned int count, unsigned long aob) { @@ -715,7 +674,7 @@ void qdio_outbound_tasklet(struct tasklet_struct *t) qperf_inc(q, tasklet_outbound); WARN_ON_ONCE(atomic_read(&q->nr_buf_used) < 0); - count = qdio_outbound_q_moved(q, start, &error); + count = get_outbound_buffer_frontier(q, start, &error); if (count) { q->first_to_check = add_buf(start, count); @@ -1482,7 +1441,7 @@ static int __qdio_inspect_queue(struct qdio_q *q, unsigned int *bufnr, *error = 0; count = q->is_input_q ? get_inbound_buffer_frontier(q, start, error) : - qdio_outbound_q_moved(q, start, error); + get_outbound_buffer_frontier(q, start, error); if (count == 0) return 0; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 13056cc2377b..068e2d9b1eb8 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6076,9 +6076,7 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, struct qeth_card *card = queue->card; bool error = !!qdio_error; - if ((qdio_error == QDIO_ERROR_SLSB_PENDING) || - (queue->bufstates && (queue->bufstates[bidx].flags & - QDIO_OUTBUF_STATE_FLAG_PENDING))) { + if (qdio_error == QDIO_ERROR_SLSB_PENDING) { WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); QETH_CARD_TEXT_(card, 5, "pel%u", bidx); -- cgit v1.2.3