Diffstat (limited to 'arch/s390/kernel')
62 files changed, 2235 insertions, 1454 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..ea5ed6654050 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,14 +38,15 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,9 +2,9 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> +#include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. 
*/ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5529248d84fb..95ecad9c7d7d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,16 +5,14 @@ * and format the required data. */ -#define ASM_OFFSETS_C - #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> -#include <linux/ftrace.h> -#include <asm/gmap.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> #include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { @@ -50,8 +48,8 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); - OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -77,7 +75,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -123,7 +122,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); @@ -138,7 +136,6 @@ int main(void) OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); - OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); @@ -161,7 +158,6 @@ int main(void) OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); /* gmap/sie offsets */ - OFFSET(__GMAP_ASCE, gmap, asce); OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); /* kexec_sha_region */ @@ -178,14 +174,8 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); 
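/*
 * For context: asm-offsets.c is never linked into the kernel; it is
 * compiled only so the build can scrape OFFSET()/DEFINE() values into
 * the generated asm-offsets.h that entry.S and other assembly include.
 * A minimal sketch of the mechanism, with illustrative names that are
 * not part of this diff:
 */
#include <linux/kbuild.h>

struct example {
	long a;
	long b;
};

int example_offsets(void)
{
	/* ends up as "#define __EXAMPLE_B 8" in asm-offsets.h */
	OFFSET(__EXAMPLE_B, example, b);
	DEFINE(__EXAMPLE_SIZE, sizeof(struct example));
	return 0;
}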
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* function graph return value tracing */ - OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); - OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); - DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); -#endif - OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); - DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..c217a5e64094 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m) * First 64 bytes of the key description is key name in EBCDIC CP 500. * Convert it to ASCII for displaying in /proc/keys. */ - strscpy(ascii, key->description, sizeof(ascii)); + strscpy(ascii, key->description); EBCASC_500(ascii, VC_NAME_LEN_BYTES); seq_puts(m, ascii); @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..4b9b34f95d72 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -14,7 +14,7 @@ #define CPACF_QUERY(name, instruction) \ static ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, @@ -101,7 +101,7 @@ static struct 
bin_attribute *cpacf_attrs[] = { static const struct attribute_group cpacf_attr_grp = { .name = "cpacf", - .bin_attrs = cpacf_attrs, + .bin_attrs_new = cpacf_attrs, }; static int __init cpacf_init(void) diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index b210a29d3ee9..2f4174b961de 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -20,6 +20,7 @@ #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> +#include <asm/asm.h> static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; @@ -45,12 +46,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen) ry.odd = *rlen; asm volatile( " diag %[rx],%[ry],0x8\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=&d" (cc), [ry] "+&d" (ry.pair) + CC_IPM(cc) + : CC_OUT(cc, cc), [ry] "+d" (ry.pair) : [rx] "d" (rx.pair) - : "cc"); - if (cc) + : CC_CLOBBER); + if (CC_TRANSFORM(cc)) *rlen += ry.odd; else *rlen = ry.odd; diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 1b2ae42a0c15..76210f001028 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -5,11 +5,13 @@ #include <linux/cpufeature.h> #include <linux/bug.h> +#include <asm/machine.h> #include <asm/elf.h> enum { TYPE_HWCAP, TYPE_FACILITY, + TYPE_MACHINE, }; struct s390_cpu_feature { @@ -21,6 +23,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, }; /* @@ -38,6 +41,8 @@ int cpu_have_feature(unsigned int num) return !!(elf_hwcap & BIT(feature->num)); case TYPE_FACILITY: return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); default: WARN_ON_ONCE(1); return 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index edae13416196..adb164223f8c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -63,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) { struct save_area *sa; - sa = memblock_alloc(sizeof(*sa), 8); - if (!sa) - return NULL; + sa = memblock_alloc_or_panic(sizeof(*sa), 8); if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); @@ -237,14 +235,16 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, prot); } -static const char *nt_name(Elf64_Word type) +/* + * Return true only when in a kdump or stand-alone kdump environment. + * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump" + * environments, where this function returns false; see dump_available(). 
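/*
 * Context for the CC_IPM/CC_OUT/CC_TRANSFORM conversions in this series
 * (cpcmd.c above, diag.c below): the s390 condition code is fetched from
 * the PSW with "ipm" and shifted down with "srl". The new macros abstract
 * the open-coded pattern shown in the removed lines, so builds with
 * flag-output constraint support may avoid the extra instructions. A
 * sketch of the traditional form, taken from the old diag49c():
 */
static inline int sketch_diag49c_cc(unsigned long subcode)
{
	int cc;

	asm volatile(
		"	diag	%[subcode],0,0x49c\n"
		"	ipm	%[cc]\n"
		"	srl	%[cc],28\n"
		: [cc] "=d" (cc)
		: [subcode] "d" (subcode)
		: "cc");
	return cc;	/* condition code 0..3 */
}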
+ */ +bool is_kdump_kernel(void) { - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; + return oldmem_data.start; } +EXPORT_SYMBOL_GPL(is_kdump_kernel); /* * Initialize ELF note @@ -270,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -289,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -313,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -337,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -361,8 +354,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - 
strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -497,6 +490,19 @@ static int get_mem_chunk_cnt(void) return cnt; } +static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr, + unsigned long vaddr, unsigned long size) +{ + phdr->p_type = PT_LOAD; + phdr->p_vaddr = vaddr; + phdr->p_offset = paddr; + phdr->p_paddr = paddr; + phdr->p_filesz = size; + phdr->p_memsz = size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; +} + /* * Initialize ELF loads (new kernel) */ @@ -509,14 +515,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm) if (os_info_has_vm) old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); for_each_physmem_range(idx, &oldmem_type, &start, &end) { - phdr->p_type = PT_LOAD; - phdr->p_vaddr = old_identity_base + start; - phdr->p_offset = start; - phdr->p_paddr = start; - phdr->p_filesz = end - start; - phdr->p_memsz = end - start; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_align = PAGE_SIZE; + fill_ptload(phdr, start, old_identity_base + start, + end - start); phdr++; } } @@ -526,6 +526,22 @@ static bool os_info_has_vm(void) return os_info_old_value(OS_INFO_KASLR_OFFSET); } +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +/* + * Fill PT_LOAD for a physical memory range owned by a device and detected by + * its device driver. + */ +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size) +{ + unsigned long old_identity_base = 0; + + if (os_info_has_vm()) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + fill_ptload(phdr, paddr, old_identity_base + paddr, size); +} +#endif + /* * Prepare PT_LOAD type program header for kernel image region */ @@ -578,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index e62bea9ab21e..2a41be2f7925 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> #include <linux/minmax.h> #include <linux/debugfs.h> @@ -38,13 +39,13 @@ typedef struct file_private_info { loff_t offset; /* offset of last read in file */ - int act_area; /* number of last formated area */ + int act_area; /* number of last formatted area */ int act_page; /* act page in given area */ - int act_entry; /* last formated entry (offset */ + int act_entry; /* last formatted entry (offset */ /* relative to beginning of last */ - /* formated page) */ + /* formatted page) */ size_t act_entry_offset; /* up to this offset we copied */ - /* in last read the last formated */ + /* in last read the last formatted */ /* entry to userland */ char temp_buf[2048]; /* buffer for output */ debug_info_t *debug_info_org; /* original debug information */ @@ -63,7 +64,7 @@ typedef struct { long args[]; } debug_sprintf_entry_t; -/* internal function prototyes */ +/* internal function prototypes */ static int debug_init(void); static ssize_t debug_output(struct file *file, char __user *user_buf, @@ -77,12 +78,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, static 
void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_prolog_pages_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); @@ -90,9 +93,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf); + char *out_buf, size_t out_buf_size, + const char *in_buf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -249,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strscpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); @@ -350,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -380,7 +385,7 @@ static void debug_info_put(debug_info_t *db_info) /* * debug_format_entry: - * - format one debug entry and return size of formated data + * - format one debug entry and return size of formatted data */ static int debug_format_entry(file_private_info_t *p_info) { @@ -391,8 +396,10 @@ static int debug_format_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap, view, p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ @@ -402,21 +409,31 @@ static int debug_format_entry(file_private_info_t *p_info) if (act_entry->clock == 0LL) goto out; /* empty entry */ - if (view->header_proc) + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len, DEBUG_DATA(act_entry)); + } out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go 
to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -424,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -440,16 +457,93 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. 
+ */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* * debug_output: * - called for user read() - * - copies formated debug entries to the user buffer + * - copies formatted debug entries to the user buffer */ static ssize_t debug_output(struct file *file, /* file descriptor */ char __user *user_buf, /* user buffer */ @@ -485,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } @@ -520,15 +614,51 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() - * - copies formated output to private_data area of the file + * - copies formatted output to private_data area of the file * handle */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -546,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. 
*/ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -592,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_move_entry(p_info, reverse)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} + /* Create debugfs entries and add to internal list. 
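/*
 * Usage sketch for the new debug_dump() above: render the newest entries
 * of an s390dbf log into a plain buffer, e.g. from a panic handler.
 * Hypothetical caller, not part of this diff; debug_sprintf_view is the
 * existing exported sprintf view.
 */
static void sketch_dump_newest(debug_info_t *dbf)
{
	static char buf[1024];
	ssize_t len;

	len = debug_dump(dbf, &debug_sprintf_view, buf, sizeof(buf), true);
	if (len >= 0)
		pr_info("%s\n", buf);	/* '\0'-terminated by debug_dump() */
}
/*
 * With reverse == true the dump starts at the most recently written
 * entry, which is what a crash path typically wants.
 */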
*/ static void _debug_register(debug_info_t *id) { @@ -962,7 +1122,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write, return 0; } -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { .procname = "debug_stoppable", .data = &debug_stoppable, @@ -1292,9 +1452,9 @@ static inline int debug_get_uint(char *buf) */ static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* @@ -1341,14 +1501,14 @@ out: * prints out actual debug level */ static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { int rc = 0; if (id->level == DEBUG_OFF_LEVEL) - rc = sprintf(out_buf, "-\n"); + rc = scnprintf(out_buf, out_buf_size, "-\n"); else - rc = sprintf(out_buf, "%i\n", id->level); + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } @@ -1465,22 +1625,24 @@ out: * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) + char *out_buf, size_t out_buf_size, const char *in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) - rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); - rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } @@ -1488,7 +1650,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, * prints header for debug entry */ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { unsigned long sec, usec; unsigned long caller; @@ -1505,22 +1668,22 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", - area, sec, usec, level, except_str, - entry->cpu, (void *)caller); + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); /* - * prints debug data sprintf-formated: + * prints debug data sprintf-formatted: * debug_sprinf_event/exception calls must be used together with this view */ #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1533,8 +1696,9 @@ static int 
debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, goto out; /* bufsize of entry too small */ if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } @@ -1546,15 +1710,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c index 007e1795670e..56b862ba9be8 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -16,7 +16,8 @@ #include <asm/diag.h> #include <asm/trace/diag.h> #include <asm/sections.h> -#include "entry.h" +#include <asm/asm.h> +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -50,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; @@ -192,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -283,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" @@ -307,16 +310,15 @@ EXPORT_SYMBOL(diag26c); int diag49c(unsigned long subcode) { - int rc; + int cc; diag_stat_inc(DIAG_STAT_X49C); asm volatile( " diag %[subcode],0,0x49c\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - : [rc] "=d" (rc) + CC_IPM(cc) + : CC_OUT(cc, cc) : [subcode] "d" (subcode) - : "cc"); - return rc; + : CC_CLOBBER); + return CC_TRANSFORM(cc); } EXPORT_SYMBOL(diag49c); diff --git a/arch/s390/kernel/diag/diag310.c 
b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..d6a34454aa5a --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if 
(READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 
2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = 
false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 
2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..dd410962ecbe 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> #include <asm/dis.h> @@ -198,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..54cf0923050f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -21,6 +22,8 @@ #include <asm/asm-extable.h> #include <linux/memblock.h> #include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +39,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); @@ -50,6 +55,8 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,25 +65,10 @@ 
static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -95,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -155,9 +127,9 @@ static noinline __init void setup_arch_string(void) strim_all(hvstr); } else { sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? 
"KVM" : "unknown"); } dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -166,9 +138,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -217,65 +188,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -307,17 +223,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index d6d5317f768e..0f00f4b06d51 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -52,16 +53,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, 
lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -76,10 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow -#else - j \oklabel -#endif + j stack_invalid .endm /* @@ -127,7 +116,7 @@ _LPP_OFFSET = __LC_LPP .macro SIEEXIT sie_control,lowcore lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit @@ -219,20 +208,9 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE GET_LC %r14 - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There -# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. -# Other instructions between __sie64a and .Lsie_done should not cause program -# interrupts. So lets use 3 nops as a landing pad for all possible rewinds. -.Lrewind_pad6: - nopr 7 -.Lrewind_pad4: - nopr 7 -.Lrewind_pad2: - nopr 7 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 @@ -244,15 +222,6 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_SIE_REASON(%r15) # return exit reason code BR_EX %r14 -.Lsie_fault: - lghi %r14,-EFAULT - stg %r14,__SF_SIE_REASON(%r15) # set exit reason code - j sie_exit - - EX_TABLE(.Lrewind_pad6,.Lsie_fault) - EX_TABLE(.Lrewind_pad4,.Lsie_fault) - EX_TABLE(.Lrewind_pad2,.Lsie_fault) - EX_TABLE(sie_exit,.Lsie_fault) SYM_FUNC_END(__sie64a) EXPORT_SYMBOL(__sie64a) EXPORT_SYMBOL(sie_exit) @@ -271,7 +240,6 @@ SYM_CODE_START(system_call) lghi %r14,0 .Lsysc_per: STBEAR __LC_LAST_BREAK(%r13) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -292,7 +260,6 @@ SYM_CODE_START(system_call) lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -309,7 +276,6 @@ SYM_CODE_START(ret_from_fork) brasl %r14,__ret_from_fork STACKLEAK_ERASE GET_LC %r13 - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -327,38 +293,33 @@ SYM_CODE_START(pgm_check_handler) GET_LC %r13 stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF - lgr %r10,%r15 lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) + xgr %r10,%r10 tmhh %r8,0x0001 # coming from user space? 
- jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - j 3f # -> fault in user space -.Lpgm_skip_asce: + jo 3f # -> fault in user space +#if IS_ENABLED(CONFIG_KVM) + lg %r11,__LC_CURRENT(%r13) + tm __TI_sie(%r11),0xff + jz 1f + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 + lghi %r10,_PIF_GUEST_FAULT +#endif 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r10,__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13) mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) - stctg %c1,%c1,__PT_CR1(%r11) -#if IS_ENABLED(CONFIG_KVM) - ltg %r12,__LC_GMAP(%r13) - jz 5f - clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) - jne 5f - BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10),%r13 -#endif -5: stmg %r8,%r9,__PT_PSW(%r11) + stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -367,12 +328,12 @@ SYM_CODE_START(pgm_check_handler) xgr %r5,%r5 xgr %r6,%r6 xgr %r7,%r7 + xgr %r12,%r12 lgr %r2,%r11 brasl %r14,__do_pgm_check tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: @@ -414,11 +375,9 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - lg %r15,__LC_KERNEL_STACK(%r13) +1: lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -441,7 +400,6 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # returning to user ? 
jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) @@ -450,9 +408,13 @@ SYM_CODE_START(\name) SYM_CODE_END(\name) .endm + .section .irqentry.text, "ax" + INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + .section .kprobes.text, "ax" + /* * Machine check handler routines */ @@ -497,7 +459,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 @@ -505,8 +467,6 @@ SYM_CODE_START(mcck_int_handler) .Lmcck_user: lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) @@ -524,7 +484,6 @@ SYM_CODE_START(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? @@ -619,25 +578,24 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. 
*/ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) -#endif + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -653,7 +611,7 @@ SYM_DATA_END(daton_psw) .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table) #undef SYSCALL @@ -661,7 +619,7 @@ SYM_DATA_END(sys_call_table) #define SYSCALL(esame,emu) .quad __s390_ ## emu SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 21969520f947..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); @@ -41,7 +41,6 @@ void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0b6e62d1d8b8..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... 
for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); @@ -261,43 +262,19 @@ void ftrace_arch_code_modify_post_process(void) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * Hook the return address and push it in the stack of return addresses - * in current thread info. - */ -unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, - unsigned long ip) -{ - if (unlikely(ftrace_graph_is_dead())) - goto out; - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - goto out; - ip -= MCOUNT_INSN_SIZE; - if (!function_graph_enter(ra, ip, 0, (void *) sp)) - ra = (unsigned long) return_to_handler; -out: - return ra; -} -NOKPROBE_SYMBOL(prepare_ftrace_return); -/* - * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative on condition. To enable the ftrace graph code - * block, we simply patch the mask field of the instruction to zero and - * turn the instruction into a nop. - * To disable the ftrace graph code the mask field will be patched to - * all ones, which turns the instruction into an unconditional branch. - */ -int ftrace_enable_ftrace_graph_caller(void) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - /* Expect brc 0xf,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); -} + unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; -int ftrace_disable_ftrace_graph_caller(void) -{ - /* Expect brc 0x0,... 
*/ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) + *parent = (unsigned long)&return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -318,7 +295,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); regs = ftrace_get_regs(fregs); p = get_kprobe((kprobe_opcode_t *)ip); if (!regs || unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2a99a216ab62..e7b66d046e8d 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -45,6 +45,7 @@ * therefore delaying the throughput loss caused by using SMP threads. 
*/ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; @@ -292,7 +293,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table hiperdispatch_ctl_table[] = { +static const struct ctl_table hiperdispatch_ctl_table[] = { { .procname = "hiperdispatch", .mode = 0644, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f17bb7bf9392..ff15f91affde 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -209,7 +210,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return scnprintf(page, PAGE_SIZE, _format, ##args); \ + return sysfs_emit(page, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ @@ -269,8 +270,8 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ { \ if (len >= sizeof(_value)) \ return -E2BIG; \ - len = strscpy(_value, buf, sizeof(_value)); \ - if (len < 0) \ + len = strscpy(_value, buf); \ + if ((ssize_t)len < 0) \ return len; \ strim(_value); \ return len; \ @@ -280,58 +281,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data + scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t 
sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -372,7 +373,7 @@ EXPORT_SYMBOL_GPL(ipl_info); static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); + return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type)); } static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); @@ -380,7 +381,7 @@ static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); static ssize_t ipl_secure_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%i\n", !!ipl_secure_flag); + return sysfs_emit(page, "%i\n", !!ipl_secure_flag); } static struct kobj_attribute sys_ipl_secure_attr = @@ -389,7 +390,7 @@ static struct kobj_attribute sys_ipl_secure_attr = static ssize_t ipl_has_secure_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%i\n", !!sclp.has_sipl); + return sysfs_emit(page, "%i\n", !!sclp.has_sipl); } static struct kobj_attribute sys_ipl_has_secure_attr = @@ -402,7 +403,7 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj, if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW)) ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block); - return sprintf(page, "%s\n", parm); + return sysfs_emit(page, "%s\n", parm); } static struct kobj_attribute sys_ipl_vm_parm_attr = @@ -413,18 +414,18 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, { switch (ipl_info.type) { case IPL_TYPE_CCW: - return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid, - ipl_block.ccw.devno); + return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid, + ipl_block.ccw.devno); case IPL_TYPE_ECKD: case IPL_TYPE_ECKD_DUMP: - return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid, - ipl_block.eckd.devno); + return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid, + ipl_block.eckd.devno); case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: - return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); + return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno); case IPL_TYPE_NVME: case IPL_TYPE_NVME_DUMP: - return sprintf(page, "%08ux\n", ipl_block.nvme.fid); + 
return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; } @@ -434,19 +435,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +455,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +463,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -503,12 +504,12 @@ static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \ if (!ipb->br_chr.cyl && \ !ipb->br_chr.head && \ !ipb->br_chr.record) \ - return sprintf(buf, "auto\n"); \ + return sysfs_emit(buf, "auto\n"); \ \ - return sprintf(buf, "0x%x,0x%x,0x%x\n", \ - ipb->br_chr.cyl, \ - ipb->br_chr.head, \ - ipb->br_chr.record); \ + return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \ + ipb->br_chr.cyl, \ + ipb->br_chr.head, \ + ipb->br_chr.record); \ } #define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \ @@ -573,11 +574,11 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, char loadparm[LOADPARM_LEN + 1] = {}; if (!sclp_ipl_info.is_valid) - return sprintf(page, "#unknown#\n"); + return sysfs_emit(page, "#unknown#\n"); memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); strim(loadparm); - return sprintf(page, "%s\n", loadparm); + return sysfs_emit(page, "%s\n", loadparm); } static struct kobj_attribute sys_ipl_ccw_loadparm_attr = @@ -593,9 +594,9 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, - .bin_attrs = ipl_fcp_bin_attrs, + .bin_attrs_new = ipl_fcp_bin_attrs, }; static struct attribute *ipl_nvme_attrs[] = { @@ -607,9 +608,9 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, - .bin_attrs = ipl_nvme_bin_attrs, + .bin_attrs_new = ipl_nvme_bin_attrs, }; static struct attribute *ipl_eckd_attrs[] = { @@ -620,9 +621,9 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, - .bin_attrs = ipl_eckd_bin_attrs, + .bin_attrs_new = ipl_eckd_bin_attrs, }; /* CCW ipl device attributes */ @@ -640,11 
+641,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +656,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -731,7 +732,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); - return sprintf(page, "%s\n", vmparm); + return sysfs_emit(page, "%s\n", vmparm); } static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, @@ -808,7 +809,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -839,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, char buf[LOADPARM_LEN + 1]; reipl_get_ascii_loadparm(buf, ipb); - return sprintf(page, "%s\n", buf); + return sysfs_emit(page, "%s\n", buf); } static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, @@ -895,7 +896,7 @@ DEFINE_GENERIC_LOADPARM(eckd); static ssize_t reipl_fcp_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%u\n", reipl_fcp_clear); + return sysfs_emit(page, "%u\n", reipl_fcp_clear); } static ssize_t reipl_fcp_clear_store(struct kobject *kobj, @@ -917,9 +918,9 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, - .bin_attrs = reipl_fcp_bin_attrs, + .bin_attrs_new = reipl_fcp_bin_attrs, }; static struct kobj_attribute sys_reipl_fcp_clear_attr = @@ -932,7 +933,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,15 +956,15 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, - .bin_attrs = reipl_nvme_bin_attrs + .bin_attrs_new = reipl_nvme_bin_attrs }; static ssize_t reipl_nvme_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%u\n", reipl_nvme_clear); + return sysfs_emit(page, "%u\n", reipl_nvme_clear); } static ssize_t reipl_nvme_clear_store(struct kobject *kobj, @@ -984,7 +985,7 @@ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); static ssize_t reipl_ccw_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, 
"%u\n", reipl_ccw_clear); + return sysfs_emit(page, "%u\n", reipl_ccw_clear); } static ssize_t reipl_ccw_clear_store(struct kobject *kobj, @@ -1031,7 +1032,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,15 +1049,15 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, - .bin_attrs = reipl_eckd_bin_attrs + .bin_attrs_new = reipl_eckd_bin_attrs }; static ssize_t reipl_eckd_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%u\n", reipl_eckd_clear); + return sysfs_emit(page, "%u\n", reipl_eckd_clear); } static ssize_t reipl_eckd_clear_store(struct kobject *kobj, @@ -1086,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj, char nss_name[NSS_NAME_SIZE + 1] = {}; reipl_get_ascii_nss_name(nss_name, reipl_block_nss); - return sprintf(page, "%s\n", nss_name); + return sysfs_emit(page, "%s\n", nss_name); } static ssize_t reipl_nss_name_store(struct kobject *kobj, @@ -1171,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type) static ssize_t reipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", ipl_type_str(reipl_type)); + return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type)); } static ssize_t reipl_type_store(struct kobject *kobj, @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? &reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? 
&reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1587,15 +1588,15 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, - .bin_attrs = dump_fcp_bin_attrs, + .bin_attrs_new = dump_fcp_bin_attrs, }; /* NVME dump device attributes */ @@ -1621,15 +1622,15 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, - .bin_attrs = dump_nvme_bin_attrs, + .bin_attrs_new = dump_nvme_bin_attrs, }; /* ECKD dump device attributes */ @@ -1655,15 +1656,15 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, - .bin_attrs = dump_eckd_bin_attrs, + .bin_attrs_new = dump_eckd_bin_attrs, }; /* CCW dump device attributes */ @@ -1692,7 +1693,7 @@ static int dump_set_type(enum dump_type type) static ssize_t dump_type_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", dump_type_str(dump_type)); + return sysfs_emit(page, "%s\n", dump_type_str(dump_type)); } static ssize_t dump_type_store(struct kobject *kobj, @@ -1717,6 +1718,24 @@ static ssize_t dump_type_store(struct kobject *kobj, static struct kobj_attribute dump_type_attr = __ATTR(dump_type, 0644, dump_type_show, dump_type_store); +static ssize_t dump_area_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sysfs_emit(page, "%lu\n", sclp.hsa_size); +} + +static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size); + +static struct attribute *dump_attrs[] = { + &dump_type_attr.attr, + &dump_area_size_attr.attr, + NULL, +}; + +static struct attribute_group dump_attr_group = { + .attrs = dump_attrs, +}; + static struct kset *dump_kset; static void diag308_dump(void *dump_block) @@ -1853,7 +1872,7 @@ static int __init dump_init(void) dump_kset = kset_create_and_add("dump", NULL, firmware_kobj); if (!dump_kset) return -ENOMEM; - rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr); + rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group); if (rc) { kset_unregister(dump_kset); return rc; @@ -1969,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2034,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR, static ssize_t on_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_reboot_trigger.action->name); + return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name); } 
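
The sprintf() to sysfs_emit() conversions repeated throughout this file follow the sysfs convention that a show() callback formats into a single page-sized buffer. sysfs_emit(), declared in <linux/sysfs.h>, additionally warns if the destination is not the page-aligned buffer sysfs passed in and clamps the formatted string to PAGE_SIZE, which a bare sprintf() cannot guarantee. A minimal sketch of the pattern as used above (the attribute name and value here are hypothetical, not taken from this patch):

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *page)
{
	/*
	 * sysfs_emit() bounds the output to PAGE_SIZE and returns
	 * the number of bytes written, which is the value a show()
	 * callback is expected to return.
	 */
	return sysfs_emit(page, "%s\n", "example-value");
}

The conversion is mechanical because both functions return the formatted length; the gain is that an oversized string can no longer overrun the sysfs page.
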
static ssize_t on_reboot_store(struct kobject *kobj, @@ -2060,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action}; static ssize_t on_panic_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_panic_trigger.action->name); + return sysfs_emit(page, "%s\n", on_panic_trigger.action->name); } static ssize_t on_panic_store(struct kobject *kobj, @@ -2086,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, static ssize_t on_restart_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_restart_trigger.action->name); + return sysfs_emit(page, "%s\n", on_restart_trigger.action->name); } static ssize_t on_restart_store(struct kobject *kobj, @@ -2122,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; static ssize_t on_halt_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_halt_trigger.action->name); + return sysfs_emit(page, "%s\n", on_halt_trigger.action->name); } static ssize_t on_halt_store(struct kobject *kobj, @@ -2148,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action}; static ssize_t on_poff_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return sprintf(page, "%s\n", on_poff_trigger.action->name); + return sysfs_emit(page, "%s\n", on_poff_trigger.action->name); } static ssize_t on_poff_store(struct kobject *kobj, @@ -2230,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); - vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -2257,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); - vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -2268,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); - vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -2279,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE); - vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } diff --git 
a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 2639a3d12736..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,11 +26,13 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> #include <asm/softirq_stack.h> #include <asm/vtime.h> +#include <asm/asm.h> #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -83,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -129,9 +131,13 @@ static int irq_pending(struct pt_regs *regs) { int cc; - asm volatile("tpi 0\n" - "ipm %0" : "=d" (cc) : : "cc"); - return cc >> 28; + asm volatile( + " tpi 0\n" + CC_IPM(cc) + : CC_OUT(cc, cc) + : + : CC_CLOBBER); + return CC_TRANSFORM(cc); } void noinstr do_io_irq(struct pt_regs *regs) @@ -144,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -159,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); irq_exit_rcu(); @@ -180,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -253,7 +259,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } - if (index < nr_irqs) { + if (index < irq_get_nr_irqs()) { show_msi_interrupt(p, index); goto out; } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6295faf0987d..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -489,6 +490,12 @@ int __init arch_init_kprobes(void) return 0; } +int __init arch_populate_kprobe_blacklist(void) +{ + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); +} + int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git 
a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 6652e54cf3db..6d1ffca5f798 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -166,7 +166,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); + mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS)); } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7e267ef63a7f..1fec370fecf4 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common) lgr %r3,%r14 la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -# The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. 
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) - j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) -.Lftrace_graph_caller_end: -#endif lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef MARCH_HAS_Z196_FEATURES ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) @@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) + # allocate ftrace_regs and stack frame for ftrace_return_to_handler + aghi %r15,-STACK_FRAME_SIZE_FREGS stg %r1,__SF_BACKCHAIN(%r15) - la %r3,STACK_FRAME_OVERHEAD(%r15) - stg %r1,__FGRAPH_RET_FP(%r3) - stg %r2,__FGRAPH_RET_GPR2(%r3) - lgr %r2,%r3 + stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + la %r2,STACK_FRAME_OVERHEAD(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE + aghi %r15,STACK_FRAME_SIZE_FREGS lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..3da371c144eb 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? 
MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index a95188818637..5970dd3ee7c5 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -7,17 +7,17 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { if (test_facility(156)) - return sprintf(buf, "Mitigation: etokens\n"); + return sysfs_emit(buf, "Mitigation: etokens\n"); if (nospec_uses_trampoline()) - return sprintf(buf, "Mitigation: execute trampolines\n"); + return sysfs_emit(buf, "Mitigation: execute trampolines\n"); if (nobp_enabled()) - return sprintf(buf, "Mitigation: limited branch prediction\n"); - return sprintf(buf, "Vulnerable\n"); + return sysfs_emit(buf, "Mitigation: limited branch prediction\n"); + return sysfs_emit(buf, "Vulnerable\n"); } diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c index ddc1448ea2e1..2fc40f97c0ad 100644 --- a/arch/s390/kernel/numa.c +++ b/arch/s390/kernel/numa.c @@ -21,12 +21,8 @@ void __init numa_setup(void) nodes_clear(node_possible_map); node_set(0, node_possible_map); node_set_online(0); - for (nid = 0; nid < MAX_NUMNODES; nid++) { - NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); - if (!NODE_DATA(nid)) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(pg_data_t), 8); - } + for (nid = 0; nid < MAX_NUMNODES; nid++) + NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8); NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; NODE_DATA(0)->node_id = 0; } diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index b695f980bbde..c2a468986212 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -18,6 +18,7 @@ #include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/asm-offsets.h> +#include <asm/sections.h> #include <asm/ipl.h> /* @@ -180,7 +181,7 @@ fail: } /* - * Return pointer to os infor entry and its size + * Return pointer to os info entry and its size */ void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index e2e0aa463fbd..6a262e198e35 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -442,7 +442,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -835,7 +835,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return validate_ctr_version(hwc->config, set); } -/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different +/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different * attribute::type values: * - PERF_TYPE_HARDWARE: * - pmu->type: @@ -858,18 +858,13 @@ static int 
cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -879,8 +874,8 @@ static int hw_perf_event_reset(struct perf_event *event) u64 prev, new; int err; + prev = local64_read(&event->hw.prev_count); do { - prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) { if (err != 3) @@ -892,7 +887,7 @@ static int hw_perf_event_reset(struct perf_event *event) */ new = 0; } - } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new)); return err; } @@ -902,12 +897,12 @@ static void hw_perf_event_update(struct perf_event *event) u64 prev, new, delta; int err; + prev = local64_read(&event->hw.prev_count); do { - prev = local64_read(&event->hw.prev_count); err = ecctr(event->hw.config, &new); if (err) return; - } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new)); delta = (prev <= new) ? new - prev : (-1ULL - prev) + new + 1; /* overflow */ @@ -981,12 +976,10 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); - if (overflow) - event->pmu->stop(event, 0); perf_event_update_userpage(event); return overflow; @@ -1054,7 +1047,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) * * When a new perf event has been added but not yet started, this can * clear enable control and resets all counters in a set. Therefore, - * cpumf_pmu_start() always has to reenable a counter set. + * cpumf_pmu_start() always has to re-enable a counter set. */ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) if (!atomic_read(&cpuhw->ctr_set[i])) @@ -1819,8 +1812,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } @@ -1863,7 +1854,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = { /* Performance monitoring unit for event CF_DIAG. Since this event * is also started and stopped via the perf_event_open() system call, use * the same event enable/disable call back functions. They do not - * have a pointer to the perf_event strcture as first parameter. + * have a pointer to the perf_event structure as first parameter. * * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 
* Reuse them and distinguish the event (always first parameter) via diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc91080..7ace1f9e4ccf 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); 
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + 
CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 
5b765e3ccf0c..91469401f2c9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -180,39 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long *sdbt, *curr; - - if (!sfb->sdbt) - return; + unsigned long *sdbt, *curr, *head; sdbt = sfb->sdbt; - curr = sdbt; - + if (!sdbt) + return; + sfb->sdbt = NULL; /* Free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* Process table-link entries */ + head = sdbt; + curr = sdbt; + do { if (is_link_entry(curr)) { + /* Process table-link entries */ curr = get_next_sdbt(curr); - if (sdbt) - free_page((unsigned long)sdbt); - - /* If the origin is reached, sampling buffer is freed */ - if (curr == sfb->sdbt) - break; - else - sdbt = curr; + free_page((unsigned long)sdbt); + sdbt = curr; } else { /* Process SDB pointer */ - if (*curr) { - free_page((unsigned long)phys_to_virt(*curr)); - curr++; - } + free_page((unsigned long)phys_to_virt(*curr)); + curr++; } - } - + } while (curr != head); memset(sfb, 0, sizeof(*sfb)); } @@ -404,7 +392,7 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) static void deallocate_buffers(struct cpu_hw_sf *cpuhw) { - if (cpuhw->sfb.sdbt) + if (sf_buffer_available(cpuhw)) free_sampling_buffer(&cpuhw->sfb); } @@ -559,16 +547,15 @@ static void setup_pmc_cpu(void *flags) { struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + sf_disable(); switch (*((int *)flags)) { case PMC_INIT: memset(cpuhw, 0, sizeof(*cpuhw)); qsi(&cpuhw->qsi); cpuhw->flags |= PMU_F_RESERVED; - sf_disable(); break; case PMC_RELEASE: cpuhw->flags &= ~PMU_F_RESERVED; - sf_disable(); deallocate_buffers(cpuhw); break; } @@ -759,7 +746,6 @@ static int __hw_perf_event_init(struct perf_event *event) reserve_pmc_hardware(); refcount_set(&num_events, 1); } - mutex_unlock(&pmc_reserve_mutex); event->destroy = hw_perf_event_destroy; /* Access per-CPU sampling information (query sampling info) */ @@ -818,7 +804,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Use AUX buffer. No need to allocate it by ourself */ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) - return 0; + goto out; /* Allocate the per-CPU sampling buffer using the CPU information * from the event. If the event is not pinned to a particular @@ -848,6 +834,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (is_default_overflow_handler(event)) event->overflow_handler = cpumsf_output_event_pid; out: + mutex_unlock(&pmc_reserve_mutex); return err; } @@ -898,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } @@ -910,10 +894,14 @@ static void cpumsf_pmu_enable(struct pmu *pmu) struct hw_perf_event *hwc; int err; - if (cpuhw->flags & PMU_F_ENABLED) - return; - - if (cpuhw->flags & PMU_F_ERR_MASK) + /* + * Event must be + * - added/started on this CPU (PMU_F_IN_USE set) + * - and CPU must be available (PMU_F_RESERVED set) + * - and not already enabled (PMU_F_ENABLED not set) + * - and not in error condition (PMU_F_ERR_MASK not set) + */ + if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED)) return; /* Check whether to extent the sampling buffer. @@ -927,33 +915,27 @@ static void cpumsf_pmu_enable(struct pmu *pmu) * facility, but it can be fully re-enabled using sampling controls that * have been saved in cpumsf_pmu_disable(). 
*/ - if (cpuhw->event) { - hwc = &cpuhw->event->hw; - if (!(SAMPL_DIAG_MODE(hwc))) { - /* - * Account number of overflow-designated - * buffer extents - */ - sfb_account_overflows(cpuhw, hwc); - extend_sampling_buffer(&cpuhw->sfb, hwc); - } - /* Rate may be adjusted with ioctl() */ - cpuhw->lsctl.interval = SAMPL_RATE(hwc); + hwc = &cpuhw->event->hw; + if (!(SAMPL_DIAG_MODE(hwc))) { + /* + * Account number of overflow-designated buffer extents + */ + sfb_account_overflows(cpuhw, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); } + /* Rate may be adjusted with ioctl() */ + cpuhw->lsctl.interval = SAMPL_RATE(hwc); /* (Re)enable the PMU and sampling facility */ - cpuhw->flags |= PMU_F_ENABLED; - barrier(); - err = lsctl(&cpuhw->lsctl); if (err) { - cpuhw->flags &= ~PMU_F_ENABLED; pr_err("Loading sampling controls failed: op 1 err %i\n", err); return; } /* Load current program parameter */ lpp(&get_lowcore()->lpp); + cpuhw->flags |= PMU_F_ENABLED; } static void cpumsf_pmu_disable(struct pmu *pmu) @@ -996,7 +978,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -/* perf_exclude_event() - Filter event +/* perf_event_exclude() - Filter event * @event: The perf event * @regs: pt_regs structure * @sde_regs: Sample-data-entry (sde) regs structure @@ -1005,7 +987,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) * * Return non-zero if the event shall be excluded. */ -static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, +static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs, struct perf_sf_sde_regs *sde_regs) { if (event->attr.exclude_user && user_mode(regs)) @@ -1088,12 +1070,9 @@ static int perf_push_sample(struct perf_event *event, data.tid_entry.pid = basic->hpp & LPP_PID_MASK; overflow = 0; - if (perf_exclude_event(event, &regs, sde_regs)) + if (perf_event_exclude(event, &regs, sde_regs)) goto out; - if (perf_event_overflow(event, &data, &regs)) { - overflow = 1; - event->pmu->stop(event, 0); - } + overflow = perf_event_overflow(event, &data, &regs); perf_event_update_userpage(event); out: return overflow; @@ -1191,8 +1170,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, static void hw_perf_event_update(struct perf_event *event, int flush_all) { unsigned long long event_overflow, sampl_overflow, num_sdb; - union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; + union hws_trailer_header prev, new; struct hws_trailer_entry *te; unsigned long *sdbt, sdb; int done; @@ -1236,13 +1215,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) /* Reset trailer (using compare-double-and-swap) */ prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { - old.val = prev.val; new.val = prev.val; new.f = 0; new.a = 1; new.overflow = 0; - prev.val = cmpxchg128(&te->header.val, old.val, new.val); - } while (prev.val != old.val); + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); /* Advance to next sample-data-block */ sdbt++; @@ -1408,16 +1385,15 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - union hws_trailer_header old, prev, new; + union hws_trailer_header prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { - old.val = prev.val; new.val = prev.val; - *overflow = old.overflow; - if (old.f) { + *overflow = prev.overflow; + if
(prev.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1427,8 +1403,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, } new.a = 1; new.overflow = 0; - prev.val = cmpxchg128(&te->header.val, old.val, new.val); - } while (prev.val != old.val); + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); return true; } @@ -1457,7 +1432,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - union hws_trailer_header old, prev, new; + union hws_trailer_header prev, new; unsigned long i, range_scan, idx; unsigned long long orig_overflow; struct hws_trailer_entry *te; @@ -1489,17 +1464,15 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, te = aux_sdb_trailer(aux, idx); prev.val = READ_ONCE_ALIGNED_128(te->header.val); do { - old.val = prev.val; new.val = prev.val; - orig_overflow = old.overflow; + orig_overflow = prev.overflow; new.f = 0; new.overflow = 0; if (idx == aux->alert_mark) new.a = 1; else new.a = 0; - prev.val = cmpxchg128(&te->header.val, old.val, new.val); - } while (prev.val != old.val); + } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val)); *overflow += orig_overflow; } @@ -1780,7 +1753,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) event->hw.state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { - hw_perf_event_update(event, 1); + /* CPU hotplug off removes SDBs. No samples to extract. */ + if (cpuhw->flags & PMU_F_RESERVED) + hw_perf_event_update(event, 1); event->hw.state |= PERF_HES_UPTODATE; } perf_pmu_enable(event->pmu); @@ -1795,7 +1770,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) if (cpuhw->flags & PMU_F_IN_USE) return -EAGAIN; - if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt) + if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw)) return -EINVAL; perf_pmu_disable(event->pmu); @@ -1957,13 +1932,12 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Program alert request */ if (alert & CPU_MF_INT_SF_PRA) { - if (cpuhw->flags & PMU_F_IN_USE) + if (cpuhw->flags & PMU_F_IN_USE) { if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) hw_collect_aux(cpuhw); else hw_perf_event_update(cpuhw->event, 0); - else - WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } } /* Report measurement alerts only for non-PRA codes */ @@ -1984,7 +1958,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Invalid sampling buffer entry */ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { - pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + pr_err("A sampling buffer entry is incorrect (alert=%#x)\n", alert); cpuhw->flags |= PMU_F_ERR_IBE; sf_disable(); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 5fff629b1a89..2b9611c4718e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -57,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs) return sie_block(regs)->gpsw.addr; } -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { return is_in_guest(regs) ? 
instruction_pointer_guest(regs) : instruction_pointer(regs); } @@ -84,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs) return flags; } -unsigned long perf_misc_flags(struct pt_regs *regs) +unsigned long perf_arch_misc_flags(struct pt_regs *regs) { /* Check if the cpum_sf PMU has created the pt_regs structure. * In this case, perf misc flags can be easily extracted. Otherwise, @@ -228,5 +228,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr; pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%04llx\n", pmu_attr->id); + return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id); } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index fa7325454266..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, &regs); @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 7f462bef1fc0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, &regs); @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values.
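The three raw-sample producers above (cfdiag_push_sample(), paicrypt_push_sample() and paiext_push_sample()) now share one shape: perf_sample_save_raw_data() takes the event as its second argument, and the result of perf_event_overflow() is returned to the caller instead of stopping the event by hand. A condensed sketch of that shared pattern; the helper name is made up and the caller is assumed to supply the raw buffer:

static int push_raw_sample(struct perf_event *event, struct pt_regs *regs,
			   void *buf, size_t size)
{
	struct perf_sample_data data;
	struct perf_raw_record raw = { };
	int overflow;

	perf_sample_data_init(&data, 0, event->hw.last_period);
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.frag.size = size;
		raw.frag.data = buf;
		/* The event argument is the new part of the interface. */
		perf_sample_save_raw_data(&data, event, &raw);
	}
	overflow = perf_event_overflow(event, &data, regs);
	perf_event_update_userpage(event);
	return overflow;	/* caller decides how to react */
}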
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..11f70c1e2797 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +20,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +211,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if (machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +246,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ @@ -266,31 +268,35 @@ static int __init setup_elf_platform(void) add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { default: /* Use "z10" as default. */ - strcpy(elf_platform, "z10"); + strscpy(elf_platform, "z10"); break; case 0x2817: case 0x2818: - strcpy(elf_platform, "z196"); + strscpy(elf_platform, "z196"); break; case 0x2827: case 0x2828: - strcpy(elf_platform, "zEC12"); + strscpy(elf_platform, "zEC12"); break; case 0x2964: case 0x2965: - strcpy(elf_platform, "z13"); + strscpy(elf_platform, "z13"); break; case 0x3906: case 0x3907: - strcpy(elf_platform, "z14"); + strscpy(elf_platform, "z14"); break; case 0x8561: case 0x8562: - strcpy(elf_platform, "z15"); + strscpy(elf_platform, "z15"); break; case 0x3931: case 0x3932: - strcpy(elf_platform, "z16"); + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); break; } return 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..e1240f6b29fa 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -31,6 +31,9 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" @@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task) cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. 
*/ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1521,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; @@ -1547,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. 
- */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..f244c5560e7f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -157,25 +158,29 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. * Leaving the mask zero always uses write through which is safe @@ -245,7 +250,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -283,7 +288,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -359,36 +364,24 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK - vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) { unsigned long stack; - stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!stack) { - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - } + stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE); return stack; } @@ -421,7 +414,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; 
lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; @@ -512,10 +504,7 @@ static void __init setup_resources(void) bss_resource.end = __pa_symbol(__bss_stop) - 1; for_each_mem_range(i, &start, &end) { - res = memblock_alloc(sizeof(*res), 8); - if (!res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*res), 8); + res = memblock_alloc_or_panic(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -534,10 +523,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_alloc(sizeof(*sub_res), 8); - if (!sub_res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*sub_res), 8); + sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; @@ -664,7 +650,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -704,7 +690,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -712,7 +698,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } @@ -742,7 +728,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); @@ -824,9 +810,7 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vmms) - panic("Failed to allocate memory for sysinfo structure\n"); + vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); @@ -886,6 +870,23 @@ static void __init log_component_list(void) } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. + */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. 
*/ @@ -895,17 +896,20 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); - if (have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -955,7 +959,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -975,10 +979,7 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - + setup_protection_map(); /* * Create kernel page tables. */ @@ -1006,3 +1007,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4df56fdb2488..81f12bb77f62 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -18,6 +18,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +39,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -97,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -263,13 +258,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); put_abs_lowcore(abs_lc); - lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[1] = lc->user_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); arch_spin_lock_setup(cpu); @@ -416,7 +410,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -561,10 +555,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + 
if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) @@ -574,7 +568,7 @@ int smp_store_status(int cpu) /* * Collect CPU state of the previous, crashed system. - * There are four cases: + * There are three cases: * 1) standard zfcp/nvme dump * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true * The state for all CPUs except the boot CPU needs to be collected @@ -587,16 +581,16 @@ int smp_store_status(int cpu) * with sigp stop-and-store-status. The firmware or the boot-loader * stored the registers of the boot CPU in the absolute lowcore in the * memory of the old system. - * 3) kdump and the old kernel did not store the CPU state, - * or stand-alone kdump for DASD - * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() + * 3) kdump or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The kexec code or the boot-loader * stored the registers of the boot CPU in the memory of the old system. - * 4) kdump and the old kernel stored the CPU state - * condition: OLDMEM_BASE != NULL && is_kdump_kernel() - * This case does not exist for s390 anymore, setup_arch explicitly - * deactivates the elfcorehdr= kernel parameter + * + * Note that the legacy kdump mode where the old kernel stored the CPU states + * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr= + * kernel parameter. The is_kdump_kernel() implementation on s390 is independent + * of the elfcorehdr= parameter. */ static bool dump_available(void) { @@ -611,9 +605,7 @@ void __init smp_save_dump_ipl_cpu(void) if (!dump_available()) return; sa = save_area_alloc(true); - regs = memblock_alloc(512, 8); - if (!sa || !regs) - panic("could not allocate memory for boot CPU save area\n"); + regs = memblock_alloc_or_panic(512, 8); copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); save_area_add_regs(sa, regs); memblock_free(regs, 512); @@ -646,8 +638,6 @@ void __init smp_save_dump_secondary_cpus(void) SIGP_CC_NOT_OPERATIONAL) continue; sa = save_area_alloc(false); - if (!sa) - panic("could not allocate memory for save area\n"); __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); save_area_add_regs(sa, page); if (cpu_has_vx()) { @@ -792,10 +782,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_alloc(sizeof(*info), 8); - if (!info) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*info), 8); + info = memblock_alloc_or_panic(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { @@ -814,6 +801,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? 
mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; @@ -1011,7 +999,7 @@ static ssize_t cpu_configure_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); + count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -1083,7 +1071,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); + return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 9f59837d159e..b153a395f46d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> @@ -151,7 +152,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo break; } if (!store_ip(consume_entry, cookie, entry, perf, ip)) - return; + break; first = false; } pagefault_enable(); diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 1cf2ad04f8e9..d40f0b983e74 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -17,6 +17,7 @@ #include <asm/ebcdic.h> #include <asm/facility.h> #include <asm/sthyi.h> +#include <asm/asm.h> #include "entry.h" #define DED_WEIGHT 0xffff @@ -425,13 +426,12 @@ static int sthyi(u64 vaddr, u64 *rc) asm volatile( ".insn rre,0xB2560000,%[r1],%[r2]\n" - "ipm %[cc]\n" - "srl %[cc],28\n" - : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) + CC_IPM(cc) + : CC_OUT(cc, cc), [r2] "+&d" (r2.pair) : [r1] "d" (r1.pair) - : "memory", "cc"); + : CC_CLOBBER_LIST("memory")); *rc = r2.odd; - return cc; + return CC_TRANSFORM(cc); } static int fill_dst(void *dst, u64 *rc) diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..4fee74553ca2 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. 
*/ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { + if (likely(nr < NR_syscalls)) regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile index 1bb78b9468e8..c5d958a09ff4 100644 --- a/arch/s390/kernel/syscalls/Makefile +++ b/arch/s390/kernel/syscalls/Makefile @@ -12,7 +12,7 @@ kapi-hdrs-y := $(kapi)/unistd_nr.h uapi-hdrs-y := $(uapi)/unistd_32.h uapi-hdrs-y += $(uapi)/unistd_64.h -targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y)) +targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y)) PHONY += kapi uapi @@ -23,23 +23,26 @@ uapi: $(uapi-hdrs-y) # Create output directory if not already present $(shell mkdir -p $(uapi) $(kapi)) -filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $< +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@ -filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< +quiet_cmd_sysnr = SYSNR $@ + cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@ -filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< +quiet_cmd_syscalls = SYSTBL $@ + cmd_syscalls = $(CONFIG_SHELL) 
'$(systbl)' -S < $< > $@ syshdr_abi_unistd_32 := common,32 -$(uapi)/unistd_32.h: $(syscall) FORCE - $(call filechk,syshdr,$@) +$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE + $(call if_changed,syshdr) syshdr_abi_unistd_64 := common,64 -$(uapi)/unistd_64.h: $(syscall) FORCE - $(call filechk,syshdr,$@) +$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE + $(call if_changed,syshdr) -$(kapi)/syscall_table.h: $(syscall) FORCE - $(call filechk,syscalls) +$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE + $(call if_changed,syscalls) sysnr_abi_unistd_nr := common,32,64 -$(kapi)/unistd_nr.h: $(syscall) FORCE - $(call filechk,sysnr) +$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE + $(call if_changed,sysnr) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 01071182763e..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -465,3 +465,8 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal sys_mseal +463 common setxattrat sys_setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..1ea84e942bd4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 
0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; sprintf(link_to, "15_1_%d", topology_mnest_limit()); diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 0: br %r14 .endm diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b713effe0579..fed17d407a44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -36,7 +36,6 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> -#include <linux/timekeeper_internal.h> #include <linux/clockchips.h> #include <linux/gfp.h> #include <linux/kprobes.h> @@ -55,10 +54,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -80,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -255,6 +252,7 @@ static struct clocksource clocksource_tod = { .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .vdso_clock_mode = VDSO_CLOCKMODE_TOD, + .id = CSID_S390_TOD, }; struct clocksource * __init clocksource_default_clock(void) @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. 
*/ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) @@ -468,6 +463,12 @@ static void __init stp_reset(void) } } +bool stp_enabled(void) +{ + return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online; +} +EXPORT_SYMBOL(stp_enabled); + static void stp_timeout(struct timer_list *unused) { queue_work(time_sync_wq, &stp_work); @@ -656,12 +657,12 @@ static void stp_check_leap(void) if (ret < 0) pr_err("failed to set leap second flags\n"); /* arm Timer to clear leap second flags */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); + mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400)); } else { /* The day the leap second is scheduled for hasn't been reached. Retry * in one hour. */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); + mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600)); } } @@ -679,7 +680,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } @@ -729,8 +730,8 @@ static ssize_t ctn_id_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%016lx\n", - *(unsigned long *) stp_info.ctnid); + ret = sysfs_emit(buf, "%016lx\n", + *(unsigned long *)stp_info.ctnid); mutex_unlock(&stp_mutex); return ret; } @@ -745,7 +746,7 @@ static ssize_t ctn_type_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%i\n", stp_info.ctn); + ret = sysfs_emit(buf, "%i\n", stp_info.ctn); mutex_unlock(&stp_mutex); return ret; } @@ -760,7 +761,7 @@ static ssize_t dst_offset_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x2000)) - ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto); mutex_unlock(&stp_mutex); return ret; } @@ -775,7 +776,7 @@ static ssize_t leap_seconds_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x8000)) - ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps); mutex_unlock(&stp_mutex); return ret; } @@ -801,11 +802,11 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev, return ret; if (!stzi.lsoib.p) - return sprintf(buf, "0,0\n"); + return sysfs_emit(buf, "0,0\n"); - return sprintf(buf, "%lu,%d\n", - tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, - stzi.lsoib.nlso - stzi.lsoib.also); + return sysfs_emit(buf, "%lu,%d\n", + tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, + stzi.lsoib.nlso - stzi.lsoib.also); } static DEVICE_ATTR_RO(leap_seconds_scheduled); @@ -818,7 +819,7 @@ static ssize_t stratum_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum); mutex_unlock(&stp_mutex); return ret; } @@ -833,7 +834,7 @@ static ssize_t time_offset_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x0800)) - ret = sprintf(buf, "%i\n", (int) stp_info.tto); + ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto); mutex_unlock(&stp_mutex); return ret; } @@ -848,7 +849,7 @@ static ssize_t time_zone_offset_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x4000)) - ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ret = sysfs_emit(buf, "%i\n", 
(int)(s16)stp_info.tzo); mutex_unlock(&stp_mutex); return ret; } @@ -863,7 +864,7 @@ static ssize_t timing_mode_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%i\n", stp_info.tmd); + ret = sysfs_emit(buf, "%i\n", stp_info.tmd); mutex_unlock(&stp_mutex); return ret; } @@ -878,7 +879,7 @@ static ssize_t timing_state_show(struct device *dev, mutex_lock(&stp_mutex); if (stpinfo_valid()) - ret = sprintf(buf, "%i\n", stp_info.tst); + ret = sysfs_emit(buf, "%i\n", stp_info.tst); mutex_unlock(&stp_mutex); return ret; } @@ -889,7 +890,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%i\n", stp_online); + return sysfs_emit(buf, "%i\n", stp_online); } static ssize_t online_store(struct device *dev, diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 813e5da9a973..3df048e190b1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -26,6 +27,7 @@ #include <linux/node.h> #include <asm/hiperdispatch.h> #include <asm/sysinfo.h> +#include <asm/asm.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -224,22 +226,22 @@ static void topology_update_polarization_simple(void) static int ptf(unsigned long fc) { - int rc; + int cc; asm volatile( - " .insn rre,0xb9a20000,%1,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc) - : "d" (fc) : "cc"); - return rc; + " .insn rre,0xb9a20000,%[fc],%[fc]\n" + CC_IPM(cc) + : CC_OUT(cc, cc) + : [fc] "d" (fc) + : CC_CLOBBER); + return CC_TRANSFORM(cc); } int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -314,13 +316,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -370,12 +372,12 @@ static void set_topology_timer(void) if (atomic_add_unless(&topology_poll, -1, 0)) mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); + mod_timer(&topology_timer, jiffies + secs_to_jiffies(60)); } void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. 
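The sprintf() to sysfs_emit() conversions running through time.c, smp.c and topology.c in this series all take the same form: sysfs_emit() knows the sysfs buffer is a full page and refuses to write past it, which raw sprintf() cannot guarantee. A minimal sketch of the resulting show() callback, using a hypothetical read-only attribute:

static int example_value;	/* hypothetical backing variable */

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* sysfs_emit() bounds-checks against PAGE_SIZE, unlike sprintf(). */
	return sysfs_emit(buf, "%d\n", example_value);
}
static DEVICE_ATTR_RO(example);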
@@ -412,7 +414,7 @@ static ssize_t dispatching_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", cpu_management); + count = sysfs_emit(buf, "%d\n", cpu_management); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -443,19 +445,19 @@ static ssize_t cpu_polarization_show(struct device *dev, mutex_lock(&smp_cpu_state_mutex); switch (smp_cpu_get_polarization(cpu)) { case POLARIZATION_HRZ: - count = sprintf(buf, "horizontal\n"); + count = sysfs_emit(buf, "horizontal\n"); break; case POLARIZATION_VL: - count = sprintf(buf, "vertical:low\n"); + count = sysfs_emit(buf, "vertical:low\n"); break; case POLARIZATION_VM: - count = sprintf(buf, "vertical:medium\n"); + count = sysfs_emit(buf, "vertical:medium\n"); break; case POLARIZATION_VH: - count = sprintf(buf, "vertical:high\n"); + count = sysfs_emit(buf, "vertical:high\n"); break; default: - count = sprintf(buf, "unknown\n"); + count = sysfs_emit(buf, "unknown\n"); break; } mutex_unlock(&smp_cpu_state_mutex); @@ -479,7 +481,7 @@ static ssize_t cpu_dedicated_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); + count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu)); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -499,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -547,33 +549,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_alloc(sizeof(*mask->next), 8); - if (!mask->next) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*mask->next), 8); + mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8); mask = mask->next; } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; - tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!tl_info) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); @@ -590,7 +597,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? 
TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -661,7 +668,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write, return set_polarization(polarization); } -static struct ctl_table topology_ctl_table[] = { +static const struct ctl_table topology_ctl_table[] = { { .procname = "topology", .mode = 0644, @@ -680,7 +687,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 160b2acba8db..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,8 +24,11 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> +#include <asm/fault.h> #include "entry.h" static inline void __user *get_trap_ip(struct pt_regs *regs) @@ -41,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -56,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -77,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -97,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow 
exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -152,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. 
*/ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -189,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -248,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -261,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -269,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -282,12 +257,12 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } @@ -317,26 +292,36 @@ void noinstr __do_pgm_check(struct pt_regs *regs) struct lowcore *lc = get_lowcore(); irqentry_state_t state; unsigned int trapnr; + union teid teid; + teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; - regs->int_parm_long = lc->trans_exc_code; - + regs->int_parm_long = teid.val; + /* + * In case of a guest fault, short-circuit the fault handler and return. + * This way the sie64a() function will return 0; fault address and + * other relevant bits are saved in current->thread.gmap_teid, and + * the fault number in current->thread.gmap_int_code. KVM will be + * able to use this information to handle the fault. 
+ */ + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) { + current->thread.gmap_teid.val = regs->int_parm_long; + current->thread.gmap_int_code = regs->int_code & 0xffff; + return; + } state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = &current->thread.per_event; @@ -352,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); @@ -408,8 +391,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = { [0x3b] = do_dat_exception, [0x3c] = default_trap_handler, [0x3d] = do_secure_storage_access, - [0x3e] = do_non_secure_storage_access, - [0x3f] = do_secure_storage_violation, + [0x3e] = default_trap_handler, + [0x3f] = default_trap_handler, [0x40] = monitor_event_exception, [0x41 ... 0x7f] = default_trap_handler, }; @@ -420,5 +403,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = { __stringify(default_trap_handler)) COND_TRAP(do_secure_storage_access); -COND_TRAP(do_non_secure_storage_access); -COND_TRAP(do_secure_storage_violation); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9646f773208a..b99478e84da4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -2,7 +2,7 @@ /* * Common Ultravisor functions and initialization * - * Copyright IBM Corp. 2019, 2020 + * Copyright IBM Corp. 2019, 2024 */ #define KMSG_COMPONENT "prot_virt" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt @@ -15,23 +15,11 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -148,7 +136,7 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -159,6 +147,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +164,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,15 +176,16 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio.
*/ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -206,6 +196,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -216,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -237,13 +261,31 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -268,262 +310,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return uvcb->rc == 0x10a ? 
-ENXIO : -EINVAL; } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) { - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} + int rc; -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; } -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). 
*/ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; + int rc, tried_splits; -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { folio_unlock(folio); + return rc; } /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when triggering writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) are not + * affected. */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race.
*/ - return -EAGAIN; + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: + /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; + if (unlikely(!inode)) + break; - return gmap_make_secure(gmap, gaddr, &uvcb); + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; } -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) { struct vm_area_struct *vma; struct folio_walk fw; - unsigned long uaddr; struct folio *folio; int rc; - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); + folio_get(folio); /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. + * Secure pages cannot be huge and userspace should not combine both. 
+ * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + return rc; } -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -696,12 +643,32 @@ static struct kobj_attribute uv_query_supp_secret_types_attr = static ssize_t uv_query_max_secrets(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sysfs_emit(buf, "%d\n", uv_info.max_secrets); + return sysfs_emit(buf, "%d\n", + uv_info.max_assoc_secrets + uv_info.max_retr_secrets); } static struct kobj_attribute uv_query_max_secrets_attr = __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL); +static ssize_t uv_query_max_retr_secrets(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets); +} + +static struct kobj_attribute uv_query_max_retr_secrets_attr = + __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL); + +static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets); +} + +static struct kobj_attribute uv_query_max_assoc_secrets_attr = + __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL); + static struct attribute *uv_query_attrs[] = { &uv_query_facilities_attr.attr, &uv_query_feature_indications_attr.attr, @@ -719,13 +686,81 @@ static struct attribute *uv_query_attrs[] = { &uv_query_supp_add_secret_pcf_attr.attr, &uv_query_supp_secret_types_attr.attr, &uv_query_max_secrets_attr.attr, + &uv_query_max_assoc_secrets_attr.attr, + &uv_query_max_retr_secrets_attr.attr, NULL, }; +static inline struct uv_cb_query_keys uv_query_keys(void) +{ + struct uv_cb_query_keys uvcb = { + .header.cmd = UVC_CMD_QUERY_KEYS, + .header.len = sizeof(uvcb) + }; + + uv_call(0, (uint64_t)&uvcb); + return uvcb; +} + +static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at) +{ + return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n", + hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]); +} + +static ssize_t uv_keys_host_key(struct 
kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + + return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0); +} + +static struct kobj_attribute uv_keys_host_key_attr = + __ATTR(host_key, 0444, uv_keys_host_key, NULL); + +static ssize_t uv_keys_backup_host_key(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + + return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0); +} + +static struct kobj_attribute uv_keys_backup_host_key_attr = + __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL); + +static ssize_t uv_keys_all(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct uv_cb_query_keys uvcb = uv_query_keys(); + ssize_t len = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++) + len += emit_hash(uvcb.key_hashes + i, buf, len); + + return len; +} + +static struct kobj_attribute uv_keys_all_attr = + __ATTR(all, 0444, uv_keys_all, NULL); + static struct attribute_group uv_query_attr_group = { .attrs = uv_query_attrs, }; +static struct attribute *uv_keys_attrs[] = { + &uv_keys_host_key_attr.attr, + &uv_keys_backup_host_key_attr.attr, + &uv_keys_all_attr.attr, + NULL, +}; + +static struct attribute_group uv_keys_attr_group = { + .attrs = uv_keys_attrs, +}; + static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -751,9 +786,27 @@ static const struct attribute *uv_prot_virt_attrs[] = { }; static struct kset *uv_query_kset; +static struct kset *uv_keys_kset; static struct kobject *uv_kobj; -static int __init uv_info_init(void) +static int __init uv_sysfs_dir_init(const struct attribute_group *grp, + struct kset **uv_dir_kset, const char *name) +{ + struct kset *kset; + int rc; + + kset = kset_create_and_add(name, NULL, uv_kobj); + if (!kset) + return -ENOMEM; + *uv_dir_kset = kset; + + rc = sysfs_create_group(&kset->kobj, grp); + if (rc) + kset_unregister(kset); + return rc; +} + +static int __init uv_sysfs_init(void) { int rc = -ENOMEM; @@ -768,17 +821,16 @@ static int __init uv_info_init(void) if (rc) goto out_kobj; - uv_query_kset = kset_create_and_add("query", NULL, uv_kobj); - if (!uv_query_kset) { - rc = -ENOMEM; + rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query"); + if (rc) goto out_ind_files; - } - rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group); - if (!rc) - return 0; + /* Get installed key hashes if available, ignore any errors */ + if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list)) + uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys"); + + return 0; - kset_unregister(uv_query_kset); out_ind_files: sysfs_remove_files(uv_kobj, uv_prot_virt_attrs); out_kobj: @@ -786,4 +838,110 @@ out_kobj: kobject_put(uv_kobj); return rc; } -device_initcall(uv_info_init); +device_initcall(uv_sysfs_init); + +/* + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. + * + * Context: might sleep. 
+ */ +static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], + const struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) +{ + u16 i; + + for (i = 0; i < list->total_num_secrets; i++) { + if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) { + *secret = list->secrets[i].hdr; + return 0; + } + } + return -ENOENT; +} + +/* + * Do the actual search for `uv_get_secret_metadata`. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Context: might sleep. + */ +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) +{ + u16 start_idx = 0; + u16 list_rc; + int ret; + + do { + uv_list_secrets(list, start_idx, &list_rc, NULL); + if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) { + if (list_rc == UVC_RC_INV_CMD) + return -ENODEV; + else + return -EIO; + } + ret = find_secret_in_page(secret_id, list, secret); + if (ret == 0) + return ret; + start_idx = list->next_secret_idx; + } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx); + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(uv_find_secret); + +/** + * uv_retrieve_secret() - get the secret value for the secret index. + * @secret_idx: Secret index for which the secret should be retrieved. + * @buf: Buffer to store retrieved secret. + * @buf_size: Size of the buffer. The correct buffer size is reported as part of + * the result from `uv_get_secret_metadata`. + * + * Calls the Retrieve Secret UVC and translates the UV return code into an errno. + * + * Context: might sleep. + * + * Return: + * * %0 - Entry found; buffer contains a valid secret. + * * %ENOENT: - No entry found or secret at the index is non-retrievable. + * * %ENODEV: - Not supported: UV not available or command not available. + * * %EINVAL: - Buffer too small for content. + * * %EIO: - Other unexpected UV error. 
+ */ +int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size) +{ + struct uv_cb_retr_secr uvcb = { + .header.len = sizeof(uvcb), + .header.cmd = UVC_CMD_RETR_SECRET, + .secret_idx = secret_idx, + .buf_addr = (u64)buf, + .buf_size = buf_size, + }; + + uv_call_sched(0, (u64)&uvcb); + + switch (uvcb.header.rc) { + case UVC_RC_EXECUTED: + return 0; + case UVC_RC_INV_CMD: + return -ENODEV; + case UVC_RC_RETR_SECR_STORE_EMPTY: + case UVC_RC_RETR_SECR_INV_SECRET: + case UVC_RC_RETR_SECR_INV_IDX: + return -ENOENT; + case UVC_RC_RETR_SECR_BUF_SMALL: + return -EINVAL; + default: + return -EIO; + } +} +EXPORT_SYMBOL_GPL(uv_retrieve_secret); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..430feb1a5013 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). 
- */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index 65b9513a5a0e..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,17 +6,16 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif - . = VDSO_LBASE + SIZEOF_HEADERS; + VDSO_VVAR_SYMS + + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 37bb4b761229..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,10 +2,10 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. 
-include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 753040a4b5ab..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,18 +7,16 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif - . = VDSO_LBASE + SIZEOF_HEADERS; + VDSO_VVAR_SYMS + + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c index 23f7d7619a99..cc8933e04ff7 100644 --- a/arch/s390/kernel/vmcore_info.c +++ b/arch/s390/kernel/vmcore_info.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/vmcore_info.h> -#include <asm/abs_lowcore.h> #include <linux/mm.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c92..ff1ddba96352 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */
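Two closing sketches for the less obvious idioms in this series. First, the reworked ptf() in topology.c replaces an open-coded condition-code extraction with the CC_IPM()/CC_OUT()/CC_TRANSFORM()/CC_CLOBBER helpers from <asm/asm.h>. The removed variant, reproduced below as a self-contained sketch (ptf_cc_sketch() is a hypothetical name; the helpers may expand differently, for example to flag-output constraints on newer compilers), shows what those macros abstract: IPM copies the PSW condition code into bits 28-31 of a register and SRL shifts it down to a C-visible value of 0..3.

static inline int ptf_cc_sketch(unsigned long fc)
{
	int cc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%[fc],%[fc]\n"
		"	ipm	%[cc]\n"
		"	srl	%[cc],28\n"
		: [cc] "=d" (cc)
		: [fc] "d" (fc)
		: "cc");
	return cc;
}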
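Second, a hedged usage sketch for the new Ultravisor secret exports in uv.c. fetch_uv_secret() is a hypothetical caller; the assumption that struct uv_secret_list_item_hdr carries the secret index that uv_retrieve_secret() expects is mine and is not spelled out in the hunks above.

#include <linux/slab.h>
#include <asm/uv.h>

static int fetch_uv_secret(const u8 id[UV_SECRET_ID_LEN], u8 *buf, size_t len)
{
	struct uv_secret_list_item_hdr hdr;
	struct uv_secret_list *list;
	int rc;

	/* uv_find_secret() needs scratch space to page through the UV store */
	list = kzalloc(sizeof(*list), GFP_KERNEL);
	if (!list)
		return -ENOMEM;
	rc = uv_find_secret(id, list, &hdr);
	kfree(list);
	if (rc)
		return rc;	/* -ENOENT, -ENODEV or -EIO, as mapped above */
	/* UV return codes are translated to errnos, see uv_retrieve_secret() */
	return uv_retrieve_secret(hdr.index, buf, len);
}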