diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 19:04:10 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 19:04:10 +0300 |
commit | becdce1c66b21ce1c0452e16127182ef692f47ba (patch) | |
tree | a37f26fbbc43fad56b12881f6d57dc4a0fdb8d98 /arch | |
parent | f8cf2f16a7c95acce497bfafa90e7c6d8397d653 (diff) | |
parent | 92fa7a13c845c91f6a8177250474bbcab7fcf45e (diff) | |
download | linux-becdce1c66b21ce1c0452e16127182ef692f47ba.tar.xz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
- Improvements for the spectre defense:
* The spectre related code is consolidated to a single file
nospec-branch.c
* Automatic enable/disable for the spectre v2 defenses (expoline vs.
nobp)
* Syslog messages for specve v2 are added
* Enable CONFIG_GENERIC_CPU_VULNERABILITIES and define the attribute
functions for spectre v1 and v2
- Add helper macros for assembler alternatives and use them to shorten
the code in entry.S.
- Add support for persistent configuration data via the SCLP Store Data
interface. The H/W interface requires a page table that uses 4K pages
only, the code to setup such an address space is added as well.
- Enable virtio GPU emulation in QEMU. To do this the depends
statements for a few common Kconfig options are modified.
- Add support for format-3 channel path descriptors and add a binary
sysfs interface to export the associated utility strings.
- Add a sysfs attribute to control the IFCC handling in case of
constant channel errors.
- The vfio-ccw changes from Cornelia.
- Bug fixes and cleanups.
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (40 commits)
s390/kvm: improve stack frame constants in entry.S
s390/lpp: use assembler alternatives for the LPP instruction
s390/entry.S: use assembler alternatives
s390: add assembler macros for CPU alternatives
s390: add sysfs attributes for spectre
s390: report spectre mitigation via syslog
s390: add automatic detection of the spectre defense
s390: move nobp parameter functions to nospec-branch.c
s390/cio: add util_string sysfs attribute
s390/chsc: query utility strings via fmt3 channel path descriptor
s390/cio: rename struct channel_path_desc
s390/cio: fix unbind of io_subchannel_driver
s390/qdio: split up CCQ handling for EQBS / SQBS
s390/qdio: don't retry EQBS after CCQ 96
s390/qdio: restrict buffer merging to eligible devices
s390/qdio: don't merge ERROR output buffers
s390/qdio: simplify math in get_*_buffer_frontier()
s390/decompressor: trim uncompressed image head during the build
s390/crypto: Fix kernel crash on aes_s390 module remove.
s390/defkeymap: fix global init to zero
...
Diffstat (limited to 'arch')
29 files changed, 614 insertions, 195 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eaee7087886f..32a0d5b958bf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,6 +120,7 @@ config S390 select GENERIC_CLOCKEVENTS select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_DEVICES if !SMP + select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL @@ -576,7 +577,7 @@ choice config EXPOLINE_OFF bool "spectre_v2=off" -config EXPOLINE_MEDIUM +config EXPOLINE_AUTO bool "spectre_v2=auto" config EXPOLINE_FULL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 2ced3239cb84..c79936d02f7b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -47,9 +47,6 @@ cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include -#KBUILD_IMAGE is necessary for make rpm -KBUILD_IMAGE :=arch/s390/boot/image - # # Prevent tail-call optimizations, to get clearer backtraces: # @@ -84,7 +81,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mfunction-return=thunk CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE - cflags-y += $(CC_FLAGS_EXPOLINE) + cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE endif endif @@ -126,6 +123,9 @@ tools := arch/s390/tools all: image bzImage +#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... +KBUILD_IMAGE := $(boot)/bzImage + install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 26d6a94f40f6..5766f7b9b271 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -29,11 +29,16 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(call if_changed,ld) -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p' +TRIM_HEAD_SIZE := 0x11000 -quiet_cmd_sizes = GEN $@ +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' + +quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ +quiet_cmd_trim_head = TRIM $@ + cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@ + $(obj)/sizes.h: vmlinux $(call if_changed,sizes) @@ -43,10 +48,13 @@ $(obj)/head.o: $(obj)/sizes.h CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -$(obj)/vmlinux.bin: vmlinux +OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S +$(obj)/vmlinux.bin.full: vmlinux $(call if_changed,objcopy) +$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full + $(call if_changed,trim_head) + vmlinux.bin.all-y := $(obj)/vmlinux.bin suffix-$(CONFIG_KERNEL_GZIP) := gz diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 231d1491d431..9f94eca0f467 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -23,12 +23,10 @@ ENTRY(startup_continue) aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. We move the decompressed image to - # 0x11000, starting at offset 0x11000 in the decompressed image so - # that code living at 0x11000 in the image will end up at 0x11000 in - # memory. + # 0x11000, where startup_continue of the decompressed image is supposed + # to be. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) - la %r4,0(%r2,%r4) lg %r3,.Lmvsize-.LPG1(%r13) lgr %r5,%r3 # Move the memory mover someplace safe so it doesn't overwrite itself. diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index cecf38b9ec82..63838a17e56a 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -27,8 +27,8 @@ /* Symbols defined by linker scripts */ extern char input_data[]; extern int input_len; -extern char _text, _end; -extern char _bss, _ebss; +extern char _end[]; +extern char _bss[], _ebss[]; static void error(char *m); @@ -144,7 +144,7 @@ unsigned long decompress_kernel(void) { void *output, *kernel_end; - output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE); kernel_end = output + SZ__bss_start; check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); @@ -166,8 +166,8 @@ unsigned long decompress_kernel(void) * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be * initialized afterwards since they reside in bss. */ - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long) &_end; + memset(_bss, 0, _ebss - _bss); + free_mem_ptr = (unsigned long) _end; free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 8150132b144f..d43c2db12d30 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.eh_frame) + *(__ex_table) *(*__ksymtab*) } } diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index d60798737d86..fa9b7dd1a513 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1047,6 +1047,7 @@ static struct aead_alg gcm_aes_aead = { static struct crypto_alg *aes_s390_algs_ptr[5]; static int aes_s390_algs_num; +static struct aead_alg *aes_s390_aead_alg; static int aes_s390_register_alg(struct crypto_alg *alg) { @@ -1065,7 +1066,8 @@ static void aes_s390_fini(void) if (ctrblk) free_page((unsigned long) ctrblk); - crypto_unregister_aead(&gcm_aes_aead); + if (aes_s390_aead_alg) + crypto_unregister_aead(aes_s390_aead_alg); } static int __init aes_s390_init(void) @@ -1123,6 +1125,7 @@ static int __init aes_s390_init(void) ret = crypto_register_aead(&gcm_aes_aead); if (ret) goto out_err; + aes_s390_aead_alg = &gcm_aes_aead; } return 0; diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h new file mode 100644 index 000000000000..955d620db23e --- /dev/null +++ b/arch/s390/include/asm/alternative-asm.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ALTERNATIVE_ASM_H +#define _ASM_S390_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * Check the length of an instruction sequence. The length may not be larger + * than 254 bytes and it has to be divisible by 2. + */ +.macro alt_len_check start,end + .if ( \end - \start ) > 254 + .error "cpu alternatives does not support instructions blocks > 254 bytes\n" + .endif + .if ( \end - \start ) % 2 + .error "cpu alternatives instructions length is odd\n" + .endif +.endm + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .word \feature + .byte \orig_end - \orig_start + .byte \alt_end - \alt_start +.endm + +/* + * Fill up @bytes with nops. The macro emits 6-byte nop instructions + * for the bulk of the area, possibly followed by a 4-byte and/or + * a 2-byte nop if the size of the area is not divisible by 6. + */ +.macro alt_pad_fill bytes + .fill ( \bytes ) / 6, 6, 0xc0040000 + .fill ( \bytes ) % 6 / 4, 4, 0x47000000 + .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700 +.endm + +/* + * Fill up @bytes with nops. If the number of bytes is larger + * than 6, emit a jg instruction to branch over all nops, then + * fill an area of size (@bytes - 6) with nop instructions. + */ +.macro alt_pad bytes + .if ( \bytes > 0 ) + .if ( \bytes > 6 ) + jg . + \bytes + alt_pad_fill \bytes - 6 + .else + alt_pad_fill \bytes + .endif + .endif +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: alt_len_check 770b, 771b + alt_len_check 772b, 773b + alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) ) +774: .pushsection .altinstructions,"a" + alt_entry 772b, 774b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: alt_len_check 770b, 771b + alt_len_check 771b, 772b + alt_len_check 773b, 774b + .if ( 771b - 770b > 772b - 771b ) + alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) ) + .else + alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) ) + .endif +775: .pushsection .altinstructions,"a" + alt_entry 773b, 775b, 770b, 771b,\feature1 + alt_entry 773b, 775b, 771b, 772b,\feature2 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 633f8da86137..20bce136b2e5 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -230,5 +230,5 @@ int ccw_device_siosl(struct ccw_device *); extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); -struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int); +struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h index 4773f747915c..20e0d22f29e9 100644 --- a/arch/s390/include/asm/chpid.h +++ b/arch/s390/include/asm/chpid.h @@ -9,7 +9,7 @@ #include <uapi/asm/chpid.h> #include <asm/cio.h> -struct channel_path_desc { +struct channel_path_desc_fmt0 { u8 flags; u8 lsn; u8 desc; diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index dc84a0171bb3..847a04262b9c 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -227,7 +227,7 @@ struct esw_eadm { * a field is valid; a field not being valid is always passed as %0. * If a unit check occurred, @ecw may contain sense data; this is retrieved * by the common I/O layer itself if the device doesn't support concurrent - * sense (so that the device driver never needs to perform basic sene itself). + * sense (so that the device driver never needs to perform basic sense itself). * For unsolicited interrupts, the irb is passed as-is (expect for sense data, * if applicable). */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index dd08db491b89..f58d17e9dd65 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -29,12 +29,12 @@ /* CPU measurement facility support */ static inline int cpum_cf_avail(void) { - return MACHINE_HAS_LPP && test_facility(67); + return test_facility(40) && test_facility(67); } static inline int cpum_sf_avail(void) { - return MACHINE_HAS_LPP && test_facility(68); + return test_facility(40) && test_facility(68); } diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index fb56fa3283a2..0563fd3e8458 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -32,8 +32,10 @@ struct css_general_char { u32 fcx : 1; /* bit 88 */ u32 : 19; u32 alt_ssi : 1; /* bit 108 */ - u32:1; - u32 narf:1; /* bit 110 */ + u32 : 1; + u32 narf : 1; /* bit 110 */ + u32 : 12; + u32 util_str : 1;/* bit 123 */ } __packed; extern struct css_general_char css_general_characteristics; diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 7df48e5cf36f..35bf28fe4c64 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -6,12 +6,10 @@ #include <linux/types.h> -extern int nospec_call_disable; -extern int nospec_return_disable; +extern int nospec_disable; void nospec_init_branches(void); -void nospec_call_revert(s32 *start, s32 *end); -void nospec_return_revert(s32 *start, s32 *end); +void nospec_revert(s32 *start, s32 *end); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index c7b4333d1de0..f0f9bcf94c03 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -151,4 +151,7 @@ void vmem_map_init(void); void *vmem_crst_alloc(unsigned long val); pte_t *vmem_pte_alloc(void); +unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages); +void base_asce_free(unsigned long asce); + #endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index 79b7ffa91832..c00f7b031628 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -390,10 +390,10 @@ static inline int scsw_cmd_is_valid_key(union scsw *scsw) } /** - * scsw_cmd_is_valid_sctl - check fctl field validity + * scsw_cmd_is_valid_sctl - check sctl field validity * @scsw: pointer to scsw * - * Return non-zero if the fctl field of the specified command mode scsw is + * Return non-zero if the sctl field of the specified command mode scsw is * valid, zero otherwise. */ static inline int scsw_cmd_is_valid_sctl(union scsw *scsw) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 2eb0c8a7b664..124154fdfc97 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -25,7 +25,6 @@ #define MACHINE_FLAG_DIAG44 _BITUL(6) #define MACHINE_FLAG_EDAT1 _BITUL(7) #define MACHINE_FLAG_EDAT2 _BITUL(8) -#define MACHINE_FLAG_LPP _BITUL(9) #define MACHINE_FLAG_TOPOLOGY _BITUL(10) #define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TLB_LC _BITUL(12) @@ -66,7 +65,6 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) #define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) -#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 451c601406b6..832be5c2584f 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -68,25 +68,27 @@ typedef struct dasd_information2_t { #define DASD_FORMAT_CDL 2 /* * values to be used for dasd_information_t.features - * 0x00: default features - * 0x01: readonly (ro) - * 0x02: use diag discipline (diag) - * 0x04: set the device initially online (internal use only) - * 0x08: enable ERP related logging - * 0x10: allow I/O to fail on lost paths - * 0x20: allow I/O to fail when a lock was stolen - * 0x40: give access to raw eckd data - * 0x80: enable discard support + * 0x100: default features + * 0x001: readonly (ro) + * 0x002: use diag discipline (diag) + * 0x004: set the device initially online (internal use only) + * 0x008: enable ERP related logging + * 0x010: allow I/O to fail on lost paths + * 0x020: allow I/O to fail when a lock was stolen + * 0x040: give access to raw eckd data + * 0x080: enable discard support + * 0x100: enable autodisable for IFCC errors (default) */ -#define DASD_FEATURE_DEFAULT 0x00 -#define DASD_FEATURE_READONLY 0x01 -#define DASD_FEATURE_USEDIAG 0x02 -#define DASD_FEATURE_INITIAL_ONLINE 0x04 -#define DASD_FEATURE_ERPLOG 0x08 -#define DASD_FEATURE_FAILFAST 0x10 -#define DASD_FEATURE_FAILONSLCK 0x20 -#define DASD_FEATURE_USERAW 0x40 -#define DASD_FEATURE_DISCARD 0x80 +#define DASD_FEATURE_READONLY 0x001 +#define DASD_FEATURE_USEDIAG 0x002 +#define DASD_FEATURE_INITIAL_ONLINE 0x004 +#define DASD_FEATURE_ERPLOG 0x008 +#define DASD_FEATURE_FAILFAST 0x010 +#define DASD_FEATURE_FAILONSLCK 0x020 +#define DASD_FEATURE_USERAW 0x040 +#define DASD_FEATURE_DISCARD 0x080 +#define DASD_FEATURE_PATH_AUTODISABLE 0x100 +#define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE #define DASD_PARTN_BITS 2 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7f27e3da9709..b06a6f79c1ec 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -61,11 +61,11 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o +obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds -obj-$(CONFIG_EXPOLINE) += nospec-branch.o -CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE) +CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 22476135f738..8e1f2aee85ef 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -2,6 +2,7 @@ #include <linux/module.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/nospec-branch.h> #define MAX_PATCH_LEN (255 - 1) @@ -15,29 +16,6 @@ static int __init disable_alternative_instructions(char *str) early_param("noaltinstr", disable_alternative_instructions); -static int __init nobp_setup_early(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (rc) - return rc; - if (enabled && test_facility(82)) - __set_facility(82, S390_lowcore.alt_stfle_fac_list); - else - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); - return 0; -} -early_param("nobp", nobp_setup_early); - -static int __init nospec_setup_early(char *str) -{ - __clear_facility(82, S390_lowcore.alt_stfle_fac_list); - return 0; -} -early_param("nospec", nospec_setup_early); - struct brcl_insn { u16 opc; s32 disp; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 587b195b588d..cfe2c45c5180 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -63,6 +63,7 @@ int main(void) OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); + OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); BLANK(); /* timeval/timezone offsets for use by vdso */ OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ac707a9f729e..b00b515baa53 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -67,7 +67,7 @@ static noinline __init void init_kernel_storage_key(void) #if PAGE_DEFAULT_KEY unsigned long end_pfn, init_pfn; - end_pfn = PFN_UP(__pa(&_end)); + end_pfn = PFN_UP(__pa(_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) page_set_storage_key(init_pfn << PAGE_SHIFT, @@ -242,8 +242,6 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; if (test_facility(3)) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(40)) - S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) { S390_lowcore.machine_flags |= MACHINE_FLAG_TE; __ctl_set_bit(0, 55); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a5621ea6d123..3f22f139a041 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/alternative-asm.h> #include <asm/processor.h> #include <asm/cache.h> #include <asm/ctl_reg.h> @@ -57,6 +58,8 @@ _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ _CIF_ASCE_SECONDARY | _CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) +_LPP_OFFSET = __LC_LPP + #define BASED(name) name-cleanup_critical(%r13) .macro TRACE_IRQS_ON @@ -162,65 +165,22 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) .endm .macro BPOFF - .pushsection .altinstr_replacement, "ax" -660: .long 0xb2e8c000 - .popsection -661: .long 0x47000000 - .pushsection .altinstructions, "a" - .long 661b - . - .long 660b - . - .word 82 - .byte 4 - .byte 4 - .popsection + ALTERNATIVE "", ".long 0xb2e8c000", 82 .endm .macro BPON - .pushsection .altinstr_replacement, "ax" -662: .long 0xb2e8d000 - .popsection -663: .long 0x47000000 - .pushsection .altinstructions, "a" - .long 663b - . - .long 662b - . - .word 82 - .byte 4 - .byte 4 - .popsection + ALTERNATIVE "", ".long 0xb2e8d000", 82 .endm .macro BPENTER tif_ptr,tif_mask - .pushsection .altinstr_replacement, "ax" -662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop - .word 0xc004, 0x0000, 0x0000 # 6 byte nop - .popsection -664: TSTMSK \tif_ptr,\tif_mask - jz . + 8 - .long 0xb2e8d000 - .pushsection .altinstructions, "a" - .long 664b - . - .long 662b - . - .word 82 - .byte 12 - .byte 12 - .popsection + ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \ + "", 82 .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask - .pushsection .altinstr_replacement, "ax" -662: jnz . + 8 - .long 0xb2e8d000 - .popsection -664: jz . + 8 - .long 0xb2e8c000 - .pushsection .altinstructions, "a" - .long 664b - . - .long 662b - . - .word 82 - .byte 8 - .byte 8 - .popsection + ALTERNATIVE "jz .+8; .long 0xb2e8c000", \ + "jnz .+8; .long 0xb2e8d000", 82 .endm #ifdef CONFIG_EXPOLINE @@ -323,10 +283,8 @@ ENTRY(__switch_to) aghi %r3,__TASK_pid mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP - jz 0f - .insn s,0xb2800000,__LC_LPP # set program parameter -0: BR_R1USE_R14 + ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + BR_R1USE_R14 .L__critical_start: @@ -339,10 +297,10 @@ ENTRY(__switch_to) ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers lg %r12,__LC_CURRENT - stg %r2,__SF_EMPTY(%r15) # save control block pointer - stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0 - mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags + stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer + stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area + xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? jno .Lsie_load_guest_gprs brasl %r14,load_fpu_regs # load guest fp/vx regs @@ -353,18 +311,18 @@ ENTRY(sie64a) jz .Lsie_gmap lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce .Lsie_gmap: - lg %r14,__SF_EMPTY(%r15) # get control block pointer + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now tm __SIE_PROG20+3(%r14),3 # last exit... jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed - BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) .Lsie_exit: BPOFF - BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -383,7 +341,7 @@ ENTRY(sie64a) nopr 7 .globl sie_exit sie_exit: - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 xgr %r0,%r0 # clear guest registers to xgr %r1,%r1 # prevent speculative use @@ -392,11 +350,11 @@ sie_exit: xgr %r4,%r4 xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_EMPTY+16(%r15) # return exit reason code + lg %r2,__SF_SIE_REASON(%r15) # return exit reason code BR_R1USE_R14 .Lsie_fault: lghi %r14,-EFAULT - stg %r14,__SF_EMPTY+16(%r15) # set exit reason code + stg %r14,__SF_SIE_REASON(%r15) # set exit reason code j sie_exit EX_TABLE(.Lrewind_pad6,.Lsie_fault) @@ -685,7 +643,7 @@ ENTRY(pgm_check_handler) slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) jhe 0f - lg %r14,__SF_EMPTY(%r15) # get control block pointer + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit @@ -1285,10 +1243,8 @@ ENTRY(mcck_int_handler) # PSW restart interrupt handler # ENTRY(restart_int_handler) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP - jz 0f - .insn s,0xb2800000,__LC_LPP -0: stg %r15,__LC_SAVE_AREA_RESTART + ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + stg %r15,__LC_SAVE_AREA_RESTART lg %r15,__LC_RESTART_STACK aghi %r15,-__PT_SIZE # create pt_regs on stack xc 0(__PT_SIZE,%r15),0(%r15) @@ -1397,8 +1353,8 @@ cleanup_critical: clg %r9,BASED(.Lsie_crit_mcck_length) jh 1f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) - lg %r9,__SF_EMPTY(%r15) # get control block pointer +1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 1fc6d1ff92d3..5a83be955c70 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -159,7 +159,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, me->core_layout.size += me->arch.got_size; me->arch.plt_offset = me->core_layout.size; if (me->arch.plt_size) { - if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable) + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) me->arch.plt_size += PLT_ENTRY_SIZE; me->core_layout.size += me->arch.plt_size; } @@ -318,8 +318,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0 */ ip[1] = 0x100a0004; /* lg 1,10(1) */ - if (IS_ENABLED(CONFIG_EXPOLINE) && - !nospec_call_disable) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + @@ -440,7 +439,7 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg; if (IS_ENABLED(CONFIG_EXPOLINE) && - !nospec_call_disable && me->arch.plt_size) { + !nospec_disable && me->arch.plt_size) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + @@ -467,11 +466,11 @@ int module_finalize(const Elf_Ehdr *hdr, if (IS_ENABLED(CONFIG_EXPOLINE) && (!strcmp(".nospec_call_table", secname))) - nospec_call_revert(aseg, aseg + s->sh_size); + nospec_revert(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && (!strcmp(".nospec_return_table", secname))) - nospec_return_revert(aseg, aseg + s->sh_size); + nospec_revert(aseg, aseg + s->sh_size); } jump_label_apply_nops(me); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9aff72d3abda..14867ec5f726 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,32 +1,108 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> +#include <linux/device.h> #include <asm/nospec-branch.h> -int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) { + /* + * The user explicitely requested nobp=1, enable it and + * disable the expoline support. + */ + __set_facility(82, S390_lowcore.alt_stfle_fac_list); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_disable = 1; + } else { + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + return 0; +} +early_param("nospec", nospec_setup_early); + +static int __init nospec_report(void) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + pr_info("Spectre V2 mitigation: execute trampolines.\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + pr_info("Spectre V2 mitigation: limited branch prediction.\n"); + return 0; +} +arch_initcall(nospec_report); + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction.\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif + +#ifdef CONFIG_EXPOLINE + +int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); static int __init nospectre_v2_setup_early(char *str) { - nospec_call_disable = 1; - nospec_return_disable = 1; + nospec_disable = 1; return 0; } early_param("nospectre_v2", nospectre_v2_setup_early); +static int __init spectre_v2_auto_early(void) +{ + if (IS_ENABLED(CC_USING_EXPOLINE)) { + /* + * The kernel has been compiled with expolines. + * Keep expolines enabled and disable nobp. + */ + nospec_disable = 0; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } + /* + * If the kernel has not been compiled with expolines the + * nobp setting decides what is done, this depends on the + * CONFIG_KERNEL_NP option and the nobp/nospec parameters. + */ + return 0; +} +#ifdef CONFIG_EXPOLINE_AUTO +early_initcall(spectre_v2_auto_early); +#endif + static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { - nospec_call_disable = 0; - nospec_return_disable = 0; - } - if (str && !strncmp(str, "off", 3)) { - nospec_call_disable = 1; - nospec_return_disable = 1; - } - if (str && !strncmp(str, "auto", 4)) { - nospec_call_disable = 0; - nospec_return_disable = 1; + nospec_disable = 0; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); } + if (str && !strncmp(str, "off", 3)) + nospec_disable = 1; + if (str && !strncmp(str, "auto", 4)) + spectre_v2_auto_early(); return 0; } early_param("spectre_v2", spectre_v2_setup_early); @@ -79,15 +155,9 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) } } -void __init_or_module nospec_call_revert(s32 *start, s32 *end) -{ - if (nospec_call_disable) - __nospec_revert(start, end); -} - -void __init_or_module nospec_return_revert(s32 *start, s32 *end) +void __init_or_module nospec_revert(s32 *start, s32 *end) { - if (nospec_return_disable) + if (nospec_disable) __nospec_revert(start, end); } @@ -95,6 +165,8 @@ extern s32 __nospec_call_start[], __nospec_call_end[]; extern s32 __nospec_return_start[], __nospec_return_end[]; void __init nospec_init_branches(void) { - nospec_call_revert(__nospec_call_start, __nospec_call_end); - nospec_return_revert(__nospec_return_start, __nospec_return_end); + nospec_revert(__nospec_call_start, __nospec_call_end); + nospec_revert(__nospec_return_start, __nospec_return_end); } + +#endif /* CONFIG_EXPOLINE */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a6a91f01a17a..7b58a712f818 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -221,6 +221,8 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE)) + conswitchp = &dummy_con; } #ifdef CONFIG_CRASH_DUMP @@ -413,12 +415,12 @@ static void __init setup_resources(void) struct memblock_region *reg; int j; - code_resource.start = (unsigned long) &_text; - code_resource.end = (unsigned long) &_etext - 1; - data_resource.start = (unsigned long) &_etext; - data_resource.end = (unsigned long) &_edata - 1; - bss_resource.start = (unsigned long) &__bss_start; - bss_resource.end = (unsigned long) &__bss_stop - 1; + code_resource.start = (unsigned long) _text; + code_resource.end = (unsigned long) _etext - 1; + data_resource.start = (unsigned long) _etext; + data_resource.end = (unsigned long) _edata - 1; + bss_resource.start = (unsigned long) __bss_start; + bss_resource.end = (unsigned long) __bss_stop - 1; for_each_memblock(memory, reg) { res = memblock_virt_alloc(sizeof(*res), 8); @@ -667,7 +669,7 @@ static void __init check_initrd(void) */ static void __init reserve_kernel(void) { - unsigned long start_pfn = PFN_UP(__pa(&_end)); + unsigned long start_pfn = PFN_UP(__pa(_end)); #ifdef CONFIG_DMA_API_DEBUG /* @@ -888,9 +890,9 @@ void __init setup_arch(char **cmdline_p) /* Is init_mm really needed? */ init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) _end; parse_early_param(); #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index ce329c876d8c..75b7b307946e 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -153,8 +153,8 @@ int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); - unsigned long end_rodata_pfn = PFN_DOWN(__pa(&__end_rodata)) - 1; - unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); + unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1; + unsigned long stext_pfn = PFN_DOWN(__pa(_stext)); /* Always save lowcore pages (LC protection might be enabled). */ if (pfn <= LC_PAGES) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 507f23ba2034..7cdea2ec51e9 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -24,8 +24,8 @@ enum address_markers_idx { static struct addr_marker address_markers[] = { [IDENTITY_NR] = {0, "Identity Mapping"}, - [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, + [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, [VMEMMAP_NR] = {0, "vmemmap Area"}, [VMALLOC_NR] = {0, "vmalloc Area"}, [MODULES_NR] = {0, "Modules Area"}, diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index cb364153c43c..562f72955956 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -6,8 +6,9 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/mm.h> #include <linux/sysctl.h> +#include <linux/slab.h> +#include <linux/mm.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/gmap.h> @@ -366,3 +367,293 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) if ((*batch)->nr == MAX_TABLE_BATCH) tlb_flush_mmu(tlb); } + +/* + * Base infrastructure required to generate basic asces, region, segment, + * and page tables that do not make use of enhanced features like EDAT1. + */ + +static struct kmem_cache *base_pgt_cache; + +static unsigned long base_pgt_alloc(void) +{ + u64 *table; + + table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL); + if (table) + memset64(table, _PAGE_INVALID, PTRS_PER_PTE); + return (unsigned long) table; +} + +static void base_pgt_free(unsigned long table) +{ + kmem_cache_free(base_pgt_cache, (void *) table); +} + +static unsigned long base_crst_alloc(unsigned long val) +{ + unsigned long table; + + table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + if (table) + crst_table_init((unsigned long *)table, val); + return table; +} + +static void base_crst_free(unsigned long table) +{ + free_pages(table, CRST_ALLOC_ORDER); +} + +#define BASE_ADDR_END_FUNC(NAME, SIZE) \ +static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \ + unsigned long end) \ +{ \ + unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \ + \ + return (next - 1) < (end - 1) ? next : end; \ +} + +BASE_ADDR_END_FUNC(page, _PAGE_SIZE) +BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE) +BASE_ADDR_END_FUNC(region3, _REGION3_SIZE) +BASE_ADDR_END_FUNC(region2, _REGION2_SIZE) +BASE_ADDR_END_FUNC(region1, _REGION1_SIZE) + +static inline unsigned long base_lra(unsigned long address) +{ + unsigned long real; + + asm volatile( + " lra %0,0(%1)\n" + : "=d" (real) : "a" (address) : "cc"); + return real; +} + +static int base_page_walk(unsigned long origin, unsigned long addr, + unsigned long end, int alloc) +{ + unsigned long *pte, next; + + if (!alloc) + return 0; + pte = (unsigned long *) origin; + pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT; + do { + next = base_page_addr_end(addr, end); + *pte = base_lra(addr); + } while (pte++, addr = next, addr < end); + return 0; +} + +static int base_segment_walk(unsigned long origin, unsigned long addr, + unsigned long end, int alloc) +{ + unsigned long *ste, next, table; + int rc; + + ste = (unsigned long *) origin; + ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; + do { + next = base_segment_addr_end(addr, end); + if (*ste & _SEGMENT_ENTRY_INVALID) { + if (!alloc) + continue; + table = base_pgt_alloc(); + if (!table) + return -ENOMEM; + *ste = table | _SEGMENT_ENTRY; + } + table = *ste & _SEGMENT_ENTRY_ORIGIN; + rc = base_page_walk(table, addr, next, alloc); + if (rc) + return rc; + if (!alloc) + base_pgt_free(table); + cond_resched(); + } while (ste++, addr = next, addr < end); + return 0; +} + +static int base_region3_walk(unsigned long origin, unsigned long addr, + unsigned long end, int alloc) +{ + unsigned long *rtte, next, table; + int rc; + + rtte = (unsigned long *) origin; + rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT; + do { + next = base_region3_addr_end(addr, end); + if (*rtte & _REGION_ENTRY_INVALID) { + if (!alloc) + continue; + table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!table) + return -ENOMEM; + *rtte = table | _REGION3_ENTRY; + } + table = *rtte & _REGION_ENTRY_ORIGIN; + rc = base_segment_walk(table, addr, next, alloc); + if (rc) + return rc; + if (!alloc) + base_crst_free(table); + } while (rtte++, addr = next, addr < end); + return 0; +} + +static int base_region2_walk(unsigned long origin, unsigned long addr, + unsigned long end, int alloc) +{ + unsigned long *rste, next, table; + int rc; + + rste = (unsigned long *) origin; + rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT; + do { + next = base_region2_addr_end(addr, end); + if (*rste & _REGION_ENTRY_INVALID) { + if (!alloc) + continue; + table = base_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!table) + return -ENOMEM; + *rste = table | _REGION2_ENTRY; + } + table = *rste & _REGION_ENTRY_ORIGIN; + rc = base_region3_walk(table, addr, next, alloc); + if (rc) + return rc; + if (!alloc) + base_crst_free(table); + } while (rste++, addr = next, addr < end); + return 0; +} + +static int base_region1_walk(unsigned long origin, unsigned long addr, + unsigned long end, int alloc) +{ + unsigned long *rfte, next, table; + int rc; + + rfte = (unsigned long *) origin; + rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT; + do { + next = base_region1_addr_end(addr, end); + if (*rfte & _REGION_ENTRY_INVALID) { + if (!alloc) + continue; + table = base_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!table) + return -ENOMEM; + *rfte = table | _REGION1_ENTRY; + } + table = *rfte & _REGION_ENTRY_ORIGIN; + rc = base_region2_walk(table, addr, next, alloc); + if (rc) + return rc; + if (!alloc) + base_crst_free(table); + } while (rfte++, addr = next, addr < end); + return 0; +} + +/** + * base_asce_free - free asce and tables returned from base_asce_alloc() + * @asce: asce to be freed + * + * Frees all region, segment, and page tables that were allocated with a + * corresponding base_asce_alloc() call. + */ +void base_asce_free(unsigned long asce) +{ + unsigned long table = asce & _ASCE_ORIGIN; + + if (!asce) + return; + switch (asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_SEGMENT: + base_segment_walk(table, 0, _REGION3_SIZE, 0); + break; + case _ASCE_TYPE_REGION3: + base_region3_walk(table, 0, _REGION2_SIZE, 0); + break; + case _ASCE_TYPE_REGION2: + base_region2_walk(table, 0, _REGION1_SIZE, 0); + break; + case _ASCE_TYPE_REGION1: + base_region1_walk(table, 0, -_PAGE_SIZE, 0); + break; + } + base_crst_free(table); +} + +static int base_pgt_cache_init(void) +{ + static DEFINE_MUTEX(base_pgt_cache_mutex); + unsigned long sz = _PAGE_TABLE_SIZE; + + if (base_pgt_cache) + return 0; + mutex_lock(&base_pgt_cache_mutex); + if (!base_pgt_cache) + base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL); + mutex_unlock(&base_pgt_cache_mutex); + return base_pgt_cache ? 0 : -ENOMEM; +} + +/** + * base_asce_alloc - create kernel mapping without enhanced DAT features + * @addr: virtual start address of kernel mapping + * @num_pages: number of consecutive pages + * + * Generate an asce, including all required region, segment and page tables, + * that can be used to access the virtual kernel mapping. The difference is + * that the returned asce does not make use of any enhanced DAT features like + * e.g. large pages. This is required for some I/O functions that pass an + * asce, like e.g. some service call requests. + * + * Note: the returned asce may NEVER be attached to any cpu. It may only be + * used for I/O requests. tlb entries that might result because the + * asce was attached to a cpu won't be cleared. + */ +unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages) +{ + unsigned long asce, table, end; + int rc; + + if (base_pgt_cache_init()) + return 0; + end = addr + num_pages * PAGE_SIZE; + if (end <= _REGION3_SIZE) { + table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!table) + return 0; + rc = base_segment_walk(table, addr, end, 1); + asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH; + } else if (end <= _REGION2_SIZE) { + table = base_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!table) + return 0; + rc = base_region3_walk(table, addr, end, 1); + asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + } else if (end <= _REGION1_SIZE) { + table = base_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!table) + return 0; + rc = base_region2_walk(table, addr, end, 1); + asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + } else { + table = base_crst_alloc(_REGION1_ENTRY_EMPTY); + if (!table) + return 0; + rc = base_region1_walk(table, addr, end, 1); + asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH; + } + if (rc) { + base_asce_free(asce); + asce = 0; + } + return asce; +} |